diff --git a/.github/workflows/Vendor.yml b/.github/workflows/Vendor.yml index b73c528a4..9e2921295 100644 --- a/.github/workflows/Vendor.yml +++ b/.github/workflows/Vendor.yml @@ -85,6 +85,9 @@ jobs: id: commit_and_push if: ${{ steps.check_for_changes.outcome == 'success' && steps.check_for_changes.outputs.has_changes == 'true' }} run: | + # apply local patches + patch -p1 < ./data/icu/comma.patch + # commit and push changes MSG="Update vendored DuckDB sources to ${{ steps.vendor.outputs.vendor_rev }}" git add src/duckdb CMakeLists.txt git commit -m "${MSG}" diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c1d88e99..5e566e77e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,8 +48,6 @@ set(DUCKDB_INCLUDE_DIRS src/duckdb/third_party/hyperloglog src/duckdb/third_party/jaro_winkler src/duckdb/third_party/jaro_winkler/details - src/duckdb/third_party/libpg_query - src/duckdb/third_party/libpg_query/include src/duckdb/third_party/lz4 src/duckdb/third_party/brotli/include src/duckdb/third_party/brotli/common @@ -164,6 +162,7 @@ set(DUCKDB_SRC_FILES src/duckdb/src/function/cast/union/from_struct.cpp src/duckdb/ub_src_function_cast_variant.cpp src/duckdb/ub_src_function_pragma.cpp + src/duckdb/src/function/scalar/comparison/between.cpp src/duckdb/ub_src_function_scalar_compressed_materialization.cpp src/duckdb/ub_src_function_scalar.cpp src/duckdb/src/function/scalar/date/strftime.cpp @@ -243,14 +242,12 @@ set(DUCKDB_SRC_FILES src/duckdb/ub_src_parser_constraints.cpp src/duckdb/ub_src_parser_expression.cpp src/duckdb/ub_src_parser_parsed_data.cpp + src/duckdb/ub_src_parser_peg.cpp + src/duckdb/ub_src_parser_peg_tokenizer.cpp + src/duckdb/ub_src_parser_peg_transformer.cpp src/duckdb/ub_src_parser_query_node.cpp src/duckdb/ub_src_parser_statement.cpp src/duckdb/ub_src_parser_tableref.cpp - src/duckdb/src/parser/transform/constraint/transform_constraint.cpp - src/duckdb/ub_src_parser_transform_expression.cpp - src/duckdb/ub_src_parser_transform_helpers.cpp - 
src/duckdb/ub_src_parser_transform_statement.cpp - src/duckdb/ub_src_parser_transform_tableref.cpp src/duckdb/ub_src_planner.cpp src/duckdb/ub_src_planner_binder_expression.cpp src/duckdb/ub_src_planner_binder_query_node.cpp @@ -277,16 +274,6 @@ set(DUCKDB_SRC_FILES src/duckdb/ub_src_storage_table.cpp src/duckdb/ub_src_storage_table_variant.cpp src/duckdb/ub_src_transaction.cpp - src/duckdb/src/verification/copied_statement_verifier.cpp - src/duckdb/src/verification/deserialized_statement_verifier.cpp - src/duckdb/src/verification/explain_statement_verifier.cpp - src/duckdb/src/verification/external_statement_verifier.cpp - src/duckdb/src/verification/fetch_row_verifier.cpp - src/duckdb/src/verification/no_operator_caching_verifier.cpp - src/duckdb/src/verification/parsed_statement_verifier.cpp - src/duckdb/src/verification/prepared_statement_verifier.cpp - src/duckdb/src/verification/statement_verifier.cpp - src/duckdb/src/verification/unoptimized_statement_verifier.cpp src/duckdb/third_party/fmt/format.cc src/duckdb/third_party/fsst/libfsst.cpp src/duckdb/third_party/miniz/miniz.cpp @@ -319,16 +306,6 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/fastpforlib/bitpacking.cpp src/duckdb/third_party/utf8proc/utf8proc.cpp src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp - src/duckdb/third_party/libpg_query/pg_functions.cpp - src/duckdb/third_party/libpg_query/postgres_parser.cpp - src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp - src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp - src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp - src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp - src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp - src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp - src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp - src/duckdb/third_party/libpg_query/src_common_keywords.cpp src/duckdb/third_party/mbedtls/library/aes.cpp 
src/duckdb/third_party/mbedtls/library/asn1parse.cpp src/duckdb/third_party/mbedtls/library/asn1write.cpp @@ -385,52 +362,52 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp - src/duckdb/extension/core_functions/function_list.cpp - src/duckdb/extension/core_functions/lambda_functions.cpp src/duckdb/extension/core_functions/core_functions_extension.cpp - src/duckdb/ub_extension_core_functions_scalar_random.cpp - src/duckdb/ub_extension_core_functions_scalar_struct.cpp - src/duckdb/ub_extension_core_functions_scalar_union.cpp - src/duckdb/extension/core_functions/scalar/operators/bitwise.cpp - src/duckdb/ub_extension_core_functions_scalar_date.cpp - src/duckdb/ub_extension_core_functions_scalar_blob.cpp - src/duckdb/ub_extension_core_functions_scalar_array.cpp - src/duckdb/ub_extension_core_functions_scalar_debug.cpp + src/duckdb/extension/core_functions/lambda_functions.cpp + src/duckdb/extension/core_functions/function_list.cpp + src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp + src/duckdb/ub_extension_core_functions_aggregate_nested.cpp + src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp + src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp + src/duckdb/ub_extension_core_functions_aggregate_regression.cpp + src/duckdb/extension/core_functions/scalar/bit/bitstring.cpp src/duckdb/ub_extension_core_functions_scalar_generic.cpp - src/duckdb/extension/core_functions/scalar/math/numeric.cpp src/duckdb/extension/core_functions/scalar/enum/enum_functions.cpp - src/duckdb/extension/core_functions/scalar/bit/bitstring.cpp - src/duckdb/ub_extension_core_functions_scalar_string.cpp - src/duckdb/ub_extension_core_functions_scalar_map.cpp + src/duckdb/ub_extension_core_functions_scalar_debug.cpp + src/duckdb/extension/core_functions/scalar/math/numeric.cpp + src/duckdb/ub_extension_core_functions_scalar_array.cpp + 
src/duckdb/ub_extension_core_functions_scalar_union.cpp + src/duckdb/ub_extension_core_functions_scalar_blob.cpp src/duckdb/ub_extension_core_functions_scalar_list.cpp - src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp - src/duckdb/ub_extension_core_functions_aggregate_regression.cpp - src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp - src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp - src/duckdb/ub_extension_core_functions_aggregate_nested.cpp - src/duckdb/extension/parquet/zstd_file_system.cpp - src/duckdb/extension/parquet/parquet_field_id.cpp - src/duckdb/extension/parquet/parquet_geometry.cpp - src/duckdb/extension/parquet/column_writer.cpp - src/duckdb/extension/parquet/parquet_timestamp.cpp + src/duckdb/ub_extension_core_functions_scalar_map.cpp + src/duckdb/ub_extension_core_functions_scalar_date.cpp + src/duckdb/extension/core_functions/scalar/operators/bitwise.cpp + src/duckdb/ub_extension_core_functions_scalar_random.cpp + src/duckdb/ub_extension_core_functions_scalar_string.cpp + src/duckdb/ub_extension_core_functions_scalar_struct.cpp + src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_statistics.cpp + src/duckdb/extension/parquet/zstd_file_system.cpp + src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp src/duckdb/extension/parquet/parquet_shredding.cpp src/duckdb/extension/parquet/parquet_multi_file_info.cpp - src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp - src/duckdb/extension/parquet/serialize_parquet.cpp - src/duckdb/extension/parquet/parquet_crypto.cpp src/duckdb/extension/parquet/parquet_metadata.cpp + src/duckdb/extension/parquet/parquet_timestamp.cpp + src/duckdb/extension/parquet/parquet_crypto.cpp + src/duckdb/extension/parquet/serialize_parquet.cpp + src/duckdb/extension/parquet/parquet_column_schema.cpp + src/duckdb/extension/parquet/parquet_extension.cpp + src/duckdb/extension/parquet/parquet_field_id.cpp + 
src/duckdb/extension/parquet/column_writer.cpp + src/duckdb/extension/parquet/column_reader.cpp src/duckdb/extension/parquet/parquet_writer.cpp src/duckdb/extension/parquet/parquet_float16.cpp - src/duckdb/extension/parquet/column_reader.cpp - src/duckdb/extension/parquet/parquet_extension.cpp - src/duckdb/extension/parquet/parquet_column_schema.cpp - src/duckdb/extension/parquet/parquet_reader.cpp + src/duckdb/extension/parquet/parquet_geometry.cpp src/duckdb/ub_extension_parquet_reader.cpp src/duckdb/ub_extension_parquet_reader_variant.cpp + src/duckdb/ub_extension_parquet_decoder.cpp src/duckdb/ub_extension_parquet_writer.cpp src/duckdb/ub_extension_parquet_writer_variant.cpp - src/duckdb/ub_extension_parquet_decoder.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp @@ -469,157 +446,157 @@ set(DUCKDB_SRC_FILES src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp - src/duckdb/extension/icu/./icu-makedate.cpp - src/duckdb/extension/icu/./icu-timezone.cpp + src/duckdb/extension/icu/./icu-timebucket.cpp + src/duckdb/extension/icu/./icu-datepart.cpp src/duckdb/extension/icu/./icu-datetrunc.cpp - src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/extension/icu/./icu-current.cpp - src/duckdb/extension/icu/./icu-table-range.cpp src/duckdb/extension/icu/./icu_extension.cpp - src/duckdb/extension/icu/./icu-timebucket.cpp + src/duckdb/extension/icu/./icu-table-range.cpp src/duckdb/extension/icu/./icu-datefunc.cpp + src/duckdb/extension/icu/./icu-makedate.cpp + src/duckdb/extension/icu/./icu-dateadd.cpp + src/duckdb/extension/icu/./icu-timezone.cpp src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/extension/icu/./icu-datesub.cpp - src/duckdb/extension/icu/./icu-datepart.cpp - src/duckdb/extension/icu/./icu-dateadd.cpp - 
src/duckdb/extension/icu/third_party/icu/common/unormcmp.cpp - src/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp + src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/extension/icu/third_party/icu/common/static_unicode_sets.cpp src/duckdb/extension/icu/third_party/icu/common/utrie2_builder.cpp - src/duckdb/extension/icu/third_party/icu/common/utrie2.cpp - src/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp - src/duckdb/extension/icu/third_party/icu/common/ucase.cpp - src/duckdb/extension/icu/third_party/icu/common/uresbund.cpp + src/duckdb/extension/icu/third_party/icu/common/umutablecptrie.cpp + src/duckdb/extension/icu/third_party/icu/common/unames.cpp + src/duckdb/extension/icu/third_party/icu/common/uscript_props.cpp + src/duckdb/extension/icu/third_party/icu/common/putil.cpp + src/duckdb/extension/icu/third_party/icu/common/ubiditransform.cpp + src/duckdb/extension/icu/third_party/icu/common/locmap.cpp + src/duckdb/extension/icu/third_party/icu/common/locdistance.cpp src/duckdb/extension/icu/third_party/icu/common/ucptrie.cpp src/duckdb/extension/icu/third_party/icu/common/uhash.cpp - src/duckdb/extension/icu/third_party/icu/common/ubidi_props.cpp + src/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp src/duckdb/extension/icu/third_party/icu/common/uvector.cpp - src/duckdb/extension/icu/third_party/icu/common/unames.cpp - src/duckdb/extension/icu/third_party/icu/common/locmap.cpp - src/duckdb/extension/icu/third_party/icu/common/loadednormalizer2impl.cpp - src/duckdb/extension/icu/third_party/icu/common/wintz.cpp - src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp + src/duckdb/extension/icu/third_party/icu/common/ucase.cpp + src/duckdb/extension/icu/third_party/icu/common/utrie2.cpp + src/duckdb/extension/icu/third_party/icu/common/ubidi_props.cpp src/duckdb/extension/icu/third_party/icu/common/ustrcase.cpp - src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp - 
src/duckdb/extension/icu/third_party/icu/common/uloc.cpp src/duckdb/extension/icu/third_party/icu/common/uresdata.cpp - src/duckdb/extension/icu/third_party/icu/common/umutablecptrie.cpp - src/duckdb/extension/icu/third_party/icu/common/ubiditransform.cpp - src/duckdb/extension/icu/third_party/icu/common/putil.cpp - src/duckdb/extension/icu/third_party/icu/common/uscript_props.cpp src/duckdb/extension/icu/third_party/icu/common/umapfile.cpp - src/duckdb/extension/icu/third_party/icu/common/locdistance.cpp src/duckdb/extension/icu/third_party/icu/common/utrie.cpp + src/duckdb/extension/icu/third_party/icu/common/loadednormalizer2impl.cpp + src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp + src/duckdb/extension/icu/third_party/icu/common/unormcmp.cpp + src/duckdb/extension/icu/third_party/icu/common/wintz.cpp + src/duckdb/extension/icu/third_party/icu/common/uresbund.cpp + src/duckdb/extension/icu/third_party/icu/common/uloc.cpp + src/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp src/duckdb/extension/icu/third_party/icu/common/unifiedcache.cpp + src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp - src/duckdb/extension/icu/third_party/icu/i18n/persncal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp - src/duckdb/extension/icu/third_party/icu/i18n/units_data.cpp - src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_mapper.cpp - src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/string_segment.cpp - src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_formatimpl.cpp - src/duckdb/extension/icu/third_party/icu/i18n/tmutfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/islamcal.cpp - 
src/duckdb/extension/icu/third_party/icu/i18n/formattedval_sbimpl.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_capi.cpp - src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numparse_decimal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_patternstring.cpp - src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numrange_fluent.cpp - src/duckdb/extension/icu/third_party/icu/i18n/gregocal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_scientific.cpp - src/duckdb/extension/icu/third_party/icu/i18n/ucol_res.cpp - src/duckdb/extension/icu/third_party/icu/i18n/units_complexconverter.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp - src/duckdb/extension/icu/third_party/icu/i18n/compactdecimalformat.cpp - src/duckdb/extension/icu/third_party/icu/i18n/units_router.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_fluent.cpp - src/duckdb/extension/icu/third_party/icu/i18n/dayperiodrules.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_output.cpp - src/duckdb/extension/icu/third_party/icu/i18n/iso8601cal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_compact.cpp src/duckdb/extension/icu/third_party/icu/i18n/ucol.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_usageprefs.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_grouping.cpp src/duckdb/extension/icu/third_party/icu/i18n/number_symbolswrapper.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_rounding.cpp - src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp - src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp - src/duckdb/extension/icu/third_party/icu/i18n/chnsecal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numsys.cpp - src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numparse_compositions.cpp - 
src/duckdb/extension/icu/third_party/icu/i18n/windtfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/decNumber.cpp - src/duckdb/extension/icu/third_party/icu/i18n/gregoimp.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_simple.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_capi.cpp src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp - src/duckdb/extension/icu/third_party/icu/i18n/quantityformatter.cpp - src/duckdb/extension/icu/third_party/icu/i18n/wintzimpl.cpp + src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp + src/duckdb/extension/icu/third_party/icu/i18n/string_segment.cpp + src/duckdb/extension/icu/third_party/icu/i18n/chnsecal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp + src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_skeletons.cpp + src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp + src/duckdb/extension/icu/third_party/icu/i18n/coptccal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numrange_fluent.cpp + src/duckdb/extension/icu/third_party/icu/i18n/taiwncal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp + src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp src/duckdb/extension/icu/third_party/icu/i18n/number_decimalquantity.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_grouping.cpp + src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_usageprefs.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_patternstring.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_formatimpl.cpp + src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_integerwidth.cpp + 
src/duckdb/extension/icu/third_party/icu/i18n/wintzimpl.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_rounding.cpp + src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp src/duckdb/extension/icu/third_party/icu/i18n/numparse_symbols.cpp - src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp - src/duckdb/extension/icu/third_party/icu/i18n/coptccal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/quantityformatter.cpp + src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp + src/duckdb/extension/icu/third_party/icu/i18n/indiancal.cpp src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/measunit_extra.cpp + src/duckdb/extension/icu/third_party/icu/i18n/persncal.cpp src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp - src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp - src/duckdb/extension/icu/third_party/icu/i18n/japancal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp - src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_longnames.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_utils.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numrange_capi.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_integerwidth.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numparse_impl.cpp - src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp - src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp + src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp src/duckdb/extension/icu/third_party/icu/i18n/numparse_affixes.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_multiplier.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_currencysymbols.cpp - src/duckdb/extension/icu/third_party/icu/i18n/indiancal.cpp + 
src/duckdb/extension/icu/third_party/icu/i18n/number_mapper.cpp + src/duckdb/extension/icu/third_party/icu/i18n/compactdecimalformat.cpp + src/duckdb/extension/icu/third_party/icu/i18n/buddhcal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_scientific.cpp + src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp + src/duckdb/extension/icu/third_party/icu/i18n/gregocal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numparse_decimal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/cecal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp src/duckdb/extension/icu/third_party/icu/i18n/hebrwcal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp - src/duckdb/extension/icu/third_party/icu/i18n/number_skeletons.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_longnames.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_currencysymbols.cpp + src/duckdb/extension/icu/third_party/icu/i18n/upluralrules.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_compact.cpp + src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp + src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numparse_parsednumber.cpp + src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numparse_validators.cpp - src/duckdb/extension/icu/third_party/icu/i18n/ethpccal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/pluralranges.cpp - src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp + src/duckdb/extension/icu/third_party/icu/i18n/windtfmt.cpp + src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp + src/duckdb/extension/icu/third_party/icu/i18n/formattedval_sbimpl.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numrange_capi.cpp + 
src/duckdb/extension/icu/third_party/icu/i18n/units_complexconverter.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_utils.cpp + src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp + src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp + src/duckdb/extension/icu/third_party/icu/i18n/decNumber.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numsys.cpp src/duckdb/extension/icu/third_party/icu/i18n/number_patternmodifier.cpp - src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp + src/duckdb/extension/icu/third_party/icu/i18n/units_router.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numparse_impl.cpp src/duckdb/extension/icu/third_party/icu/i18n/numparse_scientific.cpp - src/duckdb/extension/icu/third_party/icu/i18n/winnmfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/buddhcal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numparse_currency.cpp - src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp + src/duckdb/extension/icu/third_party/icu/i18n/islamcal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp + src/duckdb/extension/icu/third_party/icu/i18n/units_data.cpp src/duckdb/extension/icu/third_party/icu/i18n/collationroot.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp - src/duckdb/extension/icu/third_party/icu/i18n/upluralrules.cpp - src/duckdb/extension/icu/third_party/icu/i18n/taiwncal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp + src/duckdb/extension/icu/third_party/icu/i18n/gregoimp.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_simple.cpp + src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp + src/duckdb/extension/icu/third_party/icu/i18n/iso8601cal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/tmutfmt.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp + 
src/duckdb/extension/icu/third_party/icu/i18n/pluralranges.cpp + src/duckdb/extension/icu/third_party/icu/i18n/dayperiodrules.cpp + src/duckdb/extension/icu/third_party/icu/i18n/ethpccal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_multiplier.cpp + src/duckdb/extension/icu/third_party/icu/i18n/ucol_res.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numparse_currency.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_fluent.cpp src/duckdb/extension/icu/third_party/icu/i18n/numrange_impl.cpp - src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp - src/duckdb/extension/icu/third_party/icu/i18n/cecal.cpp - src/duckdb/extension/icu/third_party/icu/i18n/numparse_parsednumber.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numparse_compositions.cpp + src/duckdb/extension/icu/third_party/icu/i18n/winnmfmt.cpp + src/duckdb/extension/icu/third_party/icu/i18n/number_output.cpp + src/duckdb/extension/icu/third_party/icu/i18n/japancal.cpp + src/duckdb/extension/icu/third_party/icu/i18n/numparse_validators.cpp + src/duckdb/extension/icu/third_party/icu/i18n/measunit_extra.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp - src/duckdb/extension/json/json_multi_file_info.cpp + src/duckdb/extension/json/json_reader.cpp + src/duckdb/extension/json/json_functions.cpp src/duckdb/extension/json/json_deserializer.cpp + src/duckdb/extension/json/json_serializer.cpp src/duckdb/extension/json/json_scan.cpp - src/duckdb/extension/json/json_common.cpp src/duckdb/extension/json/serialize_json.cpp src/duckdb/extension/json/json_enums.cpp - src/duckdb/extension/json/json_reader.cpp - src/duckdb/extension/json/json_serializer.cpp src/duckdb/extension/json/json_extension.cpp - src/duckdb/extension/json/json_functions.cpp + src/duckdb/extension/json/json_multi_file_info.cpp + src/duckdb/extension/json/json_common.cpp src/duckdb/ub_extension_json_json_functions.cpp 
src/duckdb/generated_extension_loader_package_build.cpp) diff --git a/data/icu/comma.patch b/data/icu/comma.patch new file mode 100644 index 000000000..104151120 --- /dev/null +++ b/data/icu/comma.patch @@ -0,0 +1,23 @@ +diff --git a/src/duckdb/extension/icu/third_party/icu/common/static_unicode_sets.h b/src/duckdb/extension/icu/third_party/icu/common/static_unicode_sets.h +index 5d90ce59..49742844 100644 +--- a/src/duckdb/extension/icu/third_party/icu/common/static_unicode_sets.h ++++ b/src/duckdb/extension/icu/third_party/icu/common/static_unicode_sets.h +@@ -25,6 +25,9 @@ + U_NAMESPACE_BEGIN + namespace unisets { + ++#pragma push_macro("COMMA") ++#undef COMMA ++ + enum Key { + // NONE is used to indicate null in chooseFrom(). + // EMPTY is used to get an empty UnicodeSet. +@@ -75,6 +78,8 @@ enum Key { + UNISETS_KEY_COUNT + }; + ++#pragma pop_macro("COMMA") ++ + /** + * Gets the static-allocated UnicodeSet according to the provided key. The + * pointer will be deleted during u_cleanup(); the caller should NOT delete it. 
diff --git a/src/duckdb/extension/core_functions/aggregate/algebraic/avg.cpp b/src/duckdb/extension/core_functions/aggregate/algebraic/avg.cpp index dd1451914..2b685e309 100644 --- a/src/duckdb/extension/core_functions/aggregate/algebraic/avg.cpp +++ b/src/duckdb/extension/core_functions/aggregate/algebraic/avg.cpp @@ -239,14 +239,14 @@ struct TimeTZAverageOperation : public BaseSumOperation children; children.emplace_back("count", LogicalType::UBIGINT); children.emplace_back("value", function.GetArguments()[0]); return LogicalType::STRUCT(std::move(children)); } -LogicalType GetKahanAvgStateType(const AggregateFunction &function) { +LogicalType GetKahanAvgStateType(const BoundAggregateFunction &function) { child_list_t children; children.emplace_back("count", LogicalType::UBIGINT); children.emplace_back("value", LogicalType::DOUBLE); @@ -289,9 +289,9 @@ AggregateFunction GetAverageAggregate(PhysicalType type) { unique_ptr BindDecimalAvg(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - auto decimal_type = arguments[0]->return_type; - function = GetAverageAggregate(decimal_type.InternalType()); - function.name = "avg"; + auto decimal_type = arguments[0]->GetReturnType(); + function.ReplaceImplementation(GetAverageAggregate(decimal_type.InternalType())); + function.SetName("avg"); function.GetArguments()[0] = decimal_type; function.SetReturnType(LogicalType::DOUBLE); return make_uniq( diff --git a/src/duckdb/extension/core_functions/aggregate/algebraic/corr.cpp b/src/duckdb/extension/core_functions/aggregate/algebraic/corr.cpp index dada66c1e..c3cb919bb 100644 --- a/src/duckdb/extension/core_functions/aggregate/algebraic/corr.cpp +++ b/src/duckdb/extension/core_functions/aggregate/algebraic/corr.cpp @@ -7,14 +7,27 @@ namespace duckdb { LogicalType GetCorrStateType() { + child_list_t covar_children; + covar_children.emplace_back("count", LogicalType::UBIGINT); + 
covar_children.emplace_back("meanx", LogicalType::DOUBLE); + covar_children.emplace_back("meany", LogicalType::DOUBLE); + covar_children.emplace_back("co_moment", LogicalType::DOUBLE); + auto cov_pop_type = LogicalType::STRUCT(std::move(covar_children)); + + child_list_t stddev_types; + stddev_types.emplace_back("count", LogicalType::UBIGINT); + stddev_types.emplace_back("mean", LogicalType::DOUBLE); + stddev_types.emplace_back("dsquared", LogicalType::DOUBLE); + auto stddev_type = LogicalType::STRUCT(std::move(stddev_types)); + child_list_t state_children; - state_children.emplace_back("cov_pop", CovarPopFun::GetFunction().GetStateType()); - state_children.emplace_back("dev_pop_x", VarPopFun::GetFunction().GetStateType()); - state_children.emplace_back("dev_pop_y", VarPopFun::GetFunction().GetStateType()); + state_children.emplace_back("cov_pop", std::move(cov_pop_type)); + state_children.emplace_back("dev_pop_x", stddev_type); + state_children.emplace_back("dev_pop_y", stddev_type); return LogicalType::STRUCT(std::move(state_children)); } -LogicalType GetCorrExportStateType(const AggregateFunction &) { +LogicalType GetCorrExportStateType(const BoundAggregateFunction &) { return GetCorrStateType(); } diff --git a/src/duckdb/extension/core_functions/aggregate/algebraic/covar.cpp b/src/duckdb/extension/core_functions/aggregate/algebraic/covar.cpp index 8696cde9f..0c5a848a8 100644 --- a/src/duckdb/extension/core_functions/aggregate/algebraic/covar.cpp +++ b/src/duckdb/extension/core_functions/aggregate/algebraic/covar.cpp @@ -5,7 +5,7 @@ namespace duckdb { namespace { -LogicalType GetCovarStateType(const AggregateFunction &) { +LogicalType GetCovarStateType(const BoundAggregateFunction &) { child_list_t child_types; child_types.emplace_back("count", LogicalType::UBIGINT); child_types.emplace_back("meanx", LogicalType::DOUBLE); diff --git a/src/duckdb/extension/core_functions/aggregate/algebraic/stddev.cpp 
b/src/duckdb/extension/core_functions/aggregate/algebraic/stddev.cpp index 51543a987..3b252d80d 100644 --- a/src/duckdb/extension/core_functions/aggregate/algebraic/stddev.cpp +++ b/src/duckdb/extension/core_functions/aggregate/algebraic/stddev.cpp @@ -6,7 +6,7 @@ namespace duckdb { namespace { -LogicalType GetStddevStateType(const AggregateFunction &) { +LogicalType GetStddevStateType(const BoundAggregateFunction &) { child_list_t child_types; child_types.emplace_back("count", LogicalType::UBIGINT); child_types.emplace_back("mean", LogicalType::DOUBLE); diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp index 8bc8bad6a..ce0fd7409 100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp @@ -189,11 +189,11 @@ struct ArgMinMaxBase { auto &context = input.GetClientContext(); auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - if (arguments[1]->return_type.InternalType() == PhysicalType::VARCHAR) { - ExpressionBinder::PushCollation(context, arguments[1], arguments[1]->return_type); + if (arguments[1]->GetReturnType().InternalType() == PhysicalType::VARCHAR) { + ExpressionBinder::PushCollation(context, arguments[1], arguments[1]->GetReturnType()); } - function.GetArguments()[0] = arguments[0]->return_type; - function.SetReturnType(arguments[0]->return_type); + function.GetArguments()[0] = arguments[0]->GetReturnType(); + function.SetReturnType(arguments[0]->GetReturnType()); auto function_data = make_uniq(NULL_HANDLING); return unique_ptr(std::move(function_data)); @@ -207,7 +207,7 @@ struct SpecializedGenericArgMinMaxState { } static void PrepareData(Vector &by, idx_t count, bool &, UnifiedVectorFormat &result) { - by.ToUnifiedFormat(count, result); + by.ToUnifiedFormat(result); } }; @@ -220,7 +220,7 @@ struct 
GenericArgMinMaxState { static void PrepareData(Vector &by, idx_t count, Vector &extra_state, UnifiedVectorFormat &result) { OrderModifiers modifiers(ORDER_TYPE, OrderByNullType::NULLS_LAST); CreateSortKeyHelpers::CreateSortKeyWithValidity(by, extra_state, modifiers, count); - extra_state.ToUnifiedFormat(count, result); + extra_state.ToUnifiedFormat(result); } }; @@ -234,7 +234,7 @@ struct VectorArgMinMaxBase : ArgMinMaxBase { auto &arg = inputs[0]; UnifiedVectorFormat adata; - arg.ToUnifiedFormat(count, adata); + arg.ToUnifiedFormat(adata); using ARG_TYPE = typename STATE::ARG_TYPE; using BY_TYPE = typename STATE::BY_TYPE; @@ -245,7 +245,7 @@ struct VectorArgMinMaxBase : ArgMinMaxBase { const auto bys = UnifiedVectorFormat::GetData(bdata); UnifiedVectorFormat sdata; - state_vector.ToUnifiedFormat(count, sdata); + state_vector.ToUnifiedFormat(sdata); STATE *last_state = nullptr; sel_t assign_sel[STANDARD_VECTOR_SIZE]; @@ -354,11 +354,11 @@ struct VectorArgMinMaxBase : ArgMinMaxBase { auto &context = input.GetClientContext(); auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - if (arguments[1]->return_type.InternalType() == PhysicalType::VARCHAR) { - ExpressionBinder::PushCollation(context, arguments[1], arguments[1]->return_type); + if (arguments[1]->GetReturnType().InternalType() == PhysicalType::VARCHAR) { + ExpressionBinder::PushCollation(context, arguments[1], arguments[1]->GetReturnType()); } - function.GetArguments()[0] = arguments[0]->return_type; - function.SetReturnType(arguments[0]->return_type); + function.GetArguments()[0] = arguments[0]->GetReturnType(); + function.SetReturnType(arguments[0]->GetReturnType()); auto function_data = make_uniq(NULL_HANDLING); return unique_ptr(std::move(function_data)); @@ -402,7 +402,7 @@ AggregateFunction GetVectorArgMinMaxFunctionInternal(const LogicalType &by_type, #else auto function = GetGenericArgMinMaxFunction(null_handling); function.GetArguments() = {type, by_type}; - 
function.return_type = type; + function.SetReturnType(type); return function; #endif } @@ -463,7 +463,7 @@ AggregateFunction GetArgMinMaxFunctionInternal(const LogicalType &by_type, const #else auto function = GetGenericArgMinMaxFunction(null_handling); function.GetArguments() = {type, by_type}; - function.return_type = type; + function.SetReturnType(type); #endif return function; } @@ -526,8 +526,8 @@ unique_ptr BindDecimalArgMinMax(BindAggregateFunctionInput &input) auto &context = input.GetClientContext(); auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - auto decimal_type = arguments[0]->return_type; - auto by_type = arguments[1]->return_type; + auto decimal_type = arguments[0]->GetReturnType(); + auto by_type = arguments[1]->GetReturnType(); // To avoid a combinatorial explosion, cast the ordering argument to one from the list auto by_types = ArgMaxByTypes(); @@ -554,9 +554,9 @@ unique_ptr BindDecimalArgMinMax(BindAggregateFunctionInput &input) by_type = by_types[best_target]; } - auto name = std::move(function.name); - function = GetDecimalArgMinMaxFunction(by_type, decimal_type, NULL_HANDLING); - function.name = std::move(name); + auto name = function.GetName(); + function.ReplaceImplementation(GetDecimalArgMinMaxFunction(by_type, decimal_type, NULL_HANDLING)); + function.SetName(std::move(name)); function.SetReturnType(decimal_type); auto function_data = make_uniq(NULL_HANDLING); @@ -669,8 +669,8 @@ void ArgMinMaxNUpdate(Vector inputs[], AggregateInputData &aggr_input, idx_t inp STATE::VAL_TYPE::PrepareData(val_vector, count, val_extra_state, val_format, bind_data.nulls_last); STATE::ARG_TYPE::PrepareData(arg_vector, count, arg_extra_state, arg_format, bind_data.nulls_last); - n_vector.ToUnifiedFormat(count, n_format); - state_vector.ToUnifiedFormat(count, state_format); + n_vector.ToUnifiedFormat(n_format); + state_vector.ToUnifiedFormat(state_format); auto states = UnifiedVectorFormat::GetData(state_format); @@ -719,7 
+719,7 @@ void ArgMinMaxNUpdate(Vector inputs[], AggregateInputData &aggr_input, idx_t inp // Bind //------------------------------------------------------------------------------ template -void SpecializeArgMinMaxNFunction(AggregateFunction &function) { +void SpecializeArgMinMaxNFunction(BoundAggregateFunction &function) { using STATE = ArgMinMaxNState; using OP = MinMaxNOperation; @@ -733,7 +733,7 @@ void SpecializeArgMinMaxNFunction(AggregateFunction &function) { } template -void SpecializeArgMinMaxNFunction(PhysicalType arg_type, AggregateFunction &function) { +void SpecializeArgMinMaxNFunction(PhysicalType arg_type, BoundAggregateFunction &function) { switch (arg_type) { #ifndef DUCKDB_SMALLER_BINARY case PhysicalType::VARCHAR: @@ -759,7 +759,7 @@ void SpecializeArgMinMaxNFunction(PhysicalType arg_type, AggregateFunction &func } template -void SpecializeArgMinMaxNFunction(PhysicalType val_type, PhysicalType arg_type, AggregateFunction &function) { +void SpecializeArgMinMaxNFunction(PhysicalType val_type, PhysicalType arg_type, BoundAggregateFunction &function) { switch (val_type) { #ifndef DUCKDB_SMALLER_BINARY case PhysicalType::VARCHAR: @@ -785,7 +785,7 @@ void SpecializeArgMinMaxNFunction(PhysicalType val_type, PhysicalType arg_type, } template -void SpecializeArgMinMaxNullNFunction(AggregateFunction &function) { +void SpecializeArgMinMaxNullNFunction(BoundAggregateFunction &function) { using STATE = ArgMinMaxNState; using OP = MinMaxNOperation; @@ -798,7 +798,7 @@ void SpecializeArgMinMaxNullNFunction(AggregateFunction &function) { } template -void SpecializeArgMinMaxNullNFunction(PhysicalType arg_type, AggregateFunction &function) { +void SpecializeArgMinMaxNullNFunction(PhysicalType arg_type, BoundAggregateFunction &function) { switch (arg_type) { #ifndef DUCKDB_SMALLER_BINARY case PhysicalType::VARCHAR: @@ -824,7 +824,7 @@ void SpecializeArgMinMaxNullNFunction(PhysicalType arg_type, AggregateFunction & } template -void 
SpecializeArgMinMaxNullNFunction(PhysicalType val_type, PhysicalType arg_type, AggregateFunction &function) { +void SpecializeArgMinMaxNullNFunction(PhysicalType val_type, PhysicalType arg_type, BoundAggregateFunction &function) { switch (val_type) { #ifndef DUCKDB_SMALLER_BINARY case PhysicalType::VARCHAR: @@ -858,14 +858,14 @@ unique_ptr ArgMinMaxNBind(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); for (auto &arg : arguments) { - if (arg->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arg->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } } - const auto val_type = arguments[0]->return_type.InternalType(); - const auto arg_type = arguments[1]->return_type.InternalType(); - function.SetReturnType(LogicalType::LIST(arguments[0]->return_type)); + const auto val_type = arguments[0]->GetReturnType().InternalType(); + const auto arg_type = arguments[1]->GetReturnType().InternalType(); + function.SetReturnType(LogicalType::LIST(arguments[0]->GetReturnType())); // Specialize the function based on the input types auto function_data = make_uniq(NULL_HANDLING, NULLS_LAST); diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/bitagg.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/bitagg.cpp index a86603870..7c5a3845a 100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/bitagg.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/bitagg.cpp @@ -18,7 +18,7 @@ struct BitState { }; template -LogicalType GetBitStateType(const AggregateFunction &function) { +LogicalType GetBitStateType(const BoundAggregateFunction &function) { child_list_t child_types; child_types.emplace_back("is_set", LogicalType::BOOLEAN); @@ -28,7 +28,7 @@ LogicalType GetBitStateType(const AggregateFunction &function) { return LogicalType::STRUCT(std::move(child_types)); } -LogicalType GetBitStringStateType(const 
AggregateFunction &function) { +LogicalType GetBitStringStateType(const BoundAggregateFunction &function) { child_list_t child_types; child_types.emplace_back("is_set", LogicalType::BOOLEAN); child_types.emplace_back("value", function.GetReturnType()); diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/bitstring_agg.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/bitstring_agg.cpp index 2fcc7658e..bcbeb4f74 100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/bitstring_agg.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/bitstring_agg.cpp @@ -49,13 +49,13 @@ struct BitstringAggBindData : public FunctionData { } static void Serialize(Serializer &serializer, const optional_ptr bind_data_p, - const AggregateFunction &) { + const BoundAggregateFunction &) { auto &bind_data = bind_data_p->Cast(); serializer.WriteProperty(100, "min", bind_data.min); serializer.WriteProperty(101, "max", bind_data.max); } - static unique_ptr Deserialize(Deserializer &deserializer, AggregateFunction &) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &) { Value min; Value max; deserializer.ReadProperty(100, "min", min); @@ -271,7 +271,7 @@ void BindBitString(AggregateFunctionSet &bitstring_agg, const LogicalTypeId &typ function.SetStatisticsCallback( BitstringPropagateStats); // stores min and max from column stats in BitstringAggBindData bitstring_agg.AddFunction(function); // uses the BitstringAggBindData to access statistics for creating bitstring - function.GetArguments() = {type, type, type}; + function.GetSignature() = FunctionSignature({type, type, type}, LogicalType::BIT); function.SetStatisticsCallback(nullptr); // min and max are provided as arguments bitstring_agg.AddFunction(function); } diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/bool.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/bool.cpp index b457e7c24..64938c759 
100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/bool.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/bool.cpp @@ -93,7 +93,7 @@ struct BoolOrFunFunction { } }; -LogicalType GetBoolAndStateType(const AggregateFunction &function) { +LogicalType GetBoolAndStateType(const BoundAggregateFunction &function) { child_list_t child_types; child_types.emplace_back("empty", LogicalType::BOOLEAN); child_types.emplace_back("val", LogicalType::BOOLEAN); diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/kurtosis.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/kurtosis.cpp index d9b2a1a4e..95eb5a2e8 100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/kurtosis.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/kurtosis.cpp @@ -100,7 +100,7 @@ struct KurtosisOperation { } }; -LogicalType GetKurtosisStateType(const AggregateFunction &function) { +LogicalType GetKurtosisStateType(const BoundAggregateFunction &function) { child_list_t children; children.emplace_back("n", LogicalType::UBIGINT); children.emplace_back("sum", LogicalType::DOUBLE); diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/product.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/product.cpp index 90d2507c0..3bfa40af2 100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/product.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/product.cpp @@ -55,7 +55,7 @@ struct ProductFunction { } }; -LogicalType GetProductStateType(const AggregateFunction &function) { +LogicalType GetProductStateType(const BoundAggregateFunction &function) { child_list_t children; children.emplace_back("empty", LogicalType::BOOLEAN); children.emplace_back("val", LogicalType::DOUBLE); diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp index 
c1d58f3b7..b80e1e4b0 100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp @@ -124,7 +124,7 @@ unique_ptr StringAggBind(BindAggregateFunctionInput &input) { D_ASSERT(arguments.size() == 2); // Check if any argument is of UNKNOWN type (parameter not yet bound) for (auto &arg : arguments) { - if (arg->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arg->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } } @@ -146,12 +146,12 @@ unique_ptr StringAggBind(BindAggregateFunctionInput &input) { } void StringAggSerialize(Serializer &serializer, const optional_ptr bind_data_p, - const AggregateFunction &function) { + const BoundAggregateFunction &function) { auto bind_data = bind_data_p->Cast(); serializer.WriteProperty(100, "separator", bind_data.sep); } -unique_ptr StringAggDeserialize(Deserializer &deserializer, AggregateFunction &bound_function) { +unique_ptr StringAggDeserialize(Deserializer &deserializer, BoundAggregateFunction &bound_function) { auto sep = deserializer.ReadProperty(100, "separator"); return make_uniq(std::move(sep)); } @@ -171,7 +171,7 @@ AggregateFunctionSet StringAggFun::GetFunctions() { string_agg_param.SetSerializeCallback(StringAggSerialize); string_agg_param.SetDeserializeCallback(StringAggDeserialize); string_agg.AddFunction(string_agg_param); - string_agg_param.GetArguments().emplace_back(LogicalType::VARCHAR); + string_agg_param.GetSignature().AddParameter(LogicalType::VARCHAR); string_agg.AddFunction(string_agg_param); return string_agg; } diff --git a/src/duckdb/extension/core_functions/aggregate/distributive/sum.cpp b/src/duckdb/extension/core_functions/aggregate/distributive/sum.cpp index ee14bdfdf..b86db63c5 100644 --- a/src/duckdb/extension/core_functions/aggregate/distributive/sum.cpp +++ b/src/duckdb/extension/core_functions/aggregate/distributive/sum.cpp @@ -90,7 +90,7 @@ 
LogicalType GetValueLogicalType() { } template -LogicalType GetSumStateType(const AggregateFunction &function) { +LogicalType GetSumStateType(const BoundAggregateFunction &function) { child_list_t child_types; child_types.emplace_back("isset", LogicalType::BOOLEAN); @@ -109,11 +109,11 @@ unique_ptr SumNoOverflowBind(BindAggregateFunctionInput &input) { } void SumNoOverflowSerialize(Serializer &serializer, const optional_ptr bind_data, - const AggregateFunction &function) { + const BoundAggregateFunction &function) { return; } -unique_ptr SumNoOverflowDeserialize(Deserializer &deserializer, AggregateFunction &function) { +unique_ptr SumNoOverflowDeserialize(Deserializer &deserializer, BoundAggregateFunction &function) { function.SetReturnType(deserializer.Get()); return nullptr; } @@ -183,7 +183,7 @@ unique_ptr SumPropagateStats(ClientContext &context, BoundAggreg return nullptr; } // total sum is guaranteed to fit in a single int64: use int64 sum instead of hugeint sum - expr.function = GetSumAggregateNoOverflow(internal_type); + expr.function.ReplaceImplementation(GetSumAggregateNoOverflow(internal_type)); } return nullptr; } @@ -234,9 +234,9 @@ AggregateFunction GetSumAggregate(PhysicalType type) { unique_ptr BindDecimalSum(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - auto decimal_type = arguments[0]->return_type; - function = GetSumAggregate(decimal_type.InternalType()); - function.name = "sum"; + auto decimal_type = arguments[0]->GetReturnType(); + function.ReplaceImplementation(GetSumAggregate(decimal_type.InternalType())); + function.SetName("sum"); function.GetArguments()[0] = decimal_type; function.SetReturnType(LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type))); function.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); @@ -278,12 +278,10 @@ struct BignumOperation { return; } if (!target.is_set) { - target.value = source.value; + 
target.value.Initialize(input.allocator); target.is_set = true; - return; } target.value.AddInPlace(input.allocator, source.value); - target.is_set = true; } template @@ -333,7 +331,7 @@ AggregateFunctionSet SumNoOverflowFun::GetFunctions() { return sum_no_overflow; } -LogicalType GetKahanSumStateType(const AggregateFunction &function) { +LogicalType GetKahanSumStateType(const BoundAggregateFunction &function) { child_list_t children; children.emplace_back("isset", LogicalType::BOOLEAN); children.emplace_back("value", LogicalType::DOUBLE); diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/approx_top_k.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/approx_top_k.cpp index 1a3c11e3b..8ad6d0127 100644 --- a/src/duckdb/extension/core_functions/aggregate/holistic/approx_top_k.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/approx_top_k.cpp @@ -343,7 +343,6 @@ template void ApproxTopKFinalize(Vector &state_vector, AggregateInputData &, Vector &result, idx_t count, idx_t offset) { auto states = state_vector.Values(count); - auto &mask = FlatVector::ValidityMutable(result); auto old_len = ListVector::GetListSize(result); idx_t new_entries = 0; // figure out how much space we need @@ -358,18 +357,17 @@ void ApproxTopKFinalize(Vector &state_vector, AggregateInputData &, Vector &resu } // reserve space in the list vector ListVector::Reserve(result, old_len + new_entries); - auto list_entries = FlatVector::GetDataMutable(result); + auto list_entries = FlatVector::Writer(result, offset + count, offset); auto &child_data = ListVector::GetChildMutable(result); idx_t current_offset = old_len; for (idx_t i = 0; i < count; i++) { - const auto rid = i + offset; auto &state = states[i].GetValue()->GetState(); if (state.values.empty()) { - mask.SetInvalid(rid); + list_entries.WriteNull(); continue; } - auto &list_entry = list_entries[rid]; + list_entry_t list_entry; list_entry.offset = current_offset; for (idx_t val_idx = 0; val_idx < 
MinValue(state.values.size(), state.k); val_idx++) { auto &val = state.values[val_idx].get(); @@ -378,25 +376,26 @@ void ApproxTopKFinalize(Vector &state_vector, AggregateInputData &, Vector &resu current_offset++; } list_entry.length = current_offset - list_entry.offset; + list_entries.WriteValue(list_entry); } D_ASSERT(current_offset == old_len + new_entries); ListVector::SetListSize(result, current_offset); - result.Verify(count); + result.Verify(); } unique_ptr ApproxTopKBind(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); for (auto &arg : arguments) { - if (arg->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arg->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } } - if (arguments[0]->return_type.id() == LogicalTypeId::VARCHAR) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::VARCHAR) { function.SetStateUpdateCallback(ApproxTopKUpdate); function.SetStateFinalizeCallback(ApproxTopKFinalize); } - function.SetReturnType(LogicalType::LIST(arguments[0]->return_type)); + function.SetReturnType(LogicalType::LIST(arguments[0]->GetReturnType())); return nullptr; } diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp index 33efe7e02..f5e7d118a 100644 --- a/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp @@ -86,12 +86,12 @@ struct ApproximateQuantileBindData : public FunctionData { } static void Serialize(Serializer &serializer, const optional_ptr bind_data_p, - const AggregateFunction &function) { + const BoundAggregateFunction &function) { auto &bind_data = bind_data_p->Cast(); serializer.WriteProperty(100, "quantiles", bind_data.quantiles); } - static unique_ptr Deserialize(Deserializer &deserializer, 
AggregateFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto result = make_uniq(); deserializer.ReadProperty(100, "quantiles", result->quantiles); return std::move(result); @@ -283,7 +283,7 @@ unique_ptr BindApproxQuantileDecimal(BindAggregateFunctionInput &i auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); auto bind_data = BindApproxQuantile(input); - function = ApproxQuantileDecimalFunction(arguments[0]->return_type); + function.ReplaceImplementation(ApproxQuantileDecimalFunction(arguments[0]->GetReturnType())); return bind_data; } @@ -293,7 +293,7 @@ AggregateFunction GetApproximateQuantileAggregate(const LogicalType &type) { fun.SetSerializeCallback(ApproximateQuantileBindData::Serialize); fun.SetDeserializeCallback(ApproximateQuantileBindData::Deserialize); // temporarily push an argument so we can bind the actual quantile - fun.GetArguments().emplace_back(LogicalType::FLOAT); + fun.GetSignature().AddParameter(LogicalType::FLOAT); return fun; } @@ -404,7 +404,7 @@ unique_ptr BindApproxQuantileDecimalList(BindAggregateFunctionInpu auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); auto bind_data = BindApproxQuantile(input); - function = ApproxQuantileDecimalListFunction(arguments[0]->return_type); + function.ReplaceImplementation(ApproxQuantileDecimalListFunction(arguments[0]->GetReturnType())); return bind_data; } @@ -415,17 +415,18 @@ AggregateFunction GetApproxQuantileListAggregate(const LogicalType &type) { fun.SetDeserializeCallback(ApproximateQuantileBindData::Deserialize); // temporarily push an argument so we can bind the actual quantile auto list_of_float = LogicalType::LIST(LogicalType::FLOAT); - fun.GetArguments().push_back(list_of_float); + fun.GetSignature().AddParameter(list_of_float); return fun; } -unique_ptr ApproxQuantileDecimalDeserialize(Deserializer &deserializer, AggregateFunction &function) { +unique_ptr 
ApproxQuantileDecimalDeserialize(Deserializer &deserializer, + BoundAggregateFunction &function) { auto bind_data = ApproximateQuantileBindData::Deserialize(deserializer, function); auto &return_type = deserializer.Get(); if (return_type.id() == LogicalTypeId::LIST) { - function = ApproxQuantileDecimalListFunction(function.GetArguments()[0]); + function.ReplaceImplementation(ApproxQuantileDecimalListFunction(function.GetArguments()[0])); } else { - function = ApproxQuantileDecimalFunction(function.GetArguments()[0]); + function.ReplaceImplementation(ApproxQuantileDecimalFunction(function.GetArguments()[0])); } return bind_data; } diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp index 6d01b1d9f..8b08efd64 100644 --- a/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp @@ -191,8 +191,12 @@ struct MedianAbsoluteDeviationOperation : QuantileOperation { template static void Window(AggregateInputData &aggr_input_data, const WindowPartitionInput &partition, - const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames &frames, Vector &result, - idx_t ridx) { + const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames *subframes_per_row, idx_t count, + Vector &result, idx_t row_idx) { + using MAD = MadAccessor; + using ID = QuantileIndirect; + using MadIndirect = QuantileComposed; + auto &state = *reinterpret_cast(l_state); auto gstate = reinterpret_cast(g_state); @@ -200,58 +204,60 @@ struct MedianAbsoluteDeviationOperation : QuantileOperation { const auto &fmask = partition.filter_mask; auto rdata = FlatVector::GetDataMutable(result); + auto &rmask = FlatVector::ValidityMutable(result); QuantileIncluded included(fmask, data); - const auto n = FrameSize(included, frames); - if (!n) { - auto &rmask = FlatVector::ValidityMutable(result); - rmask.Set(ridx, false); - return; - } - - // Compute the 
median D_ASSERT(aggr_input_data.bind_data); auto &bind_data = aggr_input_data.bind_data->Cast(); D_ASSERT(bind_data.quantiles.size() == 1); const auto &quantile = bind_data.quantiles[0]; auto &window_state = state.GetOrCreateWindowState(); + auto &prevs = window_state.prevs; MEDIAN_TYPE med; - if (gstate && gstate->HasTree()) { - med = gstate->GetWindowState().template WindowScalar(data, frames, n, result, quantile); - } else { - window_state.UpdateSkip(data, frames, included); - med = window_state.template WindowScalar(data, frames, n, result, quantile); - } - // Lazily initialise frame state - window_state.SetCount(frames.back().end - frames.front().start); - auto index2 = window_state.m.data(); - D_ASSERT(index2); + for (idx_t ridx = 0; ridx < count; ++ridx) { + const auto &frames = subframes_per_row[ridx]; + const auto n = FrameSize(included, frames); + if (!n) { + rmask.Set(ridx, false); + continue; + } - // The replacement trick does not work on the second index because if - // the median has changed, the previous order is not correct. - // It is probably close, however, and so reuse is helpful. - auto &prevs = window_state.prevs; - ReuseIndexes(index2, frames, prevs); - std::partition(index2, index2 + window_state.count, included); + // Compute the median + if (gstate && gstate->HasTree()) { + med = gstate->GetWindowState().template WindowScalar(data, frames, n, result, + quantile); + } else { + window_state.UpdateSkip(data, frames, included); + med = window_state.template WindowScalar(data, frames, n, result, quantile); + } - QuantileInterpolator interp(quantile, n, false); + // Lazily initialise frame state + window_state.SetCount(frames.back().end - frames.front().start); + auto index2 = window_state.m.data(); + D_ASSERT(index2); - // Compute mad from the second index - using ID = QuantileIndirect; - ID indirect(data); + // The replacement trick does not work on the second index because if + // the median has changed, the previous order is not correct. 
+ // It is probably close, however, and so reuse is helpful. + ReuseIndexes(index2, frames, prevs); + std::partition(index2, index2 + window_state.count, included); - using MAD = MadAccessor; - MAD mad(med); + QuantileInterpolator interp(quantile, n, false); - using MadIndirect = QuantileComposed; - MadIndirect mad_indirect(mad, indirect); - rdata[ridx] = interp.template Operation(index2, result, mad_indirect); + // Compute mad from the second index + ID indirect(data); + + MAD mad(med); - // Prev is used by both skip lists and increments - prevs = frames; + MadIndirect mad_indirect(mad, indirect); + rdata[ridx] = interp.template Operation(index2, result, mad_indirect); + + // Prev is used by both skip lists and increments + prevs = frames; + } } }; @@ -269,7 +275,7 @@ AggregateFunction GetTypedMedianAbsoluteDeviationAggregateFunction(const Logical fun.SetBindCallback(BindMAD); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); #ifndef DUCKDB_SMALLER_BINARY - fun.SetWindowCallback(OP::template Window); + fun.SetWindowBatchCallback(OP::template Window); fun.SetWindowInitCallback(OP::template WindowInit); #endif return fun; @@ -322,8 +328,9 @@ AggregateFunction GetMedianAbsoluteDeviationAggregateFunction(const LogicalType unique_ptr BindMedianAbsoluteDeviationDecimal(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetMedianAbsoluteDeviationAggregateFunction(arguments[0]->return_type); - function.name = "mad"; + auto impl = GetMedianAbsoluteDeviationAggregateFunction(arguments[0]->GetReturnType()); + function.ReplaceImplementation(impl); + function.SetName("mad"); function.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); return BindMAD(input); } diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp index 740c08bd6..798e44e84 100644 --- 
a/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/mode.cpp @@ -365,8 +365,8 @@ struct ModeFunction : TypedModeFunction { template static void Window(AggregateInputData &aggr_input_data, const WindowPartitionInput &partition, - const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames &frames, Vector &result, - idx_t rid) { + const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames *subframes_per_row, idx_t count, + Vector &result, idx_t row_idx) { auto &state = *reinterpret_cast(l_state); state.InitializePage(partition); @@ -381,44 +381,49 @@ struct ModeFunction : TypedModeFunction { ModeIncluded included(fmask, state); + using Updater = UpdateWindowState; + Updater updater(state, included); + if (!state.frequency_map) { state.frequency_map = TYPE_OP::CreateEmpty(Allocator::DefaultAllocator()); } const size_t tau_inverse = 4; // tau==0.25 - if (state.nonzero <= (state.frequency_map->size() / tau_inverse) || prevs.back().end <= frames.front().start || - frames.back().end <= prevs.front().start) { - state.Reset(); - // for f ∈ F do - for (const auto &frame : frames) { - for (auto i = frame.start; i < frame.end; ++i) { - if (included(i)) { - state.ModeAdd(i); + for (idx_t rid = 0; rid < count; ++rid) { + const auto &frames = subframes_per_row[rid]; + + if (state.nonzero <= (state.frequency_map->size() / tau_inverse) || + prevs.back().end <= frames.front().start || frames.back().end <= prevs.front().start) { + state.Reset(); + // for f ∈ F do + for (const auto &frame : frames) { + for (auto i = frame.start; i < frame.end; ++i) { + if (included(i)) { + state.ModeAdd(i); + } } } + } else { + AggregateExecutor::IntersectFrames(prevs, frames, updater); } - } else { - using Updater = UpdateWindowState; - Updater updater(state, included); - AggregateExecutor::IntersectFrames(prevs, frames, updater); - } - - if (!state.valid) { - // Rescan - auto highest_frequency = state.Scan(); - if 
(highest_frequency != state.frequency_map->end()) { - state.Update(highest_frequency->first); - state.count = highest_frequency->second.count; - state.valid = (state.count > 0); + + if (!state.valid) { + // Rescan + auto highest_frequency = state.Scan(); + if (highest_frequency != state.frequency_map->end()) { + state.Update(highest_frequency->first); + state.count = highest_frequency->second.count; + state.valid = (state.count > 0); + } } - } - if (state.valid) { - rdata[rid] = TYPE_OP::template Assign(result, *state.mode); - } else { - rmask.Set(rid, false); - } + if (state.valid) { + rdata[rid] = TYPE_OP::template Assign(result, *state.mode); + } else { + rmask.Set(rid, false); + } - prevs = frames; + prevs = frames; + } } }; @@ -459,7 +464,7 @@ AggregateFunction GetTypedModeFunction(const LogicalType &type) { auto func = AggregateFunction::UnaryAggregateDestructor( type, type); - func.SetWindowCallback(OP::template Window); + func.SetWindowBatchCallback(OP::template Window); return func; } @@ -503,8 +508,8 @@ AggregateFunction GetModeAggregate(const LogicalType &type) { unique_ptr BindModeAggregate(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetModeAggregate(arguments[0]->return_type); - function.name = "mode"; + function.ReplaceImplementation(GetModeAggregate(arguments[0]->GetReturnType())); + function.SetName("mode"); return nullptr; } @@ -605,8 +610,8 @@ AggregateFunction GetEntropyFunction(const LogicalType &type) { unique_ptr BindEntropyAggregate(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetEntropyFunction(arguments[0]->return_type); - function.name = "entropy"; + function.ReplaceImplementation(GetEntropyFunction(arguments[0]->GetReturnType())); + function.SetName("entropy"); return nullptr; } diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp 
b/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp index 25854a3d0..d0c655795 100644 --- a/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp @@ -108,7 +108,7 @@ bool QuantileBindData::Equals(const FunctionData &other_p) const { } void QuantileBindData::Serialize(Serializer &serializer, const optional_ptr bind_data_p, - const AggregateFunction &function) { + const BoundAggregateFunction &function) { auto &bind_data = bind_data_p->Cast(); vector raw; for (const auto &q : bind_data.quantiles) { @@ -119,7 +119,7 @@ void QuantileBindData::Serialize(Serializer &serializer, const optional_ptr QuantileBindData::Deserialize(Deserializer &deserializer, AggregateFunction &function) { +unique_ptr QuantileBindData::Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto result = make_uniq(); vector raw; deserializer.ReadProperty(100, "quantiles", raw); @@ -172,8 +172,8 @@ struct QuantileScalarOperation : public QuantileOperation { template static void Window(AggregateInputData &aggr_input_data, const WindowPartitionInput &partition, - const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames &frames, Vector &result, - idx_t ridx) { + const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames *subframes_per_row, idx_t count, + Vector &result, idx_t row_idx) { auto &state = *reinterpret_cast(l_state); auto gstate = reinterpret_cast(g_state); @@ -181,7 +181,6 @@ struct QuantileScalarOperation : public QuantileOperation { const auto &fmask = partition.filter_mask; QuantileIncluded included(fmask, data); - const auto n = FrameSize(included, frames); D_ASSERT(aggr_input_data.bind_data); auto &bind_data = aggr_input_data.bind_data->Cast(); @@ -189,26 +188,40 @@ struct QuantileScalarOperation : public QuantileOperation { auto rdata = FlatVector::GetDataMutable(result); auto &rmask = FlatVector::ValidityMutable(result); - if (!n) { - 
rmask.Set(ridx, false); - return; - } - const auto &quantile = bind_data.quantiles[0]; if (gstate && gstate->HasTree()) { - rdata[ridx] = gstate->GetWindowState().template WindowScalar(data, frames, n, result, - quantile); + for (idx_t ridx = 0; ridx < count; ++ridx) { + const auto &frames = subframes_per_row[ridx]; + const auto n = FrameSize(included, frames); + if (!n) { + rmask.Set(ridx, false); + continue; + } + + rdata[ridx] = gstate->GetWindowState().template WindowScalar(data, frames, n, + result, quantile); + } } else { auto &window_state = state.GetOrCreateWindowState(); - // Update the skip list - window_state.UpdateSkip(data, frames, included); + for (idx_t ridx = 0; ridx < count; ++ridx) { + const auto &frames = subframes_per_row[ridx]; + const auto n = FrameSize(included, frames); + if (!n) { + rmask.Set(ridx, false); + continue; + } - // Find the position(s) needed - rdata[ridx] = window_state.template WindowScalar(data, frames, n, result, quantile); + // Update the skip list + window_state.UpdateSkip(data, frames, included); - // Save the previous state for next time - window_state.prevs = frames; + // Find the position(s) needed + rdata[ridx] = + window_state.template WindowScalar(data, frames, n, result, quantile); + + // Save the previous state for next time + window_state.prevs = frames; + } } } }; @@ -275,8 +288,8 @@ struct QuantileListOperation : QuantileOperation { template static void Window(AggregateInputData &aggr_input_data, const WindowPartitionInput &partition, - const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames &frames, Vector &list, - idx_t lidx) { + const_data_ptr_t g_state, data_ptr_t l_state, const SubFrames *subframes_per_row, idx_t count, + Vector &list, idx_t row_idx) { auto &state = *reinterpret_cast(l_state); auto gstate = reinterpret_cast(g_state); @@ -287,22 +300,36 @@ struct QuantileListOperation : QuantileOperation { auto &bind_data = aggr_input_data.bind_data->Cast(); QuantileIncluded included(fmask, data); - 
const auto n = FrameSize(included, frames); // Result is a constant LIST with a fixed length - if (!n) { - auto &lmask = FlatVector::ValidityMutable(list); - lmask.Set(lidx, false); - return; - } + auto &lmask = FlatVector::ValidityMutable(list); if (gstate && gstate->HasTree()) { - gstate->GetWindowState().template WindowList(data, frames, n, list, lidx, bind_data); + for (idx_t lidx = 0; lidx < count; ++lidx) { + const auto &frames = subframes_per_row[lidx]; + const auto n = FrameSize(included, frames); + if (!n) { + lmask.Set(lidx, false); + continue; + } + + gstate->GetWindowState().template WindowList(data, frames, n, list, lidx, + bind_data); + } } else { auto &window_state = state.GetOrCreateWindowState(); - window_state.UpdateSkip(data, frames, included); - window_state.template WindowList(data, frames, n, list, lidx, bind_data); - window_state.prevs = frames; + for (idx_t lidx = 0; lidx < count; ++lidx) { + const auto &frames = subframes_per_row[lidx]; + const auto n = FrameSize(included, frames); + if (!n) { + lmask.Set(lidx, false); + continue; + } + + window_state.UpdateSkip(data, frames, included); + window_state.template WindowList(data, frames, n, list, lidx, bind_data); + window_state.prevs = frames; + } } } }; @@ -386,7 +413,7 @@ struct ScalarDiscreteQuantile { auto fun = AggregateFunction::UnaryAggregateDestructor(type, type); #ifndef DUCKDB_SMALLER_BINARY - fun.SetWindowCallback(OP::Window); + fun.SetWindowBatchCallback(OP::Window); fun.SetWindowInitCallback(OP::WindowInit); #endif return fun; @@ -425,7 +452,7 @@ struct ListDiscreteQuantile { auto fun = QuantileListAggregate(type, type); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); #ifndef DUCKDB_SMALLER_BINARY - fun.SetWindowCallback(OP::template Window); + fun.SetWindowBatchCallback(OP::template Window); fun.SetWindowInitCallback(OP::template WindowInit); #endif return fun; @@ -496,6 +523,7 @@ AggregateFunction GetContinuousQuantileTemplated(const LogicalType &type) { 
return OP::template GetFunction(type, LogicalType::TIMESTAMP); case LogicalTypeId::TIMESTAMP: case LogicalTypeId::TIMESTAMP_TZ: + case LogicalTypeId::TIMESTAMP_TZ_NS: case LogicalTypeId::TIMESTAMP_SEC: case LogicalTypeId::TIMESTAMP_MS: case LogicalTypeId::TIMESTAMP_NS: @@ -518,7 +546,7 @@ struct ScalarContinuousQuantile { AggregateDestructorType::LEGACY>(input_type, target_type); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); #ifndef DUCKDB_SMALLER_BINARY - fun.SetWindowCallback(OP::template Window); + fun.SetWindowBatchCallback(OP::template Window); fun.SetWindowInitCallback(OP::template WindowInit); #endif return fun; @@ -533,7 +561,7 @@ struct ListContinuousQuantile { auto fun = QuantileListAggregate(input_type, target_type); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); #ifndef DUCKDB_SMALLER_BINARY - fun.SetWindowCallback(OP::template Window); + fun.SetWindowBatchCallback(OP::template Window); fun.SetWindowInitCallback(OP::template WindowInit); #endif return fun; @@ -629,6 +657,7 @@ static bool CanInterpolate(const LogicalType &type) { case LogicalTypeId::DATE: case LogicalTypeId::TIMESTAMP: case LogicalTypeId::TIMESTAMP_TZ: + case LogicalTypeId::TIMESTAMP_TZ_NS: case LogicalTypeId::TIMESTAMP_SEC: case LogicalTypeId::TIMESTAMP_MS: case LogicalTypeId::TIMESTAMP_NS: @@ -649,18 +678,18 @@ struct MedianFunction { return fun; } - static unique_ptr Deserialize(Deserializer &deserializer, AggregateFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto bind_data = QuantileBindData::Deserialize(deserializer, function); auto &input_type = function.GetArguments()[0]; - function = GetAggregate(input_type); + function.ReplaceImplementation(GetAggregate(input_type)); return bind_data; } static unique_ptr Bind(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = 
GetAggregate(arguments[0]->return_type); + function.ReplaceImplementation(GetAggregate(arguments[0]->GetReturnType())); return make_uniq(Value::DECIMAL(int16_t(5), 2, 1)); } }; @@ -673,23 +702,23 @@ struct DiscreteQuantileListFunction { fun.SetSerializeCallback(QuantileBindData::Serialize); fun.SetDeserializeCallback(Deserialize); // temporarily push an argument so we can bind the actual quantile - fun.GetArguments().emplace_back(LogicalType::LIST(LogicalType::DOUBLE)); + fun.GetSignature().AddParameter(LogicalType::LIST(LogicalType::DOUBLE)); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); return fun; } - static unique_ptr Deserialize(Deserializer &deserializer, AggregateFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto bind_data = QuantileBindData::Deserialize(deserializer, function); auto &input_type = function.GetArguments()[0]; - function = GetAggregate(input_type); + function.ReplaceImplementation(GetAggregate(input_type)); return bind_data; } static unique_ptr Bind(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetAggregate(arguments[0]->return_type); + function.ReplaceImplementation(GetAggregate(arguments[0]->GetReturnType())); return BindQuantile(input); } }; @@ -702,20 +731,20 @@ struct DiscreteQuantileFunction { fun.SetSerializeCallback(QuantileBindData::Serialize); fun.SetDeserializeCallback(Deserialize); // temporarily push an argument so we can bind the actual quantile - fun.GetArguments().emplace_back(LogicalType::DOUBLE); + fun.GetSignature().AddParameter(LogicalType::DOUBLE); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); return fun; } - static unique_ptr Deserialize(Deserializer &deserializer, AggregateFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto bind_data = 
QuantileBindData::Deserialize(deserializer, function); auto &quantile_data = bind_data->Cast(); auto &input_type = function.GetArguments()[0]; if (quantile_data.quantiles.size() == 1) { - function = GetAggregate(input_type); + function.ReplaceImplementation(GetAggregate(input_type)); } else { - function = DiscreteQuantileListFunction::GetAggregate(input_type); + function.ReplaceImplementation(DiscreteQuantileListFunction::GetAggregate(input_type)); } return bind_data; } @@ -723,7 +752,7 @@ struct DiscreteQuantileFunction { static unique_ptr Bind(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetAggregate(arguments[0]->return_type); + function.ReplaceImplementation(GetAggregate(arguments[0]->GetReturnType())); return BindQuantile(input); } }; @@ -736,24 +765,26 @@ struct ContinuousQuantileFunction { fun.SetSerializeCallback(QuantileBindData::Serialize); fun.SetDeserializeCallback(Deserialize); // temporarily push an argument so we can bind the actual quantile - fun.GetArguments().emplace_back(LogicalType::DOUBLE); + fun.GetSignature().AddParameter(LogicalType::DOUBLE); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); return fun; } - static unique_ptr Deserialize(Deserializer &deserializer, AggregateFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto bind_data = QuantileBindData::Deserialize(deserializer, function); auto &input_type = function.GetArguments()[0]; - function = GetAggregate(input_type); + function.ReplaceImplementation(GetAggregate(input_type)); return bind_data; } static unique_ptr Bind(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetAggregate(function.GetArguments()[0].id() == LogicalTypeId::DECIMAL ? 
arguments[0]->return_type - : function.GetArguments()[0]); + auto impl = + GetAggregate(function.GetArguments()[0].id() == LogicalTypeId::DECIMAL ? arguments[0]->GetReturnType() + : function.GetArguments()[0]); + function.ReplaceImplementation(impl); return BindQuantile(input); } }; @@ -767,33 +798,36 @@ struct ContinuousQuantileListFunction { fun.SetDeserializeCallback(Deserialize); // temporarily push an argument so we can bind the actual quantile auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE); - fun.GetArguments().push_back(list_of_double); + fun.GetSignature().AddParameter(list_of_double); fun.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT); return fun; } - static unique_ptr Deserialize(Deserializer &deserializer, AggregateFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto bind_data = QuantileBindData::Deserialize(deserializer, function); auto &input_type = function.GetArguments()[0]; - function = GetAggregate(input_type); + function.ReplaceImplementation(GetAggregate(input_type)); return bind_data; } static unique_ptr Bind(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetAggregate(function.GetArguments()[0].id() == LogicalTypeId::DECIMAL ? arguments[0]->return_type - : function.GetArguments()[0]); + auto impl = + GetAggregate(function.GetArguments()[0].id() == LogicalTypeId::DECIMAL ? 
arguments[0]->GetReturnType() + : function.GetArguments()[0]); + function.ReplaceImplementation(impl); return BindQuantile(input); } }; template -AggregateFunction EmptyQuantileFunction(LogicalType input, const LogicalType &result, const LogicalType &extra_arg) { +static AggregateFunction EmptyQuantileFunction(LogicalType input, const LogicalType &result, + const LogicalType &extra_arg) { AggregateFunction fun({std::move(input)}, result, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, OP::Bind); if (extra_arg.id() != LogicalTypeId::INVALID) { - fun.GetArguments().push_back(extra_arg); + fun.GetSignature().AddParameter(extra_arg); } fun.SetSerializeCallback(QuantileBindData::Serialize); fun.SetDeserializeCallback(OP::Deserialize); @@ -819,10 +853,11 @@ AggregateFunctionSet QuantileDiscFun::GetFunctions() { return set; } -vector GetContinuousQuantileTypes() { +static vector GetContinuousQuantileTypes() { return {LogicalType::TINYINT, LogicalType::SMALLINT, LogicalType::INTEGER, LogicalType::BIGINT, LogicalType::HUGEINT, LogicalType::FLOAT, LogicalType::DOUBLE, LogicalType::DATE, - LogicalType::TIMESTAMP, LogicalType::TIME, LogicalType::TIMESTAMP_TZ, LogicalType::TIME_TZ}; + LogicalType::TIMESTAMP, LogicalType::TIME, LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_TZ_NS, + LogicalType::TIME_TZ}; } AggregateFunctionSet QuantileContFun::GetFunctions() { diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp index 2b56e3008..c98ab3d24 100644 --- a/src/duckdb/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/reservoir_quantile.cpp @@ -74,13 +74,13 @@ struct ReservoirQuantileBindData : public FunctionData { } static void Serialize(Serializer &serializer, const optional_ptr bind_data_p, - const AggregateFunction &function) { + const BoundAggregateFunction &function) { auto 
&bind_data = bind_data_p->Cast(); serializer.WriteProperty(100, "quantiles", bind_data.quantiles); serializer.WriteProperty(101, "sample_size", bind_data.sample_size); } - static unique_ptr Deserialize(Deserializer &deserializer, AggregateFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function) { auto result = make_uniq(); deserializer.ReadProperty(100, "quantiles", result->quantiles); deserializer.ReadProperty(101, "sample_size", result->sample_size); @@ -366,9 +366,9 @@ unique_ptr BindReservoirQuantile(BindAggregateFunctionInput &input unique_ptr BindReservoirQuantileDecimal(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function = GetReservoirQuantileAggregateFunction(arguments[0]->return_type.InternalType()); + function.ReplaceImplementation(GetReservoirQuantileAggregateFunction(arguments[0]->GetReturnType().InternalType())); auto bind_data = BindReservoirQuantile(input); - function.name = "reservoir_quantile"; + function.SetName("reservoir_quantile"); function.SetSerializeCallback(ReservoirQuantileBindData::Serialize); function.SetDeserializeCallback(ReservoirQuantileBindData::Deserialize); return bind_data; @@ -380,7 +380,7 @@ AggregateFunction GetReservoirQuantileAggregate(PhysicalType type) { fun.SetSerializeCallback(ReservoirQuantileBindData::Serialize); fun.SetDeserializeCallback(ReservoirQuantileBindData::Deserialize); // temporarily push an argument so we can bind the actual quantile - fun.GetArguments().emplace_back(LogicalType::DOUBLE); + fun.GetSignature().AddParameter(LogicalType::DOUBLE); return fun; } @@ -391,7 +391,7 @@ AggregateFunction GetReservoirQuantileListAggregate(const LogicalType &type) { fun.SetDeserializeCallback(ReservoirQuantileBindData::Deserialize); // temporarily push an argument so we can bind the actual quantile auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE); - 
fun.GetArguments().push_back(list_of_double); + fun.GetSignature().AddParameter(list_of_double); return fun; } @@ -400,14 +400,14 @@ void DefineReservoirQuantile(AggregateFunctionSet &set, const LogicalType &type) auto fun = GetReservoirQuantileAggregate(type.InternalType()); set.AddFunction(fun); - fun.GetArguments().emplace_back(LogicalType::INTEGER); + fun.GetSignature().AddParameter(LogicalType::INTEGER); set.AddFunction(fun); // List variants fun = GetReservoirQuantileListAggregate(type); set.AddFunction(fun); - fun.GetArguments().emplace_back(LogicalType::INTEGER); + fun.GetSignature().AddParameter(LogicalType::INTEGER); set.AddFunction(fun); } @@ -419,7 +419,7 @@ void GetReservoirQuantileDecimalFunction(AggregateFunctionSet &set, const vector fun.SetDeserializeCallback(ReservoirQuantileBindData::Deserialize); set.AddFunction(fun); - fun.GetArguments().emplace_back(LogicalType::INTEGER); + fun.GetSignature().AddParameter(LogicalType::INTEGER); set.AddFunction(fun); } diff --git a/src/duckdb/extension/core_functions/aggregate/nested/binned_histogram.cpp b/src/duckdb/extension/core_functions/aggregate/nested/binned_histogram.cpp index 44fcee4e1..5d6044091 100644 --- a/src/duckdb/extension/core_functions/aggregate/nested/binned_histogram.cpp +++ b/src/duckdb/extension/core_functions/aggregate/nested/binned_histogram.cpp @@ -198,6 +198,7 @@ bool SupportsOtherBucket(const LogicalType &type) { case LogicalTypeId::DATE: case LogicalTypeId::TIMESTAMP: case LogicalTypeId::TIMESTAMP_TZ: + case LogicalTypeId::TIMESTAMP_TZ_NS: case LogicalTypeId::TIMESTAMP_SEC: case LogicalTypeId::TIMESTAMP_MS: case LogicalTypeId::TIMESTAMP_NS: @@ -229,6 +230,7 @@ Value OtherBucketValue(const LogicalType &type) { case LogicalTypeId::DATE: case LogicalTypeId::TIMESTAMP: case LogicalTypeId::TIMESTAMP_TZ: + case LogicalTypeId::TIMESTAMP_TZ_NS: case LogicalTypeId::TIMESTAMP_SEC: case LogicalTypeId::TIMESTAMP_MS: case LogicalTypeId::TIMESTAMP_NS: @@ -258,16 +260,16 @@ Value 
OtherBucketValue(const LogicalType &type) { void IsHistogramOtherBinFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &input_type = args.data[0].GetType(); if (!SupportsOtherBucket(input_type)) { - result.Reference(Value::BOOLEAN(false)); + result.Reference(Value::BOOLEAN(false), count_t(args.size())); return; } auto v = OtherBucketValue(input_type); - Vector ref(v); + Vector ref(v, count_t(args.size())); VectorOperations::NotDistinctFrom(args.data[0], ref, result, args.size()); // Set NULL if input is NULL. UnifiedVectorFormat input_data; - args.data[0].ToUnifiedFormat(args.size(), input_data); + args.data[0].ToUnifiedFormat(input_data); if (!input_data.validity.CannotHaveNull()) { auto &result_validity = FlatVector::ValidityMutable(result); for (idx_t idx = 0; idx < args.size(); ++idx) { @@ -334,7 +336,7 @@ void HistogramBinFinalizeFunction(Vector &state_vector, AggregateInputData &, Ve } D_ASSERT(current_offset == old_len + new_entries); ListVector::SetListSize(result, current_offset); - result.Verify(count); + result.Verify(); } template @@ -393,12 +395,12 @@ unique_ptr HistogramBinBindFunction(BindAggregateFunctionInput &in auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); for (auto &arg : arguments) { - if (arg->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arg->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } } - function = GetHistogramBinFunction(arguments[0]->return_type); + function.ReplaceImplementation(GetHistogramBinFunction(arguments[0]->GetReturnType())); return nullptr; } diff --git a/src/duckdb/extension/core_functions/aggregate/nested/histogram.cpp b/src/duckdb/extension/core_functions/aggregate/nested/histogram.cpp index 8c401ec32..032966da0 100644 --- a/src/duckdb/extension/core_functions/aggregate/nested/histogram.cpp +++ b/src/duckdb/extension/core_functions/aggregate/nested/histogram.cpp @@ -132,7 +132,7 @@ void 
HistogramFinalizeFunction(Vector &state_vector, AggregateInputData &, Vecto } D_ASSERT(current_offset == old_len + new_entries); ListVector::SetListSize(result, current_offset); - result.Verify(count); + result.Verify(); } template @@ -210,10 +210,10 @@ unique_ptr HistogramBindFunction(BindAggregateFunctionInput &input auto &arguments = input.GetArguments(); D_ASSERT(arguments.size() == 1); - if (arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } - function = GetHistogramFunction(arguments[0]->return_type); + function.ReplaceImplementation(GetHistogramFunction(arguments[0]->GetReturnType())); return make_uniq(function.GetReturnType()); } diff --git a/src/duckdb/extension/core_functions/aggregate/nested/list.cpp b/src/duckdb/extension/core_functions/aggregate/nested/list.cpp index af8f7f2c8..79eff7d3c 100644 --- a/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +++ b/src/duckdb/extension/core_functions/aggregate/nested/list.cpp @@ -52,7 +52,7 @@ void ListUpdateFunction(Vector inputs[], AggregateInputData &aggr_input_data, id D_ASSERT(input_count == 1); auto &input = inputs[0]; RecursiveUnifiedVectorFormat input_data; - Vector::RecursiveToUnifiedFormat(input, count, input_data); + Vector::RecursiveToUnifiedFormat(input, input_data); auto states = state_vector.Values(count); auto &list_bind_data = aggr_input_data.bind_data->Cast(); @@ -134,6 +134,7 @@ void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_data, Ve } ListVector::SetListSize(result, total_len); + FlatVector::SetSize(result, count_t(offset + count)); } void ListCombineFunction(Vector &states_vector, Vector &combined, AggregateInputData &aggr_input_data, idx_t count) { @@ -158,7 +159,7 @@ void ListCombineFunction(Vector &states_vector, Vector &combined, AggregateInput list_bind_data.functions.BuildListVector(source.linked_list, input, 0); 
RecursiveUnifiedVectorFormat input_data; - Vector::RecursiveToUnifiedFormat(input, entry_count, input_data); + Vector::RecursiveToUnifiedFormat(input, input_data); for (idx_t entry_idx = 0; entry_idx < entry_count; ++entry_idx) { aggr_input_data.allocator.AlignNext(); @@ -170,7 +171,7 @@ void ListCombineFunction(Vector &states_vector, Vector &combined, AggregateInput unique_ptr ListBindFunction(BindAggregateFunctionInput &input) { auto &function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - function.SetReturnType(LogicalType::LIST(arguments[0]->return_type)); + function.SetReturnType(LogicalType::LIST(arguments[0]->GetReturnType())); return make_uniq(function.GetReturnType()); } diff --git a/src/duckdb/extension/core_functions/aggregate/regression/regr_avg.cpp b/src/duckdb/extension/core_functions/aggregate/regression/regr_avg.cpp index 3b79e4f24..0f82395e3 100644 --- a/src/duckdb/extension/core_functions/aggregate/regression/regr_avg.cpp +++ b/src/duckdb/extension/core_functions/aggregate/regression/regr_avg.cpp @@ -54,7 +54,7 @@ struct RegrAvgYFunction : RegrAvgFunction { } }; -LogicalType GetRegrAvgStateType(const AggregateFunction &) { +LogicalType GetRegrAvgStateType(const BoundAggregateFunction &) { child_list_t child_types; child_types.emplace_back("sum", LogicalType::DOUBLE); child_types.emplace_back("count", LogicalType::UBIGINT); diff --git a/src/duckdb/extension/core_functions/aggregate/regression/regr_count.cpp b/src/duckdb/extension/core_functions/aggregate/regression/regr_count.cpp index 7533b2230..ec464e7fa 100644 --- a/src/duckdb/extension/core_functions/aggregate/regression/regr_count.cpp +++ b/src/duckdb/extension/core_functions/aggregate/regression/regr_count.cpp @@ -9,7 +9,7 @@ namespace duckdb { namespace { -LogicalType GetRegrCountStateType(const AggregateFunction &) { +LogicalType GetRegrCountStateType(const BoundAggregateFunction &) { child_list_t child_types; child_types.emplace_back("count", LogicalType::UBIGINT); 
return LogicalType::STRUCT(std::move(child_types)); diff --git a/src/duckdb/extension/core_functions/aggregate/regression/regr_intercept.cpp b/src/duckdb/extension/core_functions/aggregate/regression/regr_intercept.cpp index f91923d53..d93b540ed 100644 --- a/src/duckdb/extension/core_functions/aggregate/regression/regr_intercept.cpp +++ b/src/duckdb/extension/core_functions/aggregate/regression/regr_intercept.cpp @@ -60,14 +60,27 @@ struct RegrInterceptOperation { } }; -LogicalType GetRegrInterceptStateType(const AggregateFunction &) { +LogicalType GetRegrInterceptStateType(const BoundAggregateFunction &) { + child_list_t covpop_children; + covpop_children.emplace_back("count", LogicalType::UBIGINT); + covpop_children.emplace_back("meanx", LogicalType::DOUBLE); + covpop_children.emplace_back("meany", LogicalType::DOUBLE); + covpop_children.emplace_back("co_moment", LogicalType::DOUBLE); + auto covpop_type = LogicalType::STRUCT(std::move(covpop_children)); + + child_list_t varpop_children; + varpop_children.emplace_back("count", LogicalType::UBIGINT); + varpop_children.emplace_back("mean", LogicalType::DOUBLE); + varpop_children.emplace_back("dsquared", LogicalType::DOUBLE); + auto varpop_type = LogicalType::STRUCT(std::move(varpop_children)); + child_list_t state_children; state_children.emplace_back("count", LogicalType::UBIGINT); state_children.emplace_back("sum_x", LogicalType::DOUBLE); state_children.emplace_back("sum_y", LogicalType::DOUBLE); child_list_t slope_children; - slope_children.emplace_back("cov_pop", CovarPopFun::GetFunction().GetStateType()); - slope_children.emplace_back("var_pop", VarPopFun::GetFunction().GetStateType()); + slope_children.emplace_back("cov_pop", std::move(covpop_type)); + slope_children.emplace_back("var_pop", std::move(varpop_type)); state_children.emplace_back("slope", LogicalType::STRUCT(std::move(slope_children))); return LogicalType::STRUCT(std::move(state_children)); } diff --git 
a/src/duckdb/extension/core_functions/aggregate/regression/regr_r2.cpp b/src/duckdb/extension/core_functions/aggregate/regression/regr_r2.cpp index 385b1a766..7f6407cad 100644 --- a/src/duckdb/extension/core_functions/aggregate/regression/regr_r2.cpp +++ b/src/duckdb/extension/core_functions/aggregate/regression/regr_r2.cpp @@ -61,11 +61,30 @@ struct RegrR2Operation { } }; -LogicalType GetRegrR2StateType(const AggregateFunction &) { +LogicalType GetRegrR2StateType(const BoundAggregateFunction &) { + child_list_t covar_children; + covar_children.emplace_back("count", LogicalType::UBIGINT); + covar_children.emplace_back("meanx", LogicalType::DOUBLE); + covar_children.emplace_back("meany", LogicalType::DOUBLE); + covar_children.emplace_back("co_moment", LogicalType::DOUBLE); + auto cov_pop_type = LogicalType::STRUCT(std::move(covar_children)); + + child_list_t stddev_types; + stddev_types.emplace_back("count", LogicalType::UBIGINT); + stddev_types.emplace_back("mean", LogicalType::DOUBLE); + stddev_types.emplace_back("dsquared", LogicalType::DOUBLE); + auto stddev_type = LogicalType::STRUCT(std::move(stddev_types)); + + child_list_t corr_children; + corr_children.emplace_back("cov_pop", std::move(cov_pop_type)); + corr_children.emplace_back("dev_pop_x", stddev_type); + corr_children.emplace_back("dev_pop_y", stddev_type); + auto corr_state = LogicalType::STRUCT(std::move(corr_children)); + child_list_t state_children; - state_children.emplace_back("corr", CorrFun::GetFunction().GetStateType()); - state_children.emplace_back("var_pop_x", VarPopFun::GetFunction().GetStateType()); - state_children.emplace_back("var_pop_y", VarPopFun::GetFunction().GetStateType()); + state_children.emplace_back("corr", corr_state); + state_children.emplace_back("var_pop_x", stddev_type); + state_children.emplace_back("var_pop_y", stddev_type); return LogicalType::STRUCT(std::move(state_children)); } diff --git a/src/duckdb/extension/core_functions/aggregate/regression/regr_slope.cpp 
b/src/duckdb/extension/core_functions/aggregate/regression/regr_slope.cpp index 1ebbe3730..70f18cda3 100644 --- a/src/duckdb/extension/core_functions/aggregate/regression/regr_slope.cpp +++ b/src/duckdb/extension/core_functions/aggregate/regression/regr_slope.cpp @@ -14,10 +14,23 @@ namespace duckdb { namespace { -LogicalType GetRegrSlopeStateType(const AggregateFunction &) { +LogicalType GetRegrSlopeStateType(const BoundAggregateFunction &) { + child_list_t covar_children; + covar_children.emplace_back("count", LogicalType::UBIGINT); + covar_children.emplace_back("meanx", LogicalType::DOUBLE); + covar_children.emplace_back("meany", LogicalType::DOUBLE); + covar_children.emplace_back("co_moment", LogicalType::DOUBLE); + auto cov_pop_type = LogicalType::STRUCT(std::move(covar_children)); + + child_list_t stddev_types; + stddev_types.emplace_back("count", LogicalType::UBIGINT); + stddev_types.emplace_back("mean", LogicalType::DOUBLE); + stddev_types.emplace_back("dsquared", LogicalType::DOUBLE); + auto stddev_type = LogicalType::STRUCT(std::move(stddev_types)); + child_list_t state_children; - state_children.emplace_back("cov_pop", CovarPopFun::GetFunction().GetStateType()); - state_children.emplace_back("var_pop", VarPopFun::GetFunction().GetStateType()); + state_children.emplace_back("cov_pop", std::move(cov_pop_type)); + state_children.emplace_back("var_pop", std::move(stddev_type)); return LogicalType::STRUCT(std::move(state_children)); } diff --git a/src/duckdb/extension/core_functions/aggregate/regression/regr_sxx_syy.cpp b/src/duckdb/extension/core_functions/aggregate/regression/regr_sxx_syy.cpp index e6aa1adfb..ec01aa50d 100644 --- a/src/duckdb/extension/core_functions/aggregate/regression/regr_sxx_syy.cpp +++ b/src/duckdb/extension/core_functions/aggregate/regression/regr_sxx_syy.cpp @@ -60,10 +60,16 @@ struct RegrSYYOperation : RegrBaseOperation { } }; -LogicalType GetRegrSStateType(const AggregateFunction &) { +LogicalType GetRegrSStateType(const 
BoundAggregateFunction &) { + child_list_t stddev_types; + stddev_types.emplace_back("count", LogicalType::UBIGINT); + stddev_types.emplace_back("mean", LogicalType::DOUBLE); + stddev_types.emplace_back("dsquared", LogicalType::DOUBLE); + auto stddev_type = LogicalType::STRUCT(std::move(stddev_types)); + child_list_t state_children; state_children.emplace_back("count", LogicalType::UBIGINT); - state_children.emplace_back("var_pop", VarPopFun::GetFunction().GetStateType()); + state_children.emplace_back("var_pop", stddev_type); return LogicalType::STRUCT(std::move(state_children)); } diff --git a/src/duckdb/extension/core_functions/aggregate/regression/regr_sxy.cpp b/src/duckdb/extension/core_functions/aggregate/regression/regr_sxy.cpp index b9d1be04f..e46d025f8 100644 --- a/src/duckdb/extension/core_functions/aggregate/regression/regr_sxy.cpp +++ b/src/duckdb/extension/core_functions/aggregate/regression/regr_sxy.cpp @@ -47,10 +47,18 @@ struct RegrSXYOperation { } }; -LogicalType GetRegrSXYStateType(const AggregateFunction &) { +LogicalType GetRegrSXYStateType(const BoundAggregateFunction &) { + child_list_t covar_children; + covar_children.emplace_back("count", LogicalType::UBIGINT); + covar_children.emplace_back("meanx", LogicalType::DOUBLE); + covar_children.emplace_back("meany", LogicalType::DOUBLE); + covar_children.emplace_back("co_moment", LogicalType::DOUBLE); + auto cov_pop_type = LogicalType::STRUCT(std::move(covar_children)); + child_list_t state_children; state_children.emplace_back("count", LogicalType::UBIGINT); - state_children.emplace_back("cov_pop", CovarPopFun::GetFunction().GetStateType()); + state_children.emplace_back("cov_pop", std::move(cov_pop_type)); + return LogicalType::STRUCT(std::move(state_children)); } diff --git a/src/duckdb/extension/core_functions/include/core_functions/aggregate/histogram_helpers.hpp b/src/duckdb/extension/core_functions/include/core_functions/aggregate/histogram_helpers.hpp index fe3d28aed..b0b7ad0de 100644 --- 
a/src/duckdb/extension/core_functions/include/core_functions/aggregate/histogram_helpers.hpp +++ b/src/duckdb/extension/core_functions/include/core_functions/aggregate/histogram_helpers.hpp @@ -24,7 +24,7 @@ struct HistogramFunctor { } static void PrepareData(Vector &input, idx_t count, bool &, UnifiedVectorFormat &result) { - input.ToUnifiedFormat(count, result); + input.ToUnifiedFormat(result); } template @@ -71,7 +71,7 @@ struct HistogramStringFunctor : HistogramStringFunctorBase { } static void PrepareData(Vector &input, idx_t count, bool &, UnifiedVectorFormat &result) { - input.ToUnifiedFormat(count, result); + input.ToUnifiedFormat(result); } }; @@ -89,10 +89,10 @@ struct HistogramGenericFunctor : HistogramStringFunctorBase { static void PrepareData(Vector &input, idx_t count, Vector &extra_state, UnifiedVectorFormat &result) { OrderModifiers modifiers(OrderType::ASCENDING, OrderByNullType::NULLS_LAST); CreateSortKeyHelpers::CreateSortKey(input, count, modifiers, extra_state); - input.Flatten(count); - extra_state.Flatten(count); + input.Flatten(); + extra_state.Flatten(); FlatVector::ValidityMutable(extra_state).Initialize(FlatVector::Validity(input)); - extra_state.ToUnifiedFormat(count, result); + extra_state.ToUnifiedFormat(result); } }; diff --git a/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_helpers.hpp b/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_helpers.hpp index 253657f5a..9617b8b4a 100644 --- a/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_helpers.hpp +++ b/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_helpers.hpp @@ -53,9 +53,9 @@ struct QuantileBindData : public FunctionData { bool Equals(const FunctionData &other_p) const override; static void Serialize(Serializer &serializer, const optional_ptr bind_data_p, - const AggregateFunction &function); + const BoundAggregateFunction &function); - static unique_ptr 
Deserialize(Deserializer &deserializer, AggregateFunction &function); + static unique_ptr Deserialize(Deserializer &deserializer, BoundAggregateFunction &function); vector quantiles; vector order; diff --git a/src/duckdb/extension/core_functions/lambda_functions.cpp b/src/duckdb/extension/core_functions/lambda_functions.cpp index 7d10c3490..8b0dd4fe8 100644 --- a/src/duckdb/extension/core_functions/lambda_functions.cpp +++ b/src/duckdb/extension/core_functions/lambda_functions.cpp @@ -2,6 +2,7 @@ #include "duckdb/common/serializer/serializer.hpp" #include "duckdb/common/serializer/deserializer.hpp" +#include "duckdb/common/vector/flat_vector.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/planner/expression/bound_cast_expression.hpp" @@ -32,7 +33,7 @@ struct LambdaExecuteInfo { } // get the result types - vector result_types {lambda_expr.return_type}; + vector result_types {lambda_expr.GetReturnType()}; // initialize the data chunks input_chunk.InitializeEmpty(input_types); @@ -157,7 +158,7 @@ vector LambdaFunctions::GetColumnInfo(DataChunk &ar // skip the input list and then insert all remaining input vectors for (idx_t i = 1; i < args.ColumnCount(); i++) { data.emplace_back(args.data[i]); - args.data[i].ToUnifiedFormat(row_count, data.back().format); + args.data[i].ToUnifiedFormat(data.back().format); } return data; } @@ -194,17 +195,13 @@ static void ExecuteExpression(const idx_t elem_cnt, const LambdaFunctions::Colum // (slice and) reference the other columns vector slices; for (idx_t i = 0; i < column_infos.size(); i++) { - if (column_infos[i].vector.get().GetVectorType() == VectorType::CONSTANT_VECTOR) { - // only reference constant vectorsl - info.input_chunk.data[i + slice_offset].Reference(column_infos[i].vector); - - } else { - // slice inconstant vectors - slices.emplace_back(column_infos[i].vector, column_infos[i].sel, elem_cnt); - info.input_chunk.data[i + slice_offset].Reference(slices.back()); - } + 
slices.emplace_back(column_infos[i].vector, column_infos[i].sel, elem_cnt); + info.input_chunk.data[i + slice_offset].Reference(slices.back()); } + // ensure all input vectors are sized to the chunk cardinality (some references inherit a different size) + info.input_chunk.SetChildCardinality(elem_cnt); + // execute the lambda expression info.expr_executor->Execute(info.input_chunk, info.lambda_chunk); } @@ -214,7 +211,7 @@ static void ExecuteExpression(const idx_t elem_cnt, const LambdaFunctions::Colum //===--------------------------------------------------------------------===// void ListLambdaBindData::Serialize(Serializer &serializer, const optional_ptr bind_data_p, - const ScalarFunction &) { + const BoundScalarFunction &) { auto &bind_data = bind_data_p->Cast(); serializer.WriteProperty(100, "return_type", bind_data.return_type); serializer.WritePropertyWithDefault(101, "lambda_expr", bind_data.lambda_expr, unique_ptr()); @@ -222,7 +219,7 @@ void ListLambdaBindData::Serialize(Serializer &serializer, const optional_ptr(103, "has_initial", bind_data.has_initial, false); } -unique_ptr ListLambdaBindData::Deserialize(Deserializer &deserializer, ScalarFunction &) { +unique_ptr ListLambdaBindData::Deserialize(Deserializer &deserializer, BoundScalarFunction &) { auto return_type = deserializer.ReadProperty(100, "return_type"); auto lambda_expr = deserializer.ReadPropertyWithExplicitDefault>(101, "lambda_expr", unique_ptr()); @@ -273,9 +270,8 @@ static void ExecuteLambda(DataChunk &args, ExpressionState &state, Vector &resul auto mutable_column_infos = LambdaFunctions::GetMutableColumnInfo(info.column_infos); // special-handling for the child_vector - auto child_vector_size = ListVector::GetListSize(args.data[0]); LambdaFunctions::ColumnInfo child_info(*info.child_vector); - info.child_vector->ToUnifiedFormat(child_vector_size, child_info.format); + info.child_vector->ToUnifiedFormat(child_info.format); // get the expression executor LambdaExecuteInfo 
execute_info(state.GetContext(), *info.lambda_expr, args, info.has_index, *info.child_vector); @@ -338,7 +334,11 @@ static void ExecuteLambda(DataChunk &args, ExpressionState &state, Vector &resul } execute_info.lambda_chunk.Reset(); - ExecuteExpression(elem_cnt, child_info, info.column_infos, index_vector, execute_info); + if (elem_cnt > 0) { + // only execute when there are remaining list elements; calling with elem_cnt = 0 would + // resize the (shared) source buffers down to size 0 + ExecuteExpression(elem_cnt, child_info, info.column_infos, index_vector, execute_info); + } auto &lambda_vector = execute_info.lambda_chunk.data[0]; FUNCTION_FUNCTOR::AppendResult(result, lambda_vector, elem_cnt, result_entries, list_filter_info, execute_info); @@ -346,28 +346,29 @@ static void ExecuteLambda(DataChunk &args, ExpressionState &state, Vector &resul if (info.is_all_constant && !info.is_volatile) { result.SetVectorType(VectorType::CONSTANT_VECTOR); } + FlatVector::SetSize(result, count_t(info.row_count)); } unique_ptr LambdaFunctions::ListLambdaPrepareBind(vector> &arguments, ClientContext &context, - ScalarFunction &bound_function) { + BoundScalarFunction &bound_function) { // NULL list parameter - if (arguments[0]->return_type.id() == LogicalTypeId::SQLNULL) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::SQLNULL) { bound_function.GetArguments()[0] = LogicalType::SQLNULL; bound_function.SetReturnType(LogicalType::SQLNULL); return make_uniq(bound_function.GetReturnType(), nullptr); } // prepared statements - if (arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } arguments[0] = BoundCastExpression::AddArrayCastToList(context, std::move(arguments[0])); - D_ASSERT(arguments[0]->return_type.id() == LogicalTypeId::LIST); + D_ASSERT(arguments[0]->GetReturnType().id() == LogicalTypeId::LIST); return nullptr; } -unique_ptr 
LambdaFunctions::ListLambdaBind(ClientContext &context, ScalarFunction &bound_function, +unique_ptr LambdaFunctions::ListLambdaBind(ClientContext &context, BoundScalarFunction &bound_function, vector> &arguments, const bool has_index) { unique_ptr bind_data = ListLambdaPrepareBind(arguments, context, bound_function); diff --git a/src/duckdb/extension/core_functions/scalar/array/array_functions.cpp b/src/duckdb/extension/core_functions/scalar/array/array_functions.cpp index 0971eb722..da8c616db 100644 --- a/src/duckdb/extension/core_functions/scalar/array/array_functions.cpp +++ b/src/duckdb/extension/core_functions/scalar/array/array_functions.cpp @@ -9,8 +9,8 @@ static unique_ptr ArrayGenericBinaryBind(BindScalarFunctionInput & auto &context = input.GetClientContext(); auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - const auto &lhs_type = arguments[0]->return_type; - const auto &rhs_type = arguments[1]->return_type; + const auto &lhs_type = arguments[0]->GetReturnType(); + const auto &rhs_type = arguments[1]->GetReturnType(); if (lhs_type.IsUnknown() && rhs_type.IsUnknown()) { bound_function.GetArguments()[0] = rhs_type; @@ -25,14 +25,14 @@ static unique_ptr ArrayGenericBinaryBind(BindScalarFunctionInput & if (bound_function.GetArguments()[0].id() != LogicalTypeId::ARRAY || bound_function.GetArguments()[1].id() != LogicalTypeId::ARRAY) { throw InvalidInputException( - StringUtil::Format("%s: Arguments must be arrays of FLOAT or DOUBLE", bound_function.name)); + StringUtil::Format("%s: Arguments must be arrays of FLOAT or DOUBLE", bound_function.GetName())); } const auto lhs_size = ArrayType::GetSize(bound_function.GetArguments()[0]); const auto rhs_size = ArrayType::GetSize(bound_function.GetArguments()[1]); if (lhs_size != rhs_size) { - throw BinderException("%s: Array arguments must be of the same size", bound_function.name); + throw BinderException("%s: Array arguments must be of the same size", 
bound_function.GetName()); } const auto &lhs_element_type = ArrayType::GetChildType(bound_function.GetArguments()[0]); @@ -41,13 +41,13 @@ static unique_ptr ArrayGenericBinaryBind(BindScalarFunctionInput & // Resolve common type LogicalType common_type; if (!LogicalType::TryGetMaxLogicalType(context, lhs_element_type, rhs_element_type, common_type)) { - throw BinderException("%s: Cannot infer common element type (left = '%s', right = '%s')", bound_function.name, - lhs_element_type.ToString(), rhs_element_type.ToString()); + throw BinderException("%s: Cannot infer common element type (left = '%s', right = '%s')", + bound_function.GetName(), lhs_element_type.ToString(), rhs_element_type.ToString()); } // Ensure it is float or double if (common_type.id() != LogicalTypeId::FLOAT && common_type.id() != LogicalTypeId::DOUBLE) { - throw BinderException("%s: Arguments must be arrays of FLOAT or DOUBLE", bound_function.name); + throw BinderException("%s: Arguments must be arrays of FLOAT or DOUBLE", bound_function.GetName()); } // The important part is just that we resolve the size of the input arrays @@ -87,7 +87,7 @@ template static void ArrayFixedCombine(DataChunk &args, ExpressionState &state, Vector &result) { const auto &lstate = state.Cast(); const auto &expr = lstate.expr.Cast(); - const auto &func_name = expr.function.name; + const auto &func_name = expr.function.GetName(); const auto count = args.size(); auto &lhs_child = ArrayVector::GetChildMutable(args.data[0]); @@ -100,8 +100,8 @@ static void ArrayFixedCombine(DataChunk &args, ExpressionState &state, Vector &r UnifiedVectorFormat lhs_format; UnifiedVectorFormat rhs_format; - args.data[0].ToUnifiedFormat(count, lhs_format); - args.data[1].ToUnifiedFormat(count, rhs_format); + args.data[0].ToUnifiedFormat(lhs_format); + args.data[1].ToUnifiedFormat(rhs_format); auto lhs_data = FlatVector::GetData(lhs_child); auto rhs_data = FlatVector::GetData(rhs_child); @@ -149,7 +149,7 @@ template static void 
ArrayGenericFold(DataChunk &args, ExpressionState &state, Vector &result) { const auto &lstate = state.Cast(); const auto &expr = lstate.expr.Cast(); - const auto &func_name = expr.function.name; + const auto &func_name = expr.function.GetName(); const auto count = args.size(); auto &lhs_child = ArrayVector::GetChildMutable(args.data[0]); @@ -161,8 +161,8 @@ static void ArrayGenericFold(DataChunk &args, ExpressionState &state, Vector &re UnifiedVectorFormat lhs_format; UnifiedVectorFormat rhs_format; - args.data[0].ToUnifiedFormat(count, lhs_format); - args.data[1].ToUnifiedFormat(count, rhs_format); + args.data[0].ToUnifiedFormat(lhs_format); + args.data[1].ToUnifiedFormat(rhs_format); auto lhs_data = FlatVector::GetData(lhs_child); auto rhs_data = FlatVector::GetData(rhs_child); diff --git a/src/duckdb/extension/core_functions/scalar/array/array_value.cpp b/src/duckdb/extension/core_functions/scalar/array/array_value.cpp index c3dff8d91..1b312b877 100644 --- a/src/duckdb/extension/core_functions/scalar/array/array_value.cpp +++ b/src/duckdb/extension/core_functions/scalar/array/array_value.cpp @@ -42,7 +42,7 @@ void ArrayValueFunction(DataChunk &args, ExpressionState &state, Vector &result) } } - result.Verify(args.size()); + result.Verify(); } unique_ptr ArrayValueBind(BindScalarFunctionInput &input) { @@ -54,17 +54,15 @@ unique_ptr ArrayValueBind(BindScalarFunctionInput &input) { } // construct return type - LogicalType child_type = arguments[0]->return_type; + LogicalType child_type = arguments[0]->GetReturnType(); for (idx_t i = 1; i < arguments.size(); i++) { - child_type = LogicalType::MaxLogicalType(context, child_type, arguments[i]->return_type); + child_type = LogicalType::MaxLogicalType(context, child_type, arguments[i]->GetReturnType()); } if (arguments.size() > ArrayType::MAX_ARRAY_SIZE) { throw OutOfRangeException("Array size exceeds maximum allowed size"); } - // this is more for completeness reasons - bound_function.SetVarArgs(child_type); 
bound_function.SetReturnType(LogicalType::ARRAY(child_type, arguments.size())); return make_uniq(bound_function.GetReturnType()); } @@ -72,7 +70,7 @@ unique_ptr ArrayValueBind(BindScalarFunctionInput &input) { unique_ptr ArrayValueStats(ClientContext &context, FunctionStatisticsInput &input) { auto &child_stats = input.child_stats; auto &expr = input.expr; - auto list_stats = ArrayStats::CreateEmpty(expr.return_type); + auto list_stats = ArrayStats::CreateEmpty(expr.GetReturnType()); auto &list_child_stats = ArrayStats::GetChildStats(list_stats); for (idx_t i = 0; i < child_stats.size(); i++) { list_child_stats.Merge(child_stats[i]); diff --git a/src/duckdb/extension/core_functions/scalar/date/age.cpp b/src/duckdb/extension/core_functions/scalar/date/age.cpp index cf7281f08..c03af29f0 100644 --- a/src/duckdb/extension/core_functions/scalar/date/age.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/age.cpp @@ -18,28 +18,26 @@ static void AgeFunctionStandard(DataChunk &input, ExpressionState &state, Vector auto current_date = Timestamp::FromDatetime( Timestamp::GetDate(MetaTransaction::Get(state.GetContext()).start_timestamp), dtime_t(0)); - UnaryExecutor::ExecuteWithNulls(input.data[0], result, input.size(), - [&](timestamp_t input, ValidityMask &mask, idx_t idx) { - if (Timestamp::IsFinite(input)) { - return Interval::GetAge(current_date, input); - } else { - mask.SetInvalid(idx); - return interval_t(); - } - }); + UnaryExecutor::Execute(input.data[0], result, input.size(), + [&](timestamp_t input) -> optional { + if (Timestamp::IsFinite(input)) { + return Interval::GetAge(current_date, input); + } else { + return nullopt; + } + }); } static void AgeFunction(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() == 2); - BinaryExecutor::ExecuteWithNulls( + BinaryExecutor::Execute( input.data[0], input.data[1], result, input.size(), - [&](timestamp_t input1, timestamp_t input2, ValidityMask &mask, idx_t idx) { + 
[&](timestamp_t input1, timestamp_t input2) -> optional { if (Timestamp::IsFinite(input1) && Timestamp::IsFinite(input2)) { return Interval::GetAge(input1, input2); } else { - mask.SetInvalid(idx); - return interval_t(); + return nullopt; } }); } diff --git a/src/duckdb/extension/core_functions/scalar/date/current.cpp b/src/duckdb/extension/core_functions/scalar/date/current.cpp index bf928618d..4961299f3 100644 --- a/src/duckdb/extension/core_functions/scalar/date/current.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/current.cpp @@ -18,7 +18,7 @@ static void CurrentTimestampFunction(DataChunk &input, ExpressionState &state, V D_ASSERT(input.ColumnCount() == 0); auto ts = GetTransactionTimestamp(state); auto val = Value::TIMESTAMPTZ(timestamp_tz_t(ts)); - result.Reference(val); + result.Reference(val, count_t(input.size())); } ScalarFunction GetCurrentTimestampFun::GetFunction() { diff --git a/src/duckdb/extension/core_functions/scalar/date/date_diff.cpp b/src/duckdb/extension/core_functions/scalar/date/date_diff.cpp index 6988579c3..e9ca61519 100644 --- a/src/duckdb/extension/core_functions/scalar/date/date_diff.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/date_diff.cpp @@ -19,15 +19,13 @@ namespace { struct DateDiff { template static inline void BinaryExecute(Vector &left, Vector &right, Vector &result, idx_t count) { - BinaryExecutor::ExecuteWithNulls( - left, right, result, count, [&](TA startdate, TB enddate, ValidityMask &mask, idx_t idx) { - if (Value::IsFinite(startdate) && Value::IsFinite(enddate)) { - return OP::template Operation(startdate, enddate); - } else { - mask.SetInvalid(idx); - return TR(); - } - }); + BinaryExecutor::Execute(left, right, result, count, [&](TA startdate, TB enddate) -> optional { + if (Value::IsFinite(startdate) && Value::IsFinite(enddate)) { + return OP::template Operation(startdate, enddate); + } else { + return nullopt; + } + }); } // We need to truncate down, not towards 0 @@ -355,12 +353,11 @@ 
int64_t DifferenceDates(DatePartSpecifier type, TA startdate, TB enddate) { struct DateDiffTernaryOperator { template - static inline TR Operation(TS part, TA startdate, TB enddate, ValidityMask &mask, idx_t idx) { + static inline optional Operation(TS part, TA startdate, TB enddate) { if (Value::IsFinite(startdate) && Value::IsFinite(enddate)) { return DifferenceDates(GetDatePartSpecifier(part.GetString()), startdate, enddate); } else { - mask.SetInvalid(idx); - return TR(); + return nullopt; } } }; @@ -436,9 +433,8 @@ void DateDiffFunction(DataChunk &args, ExpressionState &state, Vector &result) { const auto type = GetDatePartSpecifier(ConstantVector::GetData(part_arg)->GetString()); DateDiffBinaryExecutor(type, start_arg, end_arg, result, args.size()); } else { - TernaryExecutor::ExecuteWithNulls( - part_arg, start_arg, end_arg, result, args.size(), - DateDiffTernaryOperator::Operation); + TernaryExecutor::Execute(part_arg, start_arg, end_arg, result, args.size(), + DateDiffTernaryOperator::Operation); } } @@ -452,6 +448,8 @@ ScalarFunctionSet DateDiffFun::GetFunctions() { LogicalType::BIGINT, DateDiffFunction)); date_diff.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIME, LogicalType::TIME}, LogicalType::BIGINT, DateDiffFunction)); + date_diff.SetArgProperties(1, ArgProperties().NonIncreasing()); + date_diff.SetArgProperties(2, ArgProperties().NonDecreasing()); return date_diff; } diff --git a/src/duckdb/extension/core_functions/scalar/date/date_part.cpp b/src/duckdb/extension/core_functions/scalar/date/date_part.cpp index d64ab076a..63f80cfb5 100644 --- a/src/duckdb/extension/core_functions/scalar/date/date_part.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/date_part.cpp @@ -1,4 +1,3 @@ -#include "duckdb/common/vector/map_vector.hpp" #include "duckdb/common/vector/struct_vector.hpp" #include "core_functions/scalar/date_functions.hpp" #include "duckdb/common/case_insensitive_map.hpp" @@ -17,7 +16,7 @@ namespace duckdb { namespace { 
-DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, LogicalType &type) { +DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, const LogicalType &type) { const auto part = GetDatePartSpecifier(specifier); switch (type.id()) { case LogicalType::TIMESTAMP: @@ -773,9 +772,8 @@ struct DatePart { template void DatePartCachedFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast>(); - UnaryExecutor::ExecuteWithNulls( - args.data[0], result, args.size(), - [&](T input, ValidityMask &mask, idx_t idx) { return lstate.cache.ExtractElement(input, mask, idx); }); + UnaryExecutor::Execute(args.data[0], result, args.size(), + [&](T input) { return lstate.cache.ExtractElement(input); }); } template <> @@ -1750,13 +1748,12 @@ void DatePartFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &spec_arg = args.data[0]; auto &date_arg = args.data[1]; - BinaryExecutor::ExecuteWithNulls( - spec_arg, date_arg, result, args.size(), [&](string_t specifier, T date, ValidityMask &mask, idx_t idx) { + BinaryExecutor::Execute( + spec_arg, date_arg, result, args.size(), [&](string_t specifier, T date) -> optional { if (Value::IsFinite(date)) { return ExtractElement(GetDatePartSpecifier(specifier.GetString()), date); } else { - mask.SetInvalid(idx); - return int64_t(0); + return nullopt; } }); } @@ -1777,9 +1774,9 @@ unique_ptr DatePartBind(BindScalarFunctionInput &input) { case DatePartSpecifier::JULIAN_DAY: arguments.erase(arguments.begin()); bound_function.GetArguments().erase(bound_function.GetArguments().begin()); - bound_function.name = "julian"; + bound_function.SetName("julian"); bound_function.SetReturnType(LogicalType::DOUBLE); - switch (arguments[0]->return_type.id()) { + switch (arguments[0]->GetReturnType().id()) { case LogicalType::TIMESTAMP: case LogicalType::TIMESTAMP_S: case LogicalType::TIMESTAMP_MS: @@ -1794,15 +1791,15 @@ unique_ptr 
DatePartBind(BindScalarFunctionInput &input) { bound_function.SetStatisticsCallback(DatePart::JulianDayOperator::template PropagateStatistics); break; default: - throw BinderException("%s can only take DATE or TIMESTAMP arguments", bound_function.name); + throw BinderException("%s can only take DATE or TIMESTAMP arguments", bound_function.GetName()); } break; case DatePartSpecifier::EPOCH: arguments.erase(arguments.begin()); bound_function.GetArguments().erase(bound_function.GetArguments().begin()); - bound_function.name = "epoch"; + bound_function.SetName("epoch"); bound_function.SetReturnType(LogicalType::DOUBLE); - switch (arguments[0]->return_type.id()) { + switch (arguments[0]->GetReturnType().id()) { case LogicalType::TIMESTAMP: case LogicalType::TIMESTAMP_S: case LogicalType::TIMESTAMP_MS: @@ -1831,7 +1828,7 @@ unique_ptr DatePartBind(BindScalarFunctionInput &input) { bound_function.SetStatisticsCallback(DatePart::EpochOperator::template PropagateStatistics); break; default: - throw BinderException("%s can only take temporal arguments", bound_function.name); + throw BinderException("%s can only take temporal arguments", bound_function.GetName()); } break; default: @@ -1952,7 +1949,7 @@ struct StructDatePart { throw ParameterNotResolvedException(); } if (!arguments[0]->IsFoldable()) { - throw BinderException("%s can only take constant lists of part names", bound_function.name); + throw BinderException("%s can only take constant lists of part names", bound_function.GetName()); } case_insensitive_set_t name_collision_set; @@ -1963,16 +1960,17 @@ struct StructDatePart { if (parts_list.type().id() == LogicalTypeId::LIST) { auto &list_children = ListValue::GetChildren(parts_list); if (list_children.empty()) { - throw BinderException("%s requires non-empty lists of part names", bound_function.name); + throw BinderException("%s requires non-empty lists of part names", bound_function.GetName()); } for (const auto &part_value : list_children) { if 
(part_value.IsNull()) { - throw BinderException("NULL struct entry name in %s", bound_function.name); + throw BinderException("NULL struct entry name in %s", bound_function.GetName()); } const auto part_name = part_value.ToString(); - const auto part_code = GetDateTypePartSpecifier(part_name, arguments[1]->return_type); + const auto part_code = GetDateTypePartSpecifier(part_name, arguments[1]->GetReturnType()); if (name_collision_set.find(part_name) != name_collision_set.end()) { - throw BinderException("Duplicate struct entry name \"%s\" in %s", part_name, bound_function.name); + throw BinderException("Duplicate struct entry name \"%s\" in %s", part_name, + bound_function.GetName()); } name_collision_set.insert(part_name); part_codes.emplace_back(part_code); @@ -1980,7 +1978,7 @@ struct StructDatePart { struct_children.emplace_back(make_pair(part_name, part_type)); } } else { - throw BinderException("%s can only take constant lists of part names", bound_function.name); + throw BinderException("%s can only take constant lists of part names", bound_function.GetName()); } Function::EraseArgument(bound_function, arguments, 0); @@ -2074,18 +2072,19 @@ struct StructDatePart { } } - result.Verify(count); + result.Verify(); } static void SerializeFunction(Serializer &serializer, const optional_ptr bind_data_p, - const ScalarFunction &function) { + const BoundScalarFunction &function) { D_ASSERT(bind_data_p); auto &info = bind_data_p->Cast(); serializer.WriteProperty(100, "stype", info.stype); serializer.WriteProperty(101, "part_codes", info.part_codes); } - static unique_ptr DeserializeFunction(Deserializer &deserializer, ScalarFunction &bound_function) { + static unique_ptr DeserializeFunction(Deserializer &deserializer, + BoundScalarFunction &bound_function) { auto stype = deserializer.ReadProperty(100, "stype"); auto part_codes = deserializer.ReadProperty>(101, "part_codes"); return make_uniq(std::move(stype), std::move(part_codes)); @@ -2112,7 +2111,9 @@ 
ScalarFunctionSet GetCachedDatepartFunction() { } // namespace ScalarFunctionSet YearFun::GetFunctions() { - return GetCachedDatepartFunction(); + auto set = GetCachedDatepartFunction(); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } ScalarFunctionSet MonthFun::GetFunctions() { @@ -2124,15 +2125,21 @@ ScalarFunctionSet DayFun::GetFunctions() { } ScalarFunctionSet DecadeFun::GetFunctions() { - return GetDatePartFunction(); + auto set = GetDatePartFunction(); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } ScalarFunctionSet CenturyFun::GetFunctions() { - return GetDatePartFunction(); + auto set = GetDatePartFunction(); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } ScalarFunctionSet MillenniumFun::GetFunctions() { - return GetDatePartFunction(); + auto set = GetDatePartFunction(); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } ScalarFunctionSet QuarterFun::GetFunctions() { @@ -2160,11 +2167,15 @@ ScalarFunctionSet WeekFun::GetFunctions() { } ScalarFunctionSet ISOYearFun::GetFunctions() { - return GetDatePartFunction(); + auto set = GetDatePartFunction(); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } ScalarFunctionSet EraFun::GetFunctions() { - return GetDatePartFunction(); + auto set = GetDatePartFunction(); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } ScalarFunctionSet TimezoneFun::GetFunctions() { @@ -2192,7 +2203,9 @@ ScalarFunctionSet TimezoneMinuteFun::GetFunctions() { } ScalarFunctionSet EpochFun::GetFunctions() { - return GetTimePartFunction(LogicalType::DOUBLE); + auto set = GetTimePartFunction(LogicalType::DOUBLE); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } struct GetEpochNanosOperator { @@ -2217,9 +2230,11 @@ ScalarFunctionSet EpochNsFun::GetFunctions() { auto tstz_stats = OP::template PropagateStatistics; operator_set.AddFunction( 
ScalarFunction({LogicalType::TIMESTAMP_TZ}, LogicalType::BIGINT, tstz_func, nullptr, tstz_stats)); - operator_set.AddFunction( ScalarFunction({LogicalType::TIMESTAMP_NS}, LogicalType::BIGINT, ExecuteGetNanosFromTimestampNs)); + operator_set.AddFunction( + ScalarFunction({LogicalType::TIMESTAMP_TZ_NS}, LogicalType::BIGINT, ExecuteGetNanosFromTimestampNs)); + operator_set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return operator_set; } @@ -2232,6 +2247,7 @@ ScalarFunctionSet EpochUsFun::GetFunctions() { auto tstz_stats = OP::template PropagateStatistics; operator_set.AddFunction( ScalarFunction({LogicalType::TIMESTAMP_TZ}, LogicalType::BIGINT, tstz_func, nullptr, tstz_stats)); + operator_set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return operator_set; } @@ -2249,6 +2265,7 @@ ScalarFunctionSet EpochMsFun::GetFunctions() { operator_set.AddFunction( ScalarFunction({LogicalType::BIGINT}, LogicalType::TIMESTAMP, DatePart::EpochMillisOperator::Inverse)); + operator_set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return operator_set; } @@ -2256,6 +2273,7 @@ ScalarFunctionSet MakeTimestampMsFun::GetFunctions() { ScalarFunctionSet operator_set("make_timestamp_ms"); operator_set.AddFunction( ScalarFunction({LogicalType::BIGINT}, LogicalType::TIMESTAMP, DatePart::EpochMillisOperator::Inverse)); + operator_set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return operator_set; } @@ -2268,12 +2286,12 @@ ScalarFunctionSet NanosecondsFun::GetFunctions() { auto ns_func = DatePart::UnaryFunction; auto ns_stats = OP::template PropagateStatistics; operator_set.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_NS}, result_type, ns_func, nullptr, ns_stats)); + operator_set.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_TZ_NS}, result_type, ns_func, nullptr, ns_stats)); // TIMESTAMP WITH TIME ZONE has the same representation as TIMESTAMP so no need to defer to ICU auto tstz_func = DatePart::UnaryFunction; auto tstz_stats = 
OP::template PropagateStatistics; - operator_set.AddFunction( - ScalarFunction({LogicalType::TIMESTAMP_TZ}, LogicalType::BIGINT, tstz_func, nullptr, tstz_stats)); + operator_set.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_TZ}, result_type, tstz_func, nullptr, tstz_stats)); return operator_set; } @@ -2299,7 +2317,9 @@ ScalarFunctionSet HoursFun::GetFunctions() { } ScalarFunctionSet YearWeekFun::GetFunctions() { - return GetDatePartFunction(); + auto set = GetDatePartFunction(); + set.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return set; } ScalarFunctionSet DayOfMonthFun::GetFunctions() { diff --git a/src/duckdb/extension/core_functions/scalar/date/date_sub.cpp b/src/duckdb/extension/core_functions/scalar/date/date_sub.cpp index 2fddd3365..c1304cd74 100644 --- a/src/duckdb/extension/core_functions/scalar/date/date_sub.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/date_sub.cpp @@ -23,15 +23,13 @@ struct DateSub { template static inline void BinaryExecute(Vector &left, Vector &right, Vector &result, idx_t count) { - BinaryExecutor::ExecuteWithNulls( - left, right, result, count, [&](TA startdate, TB enddate, ValidityMask &mask, idx_t idx) { - if (Value::IsFinite(startdate) && Value::IsFinite(enddate)) { - return OP::template Operation(startdate, enddate); - } else { - mask.SetInvalid(idx); - return TR(); - } - }); + BinaryExecutor::Execute(left, right, result, count, [&](TA startdate, TB enddate) -> optional { + if (Value::IsFinite(startdate) && Value::IsFinite(enddate)) { + return OP::template Operation(startdate, enddate); + } else { + return nullopt; + } + }); } struct MonthOperator { @@ -354,12 +352,11 @@ int64_t SubtractDateParts(DatePartSpecifier type, TA startdate, TB enddate) { struct DateSubTernaryOperator { template - static inline TR Operation(TS part, TA startdate, TB enddate, ValidityMask &mask, idx_t idx) { + static inline optional Operation(TS part, TA startdate, TB enddate) { if (Value::IsFinite(startdate) && 
Value::IsFinite(enddate)) { return SubtractDateParts(GetDatePartSpecifier(part.GetString()), startdate, enddate); } else { - mask.SetInvalid(idx); - return TR(); + return nullopt; } } }; @@ -433,9 +430,8 @@ void DateSubFunction(DataChunk &args, ExpressionState &state, Vector &result) { const auto type = GetDatePartSpecifier(ConstantVector::GetData(part_arg)->GetString()); DateSubBinaryExecutor(type, start_arg, end_arg, result, args.size()); } else { - TernaryExecutor::ExecuteWithNulls( - part_arg, start_arg, end_arg, result, args.size(), - DateSubTernaryOperator::Operation); + TernaryExecutor::Execute(part_arg, start_arg, end_arg, result, args.size(), + DateSubTernaryOperator::Operation); } } @@ -449,6 +445,8 @@ ScalarFunctionSet DateSubFun::GetFunctions() { LogicalType::BIGINT, DateSubFunction)); date_sub.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIME, LogicalType::TIME}, LogicalType::BIGINT, DateSubFunction)); + date_sub.SetArgProperties(1, ArgProperties().NonIncreasing()); + date_sub.SetArgProperties(2, ArgProperties().NonDecreasing()); return date_sub; } diff --git a/src/duckdb/extension/core_functions/scalar/date/date_trunc.cpp b/src/duckdb/extension/core_functions/scalar/date/date_trunc.cpp index b109f2f6a..00ec4bd9b 100644 --- a/src/duckdb/extension/core_functions/scalar/date/date_trunc.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/date_trunc.cpp @@ -96,52 +96,42 @@ struct DateTrunc { } }; + // Truncate a UTC timestamp to the nearest fixed-width interval boundary. + // Applies to any unit whose length is a constant number of microseconds + // (second, minute, hour, day). Variable-length units (month, year, etc.) + // must use the calendar-decomposition path above. + static inline timestamp_t TruncFixed(timestamp_t ts, int64_t interval_us) { + const int64_t v = ts.value; + // Round towards negative infinity instead of 0 + const int64_t q = v / interval_us - (v % interval_us != 0 && v < 0 ? 
1 : 0); + return timestamp_t(q * interval_us); + } + struct HourOperator { template static inline TR Operation(TA input) { - int32_t hour, min, sec, micros; - date_t date; - dtime_t time; - Timestamp::Convert(input, date, time); - Time::Convert(time, hour, min, sec, micros); - return Timestamp::FromDatetime(date, Time::FromTime(hour, 0, 0, 0)); + return TruncFixed(input, Interval::MICROS_PER_HOUR); } }; struct MinuteOperator { template static inline TR Operation(TA input) { - int32_t hour, min, sec, micros; - date_t date; - dtime_t time; - Timestamp::Convert(input, date, time); - Time::Convert(time, hour, min, sec, micros); - return Timestamp::FromDatetime(date, Time::FromTime(hour, min, 0, 0)); + return TruncFixed(input, Interval::MICROS_PER_MINUTE); } }; struct SecondOperator { template static inline TR Operation(TA input) { - int32_t hour, min, sec, micros; - date_t date; - dtime_t time; - Timestamp::Convert(input, date, time); - Time::Convert(time, hour, min, sec, micros); - return Timestamp::FromDatetime(date, Time::FromTime(hour, min, sec, 0)); + return TruncFixed(input, Interval::MICROS_PER_SEC); } }; struct MillisecondOperator { template static inline TR Operation(TA input) { - int32_t hour, min, sec, micros; - date_t date; - dtime_t time; - Timestamp::Convert(input, date, time); - Time::Convert(time, hour, min, sec, micros); - micros -= UnsafeNumericCast(micros % Interval::MICROS_PER_MSEC); - return Timestamp::FromDatetime(date, Time::FromTime(hour, min, sec, micros)); + return TruncFixed(input, Interval::MICROS_PER_MSEC); } }; @@ -241,7 +231,7 @@ timestamp_t DateTrunc::DayOperator::Operation(date_t input) { template <> timestamp_t DateTrunc::DayOperator::Operation(timestamp_t input) { - return DayOperator::Operation(Timestamp::GetDate(input)); + return TruncFixed(input, Interval::MICROS_PER_DAY); } template <> @@ -609,6 +599,7 @@ ScalarFunctionSet DateTruncFun::GetFunctions() { DateTruncFunction)); for (auto &func : date_trunc.functions) { 
func.SetFallible(); + func.SetArgProperties(1, ArgProperties().NonDecreasing()); } return date_trunc; } diff --git a/src/duckdb/extension/core_functions/scalar/date/epoch.cpp b/src/duckdb/extension/core_functions/scalar/date/epoch.cpp index 5aa808ec8..9af33ed9f 100644 --- a/src/duckdb/extension/core_functions/scalar/date/epoch.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/epoch.cpp @@ -55,7 +55,9 @@ void TimeTZSortKeyFunction(DataChunk &input, ExpressionState &state, Vector &res ScalarFunction ToTimestampFun::GetFunction() { // to_timestamp is an alias from Postgres that converts the time in seconds to a timestamp - return ScalarFunction({LogicalType::DOUBLE}, LogicalType::TIMESTAMP_TZ, EpochSecFunction); + ScalarFunction func({LogicalType::DOUBLE}, LogicalType::TIMESTAMP_TZ, EpochSecFunction); + func.SetUnaryArgProperties(ArgProperties().NonDecreasing()); + return func; } ScalarFunction NormalizedIntervalFun::GetFunction() { diff --git a/src/duckdb/extension/core_functions/scalar/date/make_date.cpp b/src/duckdb/extension/core_functions/scalar/date/make_date.cpp index 76015d732..fc27dc401 100644 --- a/src/duckdb/extension/core_functions/scalar/date/make_date.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/make_date.cpp @@ -1,12 +1,10 @@ -#include "duckdb/common/vector/map_vector.hpp" -#include "duckdb/common/vector/struct_vector.hpp" #include "core_functions/scalar/date_functions.hpp" #include "duckdb/common/operator/cast_operators.hpp" #include "duckdb/common/types/date.hpp" #include "duckdb/common/types/time.hpp" #include "duckdb/common/types/timestamp.hpp" #include "duckdb/common/vector_operations/ternary_executor.hpp" -#include "duckdb/common/vector_operations/senary_executor.hpp" +#include "duckdb/common/vector_operations/variadic_executor.hpp" #include "duckdb/common/exception/conversion_exception.hpp" #include @@ -54,14 +52,20 @@ void ExecuteStructMakeDate(DataChunk &input, ExpressionState &state, Vector &res // this should be 
guaranteed by the binder D_ASSERT(input.ColumnCount() == 1); auto &vec = input.data[0]; - - auto &children = StructVector::GetEntries(vec); - D_ASSERT(children.size() == 3); - auto &yyyy = children[0]; - auto &mm = children[1]; - auto &dd = children[2]; - - TernaryExecutor::Execute(yyyy, mm, dd, result, input.size(), FromDateCast); + const auto count = input.size(); + + auto iter = vec.Values>(count); + auto writer = FlatVector::Writer(result, count); + for (const auto entry : iter) { + const auto y = entry.template GetChildValue<0>(); + const auto m = entry.template GetChildValue<1>(); + const auto d = entry.template GetChildValue<2>(); + if (!entry.IsValid() || !y.IsValid() || !m.IsValid() || !d.IsValid()) { + writer.WriteNull(); + continue; + } + writer.WriteValue(FromDateCast(y.GetValueUnsafe(), m.GetValueUnsafe(), d.GetValueUnsafe())); + } } struct MakeTimeOperator { @@ -125,7 +129,7 @@ void ExecuteMakeTimestamp(DataChunk &input, ExpressionState &state, Vector &resu D_ASSERT(input.ColumnCount() == 6); auto func = MakeTimestampOperator::Operation; - SenaryExecutor::Execute(input, result, func); + VariadicExecutor::Execute(input, result, func); } template @@ -151,6 +155,7 @@ ScalarFunctionSet MakeDateFun::GetFunctions() { ScalarFunction({LogicalType::STRUCT(make_date_children)}, LogicalType::DATE, ExecuteStructMakeDate)); for (auto &func : make_date.functions) { func.SetFallible(); + func.SetUnaryArgProperties(ArgProperties().StrictlyIncreasing()); } return make_date; } @@ -172,6 +177,7 @@ ScalarFunctionSet MakeTimestampFun::GetFunctions() { for (auto &func : operator_set.functions) { func.SetFallible(); + func.SetUnaryArgProperties(ArgProperties().StrictlyIncreasing()); } return operator_set; } @@ -180,6 +186,7 @@ ScalarFunctionSet MakeTimestampNsFun::GetFunctions() { ScalarFunctionSet operator_set("make_timestamp_ns"); operator_set.AddFunction( ScalarFunction({LogicalType::BIGINT}, LogicalType::TIMESTAMP_NS, ExecuteMakeTimestampNs)); + 
operator_set.SetUnaryArgProperties(ArgProperties().StrictlyIncreasing()); return operator_set; } diff --git a/src/duckdb/extension/core_functions/scalar/date/time_bucket.cpp b/src/duckdb/extension/core_functions/scalar/date/time_bucket.cpp index 08115458b..a2f22b1ee 100644 --- a/src/duckdb/extension/core_functions/scalar/date/time_bucket.cpp +++ b/src/duckdb/extension/core_functions/scalar/date/time_bucket.cpp @@ -206,10 +206,9 @@ struct TimeBucket { struct OriginTernaryOperator { template - static inline TR Operation(TA bucket_width, TB ts, TC origin, ValidityMask &mask, idx_t idx) { + static inline optional Operation(TA bucket_width, TB ts, TC origin) { if (!Value::IsFinite(origin)) { - mask.SetInvalid(idx); - return TR(); + return nullopt; } BucketWidthType bucket_width_type = ClassifyBucketWidthErrorThrow(bucket_width); switch (bucket_width_type) { @@ -315,7 +314,7 @@ void TimeBucketOriginFunction(DataChunk &args, ExpressionState &state, Vector &r origin_arg.GetVectorType() == VectorType::CONSTANT_VECTOR) { if (ConstantVector::IsNull(bucket_width_arg) || ConstantVector::IsNull(origin_arg) || !Value::IsFinite(*ConstantVector::GetData(origin_arg))) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(args.size())); } else { interval_t bucket_width = *ConstantVector::GetData(bucket_width_arg); TimeBucket::BucketWidthType bucket_width_type = TimeBucket::ClassifyBucketWidth(bucket_width); @@ -331,7 +330,7 @@ void TimeBucketOriginFunction(DataChunk &args, ExpressionState &state, Vector &r TimeBucket::OriginWidthConvertibleToMonthsTernaryOperator::Operation); break; case TimeBucket::BucketWidthType::UNCLASSIFIED: - TernaryExecutor::ExecuteWithNulls( + TernaryExecutor::Execute( bucket_width_arg, ts_arg, origin_arg, result, args.size(), TimeBucket::OriginTernaryOperator::Operation); break; @@ -340,7 +339,7 @@ void TimeBucketOriginFunction(DataChunk &args, ExpressionState &state, Vector &r } } } else { - TernaryExecutor::ExecuteWithNulls( + 
TernaryExecutor::Execute( bucket_width_arg, ts_arg, origin_arg, result, args.size(), TimeBucket::OriginTernaryOperator::Operation); } @@ -364,6 +363,7 @@ ScalarFunctionSet TimeBucketFun::GetFunctions() { LogicalType::TIMESTAMP, TimeBucketOriginFunction)); for (auto &func : time_bucket.functions) { func.SetFallible(); + func.SetArgProperties(1, ArgProperties().NonDecreasing()); } return time_bucket; } diff --git a/src/duckdb/extension/core_functions/scalar/debug/index_key.cpp b/src/duckdb/extension/core_functions/scalar/debug/index_key.cpp index 5626f5a75..19b01017e 100644 --- a/src/duckdb/extension/core_functions/scalar/debug/index_key.cpp +++ b/src/duckdb/extension/core_functions/scalar/debug/index_key.cpp @@ -74,7 +74,8 @@ static TableDescription EvaluateTableDescription(ClientContext &context, const E throw BinderException("index_key: path parameter must evaluate to a STRUCT"); } - return ExtractTableDescription(StructType::GetChildTypes(expr.return_type), StructValue::GetChildren(input_struct)); + return ExtractTableDescription(StructType::GetChildTypes(expr.GetReturnType()), + StructValue::GetChildren(input_struct)); } static string GetStringArgument(ClientContext &context, const Expression &expr, const string ¶m_name) { @@ -178,8 +179,8 @@ static unique_ptr IndexKeyBind(BindScalarFunctionInput &input) { // that only references the key columns during execution. We could erase the first two arguments here, but // that also requires some (de)serialization boilerplate, so for now we don't do it. 
bound_function.GetArguments().clear(); - bound_function.GetArguments().push_back(arguments[0]->return_type); - bound_function.GetArguments().push_back(arguments[1]->return_type); + bound_function.GetArguments().push_back(arguments[0]->GetReturnType()); + bound_function.GetArguments().push_back(arguments[1]->GetReturnType()); for (auto &key_type : key_types) { bound_function.GetArguments().push_back(key_type); } @@ -218,7 +219,7 @@ static void IndexKeyFunction(DataChunk &args, ExpressionState &state, Vector &re if (count == 1) { result.SetVectorType(VectorType::CONSTANT_VECTOR); } - result.Verify(count); + result.Verify(); } } // namespace diff --git a/src/duckdb/extension/core_functions/scalar/enum/enum_functions.cpp b/src/duckdb/extension/core_functions/scalar/enum/enum_functions.cpp index 5539a1a4d..b1728d851 100644 --- a/src/duckdb/extension/core_functions/scalar/enum/enum_functions.cpp +++ b/src/duckdb/extension/core_functions/scalar/enum/enum_functions.cpp @@ -7,7 +7,7 @@ static void EnumFirstFunction(DataChunk &input, ExpressionState &state, Vector & D_ASSERT(types.size() == 1); auto &enum_vector = EnumType::GetValuesInsertOrder(types[0]); auto val = Value(enum_vector.GetValue(0)); - result.Reference(val); + result.Reference(val, count_t(input.size())); } static void EnumLastFunction(DataChunk &input, ExpressionState &state, Vector &result) { @@ -16,7 +16,7 @@ static void EnumLastFunction(DataChunk &input, ExpressionState &state, Vector &r auto enum_size = EnumType::GetSize(types[0]); auto &enum_vector = EnumType::GetValuesInsertOrder(types[0]); auto val = Value(enum_vector.GetValue(enum_size - 1)); - result.Reference(val); + result.Reference(val, count_t(input.size())); } static void EnumRangeFunction(DataChunk &input, ExpressionState &state, Vector &result) { @@ -29,7 +29,7 @@ static void EnumRangeFunction(DataChunk &input, ExpressionState &state, Vector & enum_values.emplace_back(enum_vector.GetValue(i)); } auto val = Value::LIST(LogicalType::VARCHAR, 
enum_values); - result.Reference(val); + result.Reference(val, count_t(input.size())); } static void EnumRangeBoundaryFunction(DataChunk &input, ExpressionState &state, Vector &result) { @@ -57,7 +57,7 @@ static void EnumRangeBoundaryFunction(DataChunk &input, ExpressionState &state, enum_values.emplace_back(enum_vector.GetValue(i)); } auto val = Value::LIST(LogicalType::VARCHAR, enum_values); - result.Reference(val); + result.Reference(val, count_t(input.size())); } static void EnumCodeFunction(DataChunk &input, ExpressionState &state, Vector &result) { @@ -74,7 +74,7 @@ static void CheckEnumParameter(const Expression &expr) { static unique_ptr BindEnumFunction(BindScalarFunctionInput &input) { auto &arguments = input.GetArguments(); CheckEnumParameter(*arguments[0]); - if (arguments[0]->return_type.id() != LogicalTypeId::ENUM) { + if (arguments[0]->GetReturnType().id() != LogicalTypeId::ENUM) { throw BinderException("This function needs an ENUM as an argument"); } return nullptr; @@ -84,11 +84,11 @@ static unique_ptr BindEnumCodeFunction(BindScalarFunctionInput &in auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); CheckEnumParameter(*arguments[0]); - if (arguments[0]->return_type.id() != LogicalTypeId::ENUM) { + if (arguments[0]->GetReturnType().id() != LogicalTypeId::ENUM) { throw BinderException("This function needs an ENUM as an argument"); } - auto phy_type = EnumType::GetPhysicalType(arguments[0]->return_type); + auto phy_type = EnumType::GetPhysicalType(arguments[0]->GetReturnType()); switch (phy_type) { case PhysicalType::UINT8: bound_function.SetReturnType(LogicalType(LogicalTypeId::UTINYINT)); @@ -113,18 +113,21 @@ static unique_ptr BindEnumRangeBoundaryFunction(BindScalarFunction auto &arguments = input.GetArguments(); CheckEnumParameter(*arguments[0]); CheckEnumParameter(*arguments[1]); - if (arguments[0]->return_type.id() != LogicalTypeId::ENUM && arguments[0]->return_type != LogicalType::SQLNULL) { + if 
(arguments[0]->GetReturnType().id() != LogicalTypeId::ENUM && + arguments[0]->GetReturnType() != LogicalType::SQLNULL) { throw BinderException("This function needs an ENUM as an argument"); } - if (arguments[1]->return_type.id() != LogicalTypeId::ENUM && arguments[1]->return_type != LogicalType::SQLNULL) { + if (arguments[1]->GetReturnType().id() != LogicalTypeId::ENUM && + arguments[1]->GetReturnType() != LogicalType::SQLNULL) { throw BinderException("This function needs an ENUM as an argument"); } - if (arguments[0]->return_type == LogicalType::SQLNULL && arguments[1]->return_type == LogicalType::SQLNULL) { + if (arguments[0]->GetReturnType() == LogicalType::SQLNULL && + arguments[1]->GetReturnType() == LogicalType::SQLNULL) { throw BinderException("This function needs an ENUM as an argument"); } - if (arguments[0]->return_type.id() == LogicalTypeId::ENUM && - arguments[1]->return_type.id() == LogicalTypeId::ENUM && - arguments[0]->return_type != arguments[1]->return_type) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::ENUM && + arguments[1]->GetReturnType().id() == LogicalTypeId::ENUM && + arguments[0]->GetReturnType() != arguments[1]->GetReturnType()) { throw BinderException("The parameters need to link to ONLY one enum OR be NULL "); } return nullptr; diff --git a/src/duckdb/extension/core_functions/scalar/generic/alias.cpp b/src/duckdb/extension/core_functions/scalar/generic/alias.cpp index 222510cb8..977f91005 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/alias.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/alias.cpp @@ -6,7 +6,7 @@ namespace duckdb { static void AliasFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); Value v(state.expr.GetAlias().empty() ? 
func_expr.children[0]->GetName() : state.expr.GetAlias()); - result.Reference(v); + result.Reference(v, count_t(args.size())); } ScalarFunction AliasFun::GetFunction() { diff --git a/src/duckdb/extension/core_functions/scalar/generic/binning.cpp b/src/duckdb/extension/core_functions/scalar/generic/binning.cpp index a32591448..8610f349d 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/binning.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/binning.cpp @@ -411,7 +411,7 @@ unique_ptr BindEquiWidthFunction(BindScalarFunctionInput &input) { // while internally the bins are computed over a unified type // the equi_width_bins function returns the same type as the input MAX LogicalType child_type; - switch (arguments[1]->return_type.id()) { + switch (arguments[1]->GetReturnType().id()) { case LogicalTypeId::UNKNOWN: case LogicalTypeId::SQLNULL: return nullptr; @@ -420,7 +420,7 @@ unique_ptr BindEquiWidthFunction(BindScalarFunctionInput &input) { child_type = LogicalType::DOUBLE; break; default: - child_type = arguments[1]->return_type; + child_type = arguments[1]->GetReturnType(); break; } bound_function.SetReturnType(LogicalType::LIST(child_type)); @@ -474,11 +474,11 @@ void UnsupportedEquiWidth(DataChunk &args, ExpressionState &state, Vector &) { throw BinderException(state.expr, "Unsupported type \"%s\" for equi_width_bins", args.data[0].GetType()); } -void EquiWidthBinSerialize(Serializer &, const optional_ptr, const ScalarFunction &) { +void EquiWidthBinSerialize(Serializer &, const optional_ptr, const BoundScalarFunction &) { return; } -unique_ptr EquiWidthBinDeserialize(Deserializer &deserializer, ScalarFunction &function) { +unique_ptr EquiWidthBinDeserialize(Deserializer &deserializer, BoundScalarFunction &function) { function.SetReturnType(deserializer.Get()); return nullptr; } diff --git a/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp 
b/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp index ca212a29d..6c5828d8c 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp @@ -16,12 +16,12 @@ void CanCastImplicitlyFunction(DataChunk &args, ExpressionState &state, Vector & auto &context = state.GetContext(); bool can_cast_implicitly = CanCastImplicitly(context, args.data[0].GetType(), args.data[1].GetType()); auto v = Value::BOOLEAN(can_cast_implicitly); - result.Reference(v); + result.Reference(v, count_t(args.size())); } unique_ptr BindCanCastImplicitlyExpression(FunctionBindExpressionInput &input) { - auto &source_type = input.children[0]->return_type; - auto &target_type = input.children[1]->return_type; + auto &source_type = input.children[0]->GetReturnType(); + auto &target_type = input.children[1]->GetReturnType(); if (source_type.id() == LogicalTypeId::UNKNOWN || source_type.id() == LogicalTypeId::SQLNULL || target_type.id() == LogicalTypeId::UNKNOWN || target_type.id() == LogicalTypeId::SQLNULL) { // parameter - unknown return type diff --git a/src/duckdb/extension/core_functions/scalar/generic/cast_to_type.cpp b/src/duckdb/extension/core_functions/scalar/generic/cast_to_type.cpp index 51ff98ace..f9e507146 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/cast_to_type.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/cast_to_type.cpp @@ -10,7 +10,7 @@ void CastToTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) } unique_ptr BindCastToTypeFunction(FunctionBindExpressionInput &input) { - auto &return_type = input.children[1]->return_type; + auto &return_type = input.children[1]->GetReturnType(); if (return_type.id() == LogicalTypeId::UNKNOWN) { // parameter - unknown return type throw ParameterNotResolvedException(); diff --git a/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp 
b/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp index 854427ebf..03d528bdb 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp @@ -30,7 +30,7 @@ struct CurrentSettingBindData : public FunctionData { void CurrentSettingFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); - result.Reference(info.value); + result.Reference(info.value, count_t(args.size())); } unique_ptr CurrentSettingBind(BindScalarFunctionInput &input) { @@ -38,11 +38,11 @@ unique_ptr CurrentSettingBind(BindScalarFunctionInput &input) { auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); auto &key_child = arguments[0]; - if (key_child->return_type.id() == LogicalTypeId::UNKNOWN) { + if (key_child->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } - if (key_child->return_type.id() != LogicalTypeId::VARCHAR || - key_child->return_type.id() != LogicalTypeId::VARCHAR || !key_child->IsFoldable()) { + if (key_child->GetReturnType().id() != LogicalTypeId::VARCHAR || + key_child->GetReturnType().id() != LogicalTypeId::VARCHAR || !key_child->IsFoldable()) { throw ParserException("Key name for current_setting needs to be a constant string"); } Value key_val = ExpressionExecutor::EvaluateScalar(context, *key_child); diff --git a/src/duckdb/extension/core_functions/scalar/generic/least.cpp b/src/duckdb/extension/core_functions/scalar/generic/least.cpp index 2b549a8b1..a5380ddf6 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/least.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/least.cpp @@ -228,8 +228,9 @@ unique_ptr BindLeastGreatest(BindScalarFunctionInput &input) { bound_function.SetInitStateCallback(LeastGreatestSortKeyInit); break; } - bound_function.GetArguments()[0] = 
child_type; - bound_function.SetVarArgs(child_type); + for (auto &arg : bound_function.GetArguments()) { + arg = child_type; + } bound_function.SetReturnType(child_type); return nullptr; } diff --git a/src/duckdb/extension/core_functions/scalar/generic/replace_type.cpp b/src/duckdb/extension/core_functions/scalar/generic/replace_type.cpp index 64bfc9ffa..74b14b969 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/replace_type.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/replace_type.cpp @@ -9,8 +9,8 @@ static void ReplaceTypeFunction(DataChunk &, ExpressionState &, Vector &) { } static unique_ptr BindReplaceTypeFunction(FunctionBindExpressionInput &input) { - const auto &from = input.children[1]->return_type; - const auto &to = input.children[2]->return_type; + const auto &from = input.children[1]->GetReturnType(); + const auto &to = input.children[2]->GetReturnType(); if (from.id() == LogicalTypeId::UNKNOWN || to.id() == LogicalTypeId::UNKNOWN) { // parameters - unknown return type throw ParameterNotResolvedException(); @@ -19,7 +19,7 @@ static unique_ptr BindReplaceTypeFunction(FunctionBindExpressionInpu throw InvalidInputException("replace_type cannot be used to replace type with NULL"); } const auto return_type = TypeVisitor::VisitReplace( - input.children[0]->return_type, [&from, &to](const LogicalType &type) { return type == from ? to : type; }); + input.children[0]->GetReturnType(), [&from, &to](const LogicalType &type) { return type == from ? 
to : type; }); return BoundCastExpression::AddCastToType(input.context, std::move(input.children[0]), return_type); } diff --git a/src/duckdb/extension/core_functions/scalar/generic/stats.cpp b/src/duckdb/extension/core_functions/scalar/generic/stats.cpp index 2f0d6156a..949b74e27 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/stats.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/stats.cpp @@ -24,7 +24,7 @@ struct StatsBindData : public FunctionData { void StatsFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); - result.Reference(info.stats); + result.Reference(info.stats, count_t(args.size())); } unique_ptr StatsBind(BindScalarFunctionInput &input) { diff --git a/src/duckdb/extension/core_functions/scalar/generic/system_functions.cpp b/src/duckdb/extension/core_functions/scalar/generic/system_functions.cpp index 11cfbcd85..76c491a20 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/system_functions.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/system_functions.cpp @@ -16,19 +16,19 @@ namespace { // current_query void CurrentQueryFunction(DataChunk &input, ExpressionState &state, Vector &result) { Value val(state.GetContext().GetCurrentQuery()); - result.Reference(val); + result.Reference(val, count_t(input.size())); } // current_schema void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vector &result) { Value val(ClientData::Get(state.GetContext()).catalog_search_path->GetDefault().schema); - result.Reference(val); + result.Reference(val, count_t(input.size())); } // current_database void CurrentDatabaseFunction(DataChunk &input, ExpressionState &state, Vector &result) { Value val(DatabaseManager::GetDefaultDatabase(state.GetContext())); - result.Reference(val); + result.Reference(val, count_t(input.size())); } struct CurrentSchemasBindData : public FunctionData { @@ -50,7 +50,7 @@ struct 
CurrentSchemasBindData : public FunctionData { unique_ptr CurrentSchemasBind(BindScalarFunctionInput &input) { auto &context = input.GetClientContext(); auto &arguments = input.GetArguments(); - if (arguments[0]->return_type.id() != LogicalTypeId::BOOLEAN) { + if (arguments[0]->GetReturnType().id() != LogicalTypeId::BOOLEAN) { throw BinderException("current_schemas requires a boolean input"); } if (!arguments[0]->IsFoldable()) { @@ -77,7 +77,7 @@ unique_ptr CurrentSchemasBind(BindScalarFunctionInput &input) { void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); - result.Reference(info.result); + result.Reference(info.result, count_t(input.size())); } // in_search_path @@ -96,13 +96,13 @@ void TransactionIdCurrent(DataChunk &input, ExpressionState &state, Vector &resu auto &catalog = Catalog::GetCatalog(context, DatabaseManager::GetDefaultDatabase(context)); auto &transaction = DuckTransaction::Get(context, catalog); auto val = Value::UBIGINT(transaction.start_time); - result.Reference(val); + result.Reference(val, count_t(input.size())); } // version void VersionFunction(DataChunk &input, ExpressionState &state, Vector &result) { auto val = Value(DuckDB::LibraryVersion()); - result.Reference(val); + result.Reference(val, count_t(input.size())); } } // namespace diff --git a/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp b/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp index a0e0846de..97833ba42 100644 --- a/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp +++ b/src/duckdb/extension/core_functions/scalar/generic/type_functions.cpp @@ -14,11 +14,11 @@ namespace duckdb { static void TypeOfFunction(DataChunk &args, ExpressionState &state, Vector &result) { Value v(args.data[0].GetType().ToString()); - result.Reference(v); + result.Reference(v, count_t(args.size())); } static unique_ptr 
BindTypeOfFunctionExpression(FunctionBindExpressionInput &input) { - auto &return_type = input.children[0]->return_type; + auto &return_type = input.children[0]->GetReturnType(); if (return_type.id() == LogicalTypeId::UNKNOWN || return_type.id() == LogicalTypeId::SQLNULL) { // parameter - unknown return type return nullptr; @@ -41,7 +41,7 @@ ScalarFunction TypeOfFun::GetFunction() { static void GetTypeFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto v = Value::TYPE(args.data[0].GetType()); - result.Reference(v); + result.Reference(v, count_t(args.size())); } static unique_ptr BindGetTypeFunction(BindScalarFunctionInput &input) { @@ -50,12 +50,12 @@ static unique_ptr BindGetTypeFunction(BindScalarFunctionInput &inp if (arguments[0]->HasParameter()) { throw ParameterNotResolvedException(); } - bound_function.GetArguments()[0] = arguments[0]->return_type; + bound_function.GetArguments()[0] = arguments[0]->GetReturnType(); return nullptr; } static unique_ptr BindGetTypeFunctionExpression(FunctionBindExpressionInput &input) { - auto &return_type = input.children[0]->return_type; + auto &return_type = input.children[0]->GetReturnType(); if (return_type.id() == LogicalTypeId::UNKNOWN || return_type.id() == LogicalTypeId::SQLNULL) { // parameter - unknown return type return nullptr; @@ -83,7 +83,7 @@ static unique_ptr BindMakeTypeFunctionExpression(FunctionBindExpress // Evaluate all arguments to constant values for (auto &child : input.children) { - string name = child->alias; + string name = child->GetAlias(); if (!child->IsFoldable()) { throw BinderException("make_type function arguments must be constant expressions"); } diff --git a/src/duckdb/extension/core_functions/scalar/list/array_slice.cpp b/src/duckdb/extension/core_functions/scalar/list/array_slice.cpp index 5ebac7fb0..0d71cf5e2 100644 --- a/src/duckdb/extension/core_functions/scalar/list/array_slice.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/array_slice.cpp @@ -164,11 
+164,11 @@ void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_vector, UnifiedVectorFormat list_data, begin_data, end_data, step_data; idx_t sel_length = 0; - list_vector.ToUnifiedFormat(count, list_data); - begin_vector.ToUnifiedFormat(count, begin_data); - end_vector.ToUnifiedFormat(count, end_data); + list_vector.ToUnifiedFormat(list_data); + begin_vector.ToUnifiedFormat(begin_data); + end_vector.ToUnifiedFormat(end_data); if (step_vector) { - step_vector->ToUnifiedFormat(count, step_data); + step_vector->ToUnifiedFormat(step_data); sel.Initialize(ListVector::GetListSize(list_vector)); } @@ -226,7 +226,7 @@ void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_vector, new_sel.set_index(i, sel.get_index(i)); } result_child_vector->Slice(new_sel, sel_length); - result_child_vector->Flatten(sel_length); + result_child_vector->Flatten(); ListVector::SetListSize(result, sel_length); } } @@ -245,7 +245,7 @@ void ExecuteSlice(Vector &result, Vector &list_or_str_vector, Vector &begin_vect ExecuteFlatSlice(result, list_or_str_vector, begin_vector, end_vector, step_vector, count, sel, sel_idx, result_child_vector, begin_is_empty, end_is_empty); - result.Verify(count); + result.Verify(); } void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector &result) { @@ -258,7 +258,7 @@ void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector &result) VectorOperations::Copy(args.data[0], list_or_str_vector, count, 0, 0); if (list_or_str_vector.GetType().id() == LogicalTypeId::SQLNULL) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(count)); return; } @@ -279,7 +279,7 @@ void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector &result) // Share the value dictionary as we are just going to slice it if (list_or_str_vector.GetVectorType() != VectorType::FLAT_VECTOR && list_or_str_vector.GetVectorType() != VectorType::CONSTANT_VECTOR) { - list_or_str_vector.Flatten(count); + 
list_or_str_vector.Flatten(); } ExecuteSlice(result, list_or_str_vector, begin_vector, end_vector, step_vector, count, begin_is_empty, end_is_empty); @@ -300,7 +300,7 @@ void ArraySliceFunction(DataChunk &args, ExpressionState &state, Vector &result) bool CheckIfParamIsEmpty(duckdb::unique_ptr ¶m) { bool is_empty = false; - if (param->return_type.id() == LogicalTypeId::LIST) { + if (param->GetReturnType().id() == LogicalTypeId::LIST) { auto empty_list = make_uniq(Value::LIST(LogicalType::INTEGER, vector())); is_empty = param->Equals(*empty_list); if (!is_empty) { @@ -318,17 +318,17 @@ unique_ptr ArraySliceBind(BindScalarFunctionInput &input) { D_ASSERT(arguments.size() == 3 || arguments.size() == 4); D_ASSERT(bound_function.GetArguments().size() == 3 || bound_function.GetArguments().size() == 4); - switch (arguments[0]->return_type.id()) { + switch (arguments[0]->GetReturnType().id()) { case LogicalTypeId::ARRAY: { // Cast to list - auto child_type = ArrayType::GetChildType(arguments[0]->return_type); + auto child_type = ArrayType::GetChildType(arguments[0]->GetReturnType()); auto target_type = LogicalType::LIST(child_type); arguments[0] = BoundCastExpression::AddCastToType(context, std::move(arguments[0]), target_type); - bound_function.SetReturnType(arguments[0]->return_type); + bound_function.SetReturnType(arguments[0]->GetReturnType()); } break; case LogicalTypeId::LIST: // The result is the same type - bound_function.SetReturnType(arguments[0]->return_type); + bound_function.SetReturnType(arguments[0]->GetReturnType()); break; case LogicalTypeId::BLOB: case LogicalTypeId::VARCHAR: @@ -338,15 +338,15 @@ unique_ptr ArraySliceBind(BindScalarFunctionInput &input) { "Slice with steps has not been implemented for string types, you can consider rewriting your query as " "follows:\n SELECT array_to_string((str_split(string, '')[begin:end:step], '');"); } - if (arguments[0]->return_type.IsJSONType()) { + if (arguments[0]->GetReturnType().IsJSONType()) { // This is 
needed to avoid producing invalid JSON bound_function.GetArguments()[0] = LogicalType::VARCHAR; bound_function.SetReturnType(LogicalType::VARCHAR); } else { - bound_function.SetReturnType(arguments[0]->return_type); + bound_function.SetReturnType(arguments[0]->GetReturnType()); } for (idx_t i = 1; i < 3; i++) { - if (arguments[i]->return_type.id() != LogicalTypeId::LIST) { + if (arguments[i]->GetReturnType().id() != LogicalTypeId::LIST) { bound_function.GetArguments()[i] = LogicalType::BIGINT; } } @@ -381,7 +381,7 @@ ScalarFunctionSet ListSliceFun::GetFunctions() { fun.SetFallible(); ScalarFunctionSet set; set.AddFunction(fun); - fun.GetArguments().push_back(LogicalType::BIGINT); + fun.GetSignature().AddParameter(LogicalType::BIGINT); set.AddFunction(fun); return set; } diff --git a/src/duckdb/extension/core_functions/scalar/list/flatten.cpp b/src/duckdb/extension/core_functions/scalar/list/flatten.cpp index bd12d3207..f5cd6fb73 100644 --- a/src/duckdb/extension/core_functions/scalar/list/flatten.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/flatten.cpp @@ -19,7 +19,7 @@ void ListFlattenFunction(DataChunk &args, ExpressionState &, Vector &result) { // Setup outer vec; auto &outer_vec = args.data[0]; const auto outer_count = args.size(); - outer_vec.ToUnifiedFormat(outer_count, outer_format); + outer_vec.ToUnifiedFormat(outer_format); // Special case: outer list is all-null if (outer_vec.GetType().id() == LogicalTypeId::SQLNULL) { @@ -29,8 +29,7 @@ void ListFlattenFunction(DataChunk &args, ExpressionState &, Vector &result) { // Setup inner vec auto &inner_vec = ListVector::GetChildMutable(outer_vec); - const auto inner_count = ListVector::GetListSize(outer_vec); - inner_vec.ToUnifiedFormat(inner_count, inner_format); + inner_vec.ToUnifiedFormat(inner_format); // Special case: inner list is all-null if (inner_vec.GetType().id() == LogicalTypeId::SQLNULL) { @@ -47,8 +46,7 @@ void ListFlattenFunction(DataChunk &args, ExpressionState &, Vector &result) { // 
Setup items vec auto &items_vec = ListVector::GetChildMutable(inner_vec); - const auto items_count = ListVector::GetListSize(inner_vec); - items_vec.ToUnifiedFormat(items_count, items_format); + items_vec.ToUnifiedFormat(items_format); // First pass: Figure out the total amount of items. // This can be more than items_count if the inner list reference the same item(s) multiple times. @@ -129,7 +127,7 @@ void ListFlattenFunction(DataChunk &args, ExpressionState &, Vector &result) { auto &result_child_vector = ListVector::GetChildMutable(result); result_child_vector.Slice(items_vec, sel, sel_idx); - result_child_vector.Flatten(sel_idx); + result_child_vector.Flatten(); } unique_ptr ListFlattenStats(ClientContext &context, FunctionStatisticsInput &input) { diff --git a/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp b/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp index 088d23e2a..65a77bc01 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp @@ -30,7 +30,7 @@ unique_ptr ListAggregatesInitLocalState(ExpressionState &sta } // FIXME: benchmark the use of simple_update against using update (if applicable) -unique_ptr ListAggregatesBindFailure(ScalarFunction &bound_function) { +unique_ptr ListAggregatesBindFailure(BoundScalarFunction &bound_function) { bound_function.GetArguments()[0] = LogicalType::SQLNULL; bound_function.SetReturnType(LogicalType::SQLNULL); return make_uniq(LogicalType::SQLNULL); @@ -63,12 +63,13 @@ struct ListAggregatesBindData : public FunctionData { } static void SerializeFunction(Serializer &serializer, const optional_ptr bind_data_p, - const ScalarFunction &function) { + const BoundScalarFunction &function) { auto bind_data = dynamic_cast(bind_data_p.get()); serializer.WritePropertyWithDefault(100, "bind_data", bind_data, (const ListAggregatesBindData *)nullptr); } - static unique_ptr 
DeserializeFunction(Deserializer &deserializer, ScalarFunction &bound_function) { + static unique_ptr DeserializeFunction(Deserializer &deserializer, + BoundScalarFunction &bound_function) { auto result = deserializer.ReadPropertyWithExplicitDefault>( 100, "bind_data", unique_ptr(nullptr)); if (!result) { @@ -137,7 +138,7 @@ struct DistinctFunctor { template > static void ListExecuteFunction(Vector &result, Vector &state_vector, idx_t count) { UnifiedVectorFormat sdata; - state_vector.ToUnifiedFormat(count, sdata); + state_vector.ToUnifiedFormat(sdata); auto states = UnifiedVectorFormat::GetData *>(sdata); auto old_len = ListVector::GetListSize(result); @@ -172,7 +173,7 @@ struct DistinctFunctor { } D_ASSERT(current_offset == old_len + new_entries); ListVector::SetListSize(result, current_offset); - result.Verify(count); + result.Verify(); } }; @@ -180,7 +181,7 @@ struct UniqueFunctor { template > static void ListExecuteFunction(Vector &result, Vector &state_vector, idx_t count) { UnifiedVectorFormat sdata; - state_vector.ToUnifiedFormat(count, sdata); + state_vector.ToUnifiedFormat(sdata); auto states = UnifiedVectorFormat::GetData *>(sdata); auto result_data = FlatVector::Writer(result, count); @@ -193,7 +194,7 @@ struct UniqueFunctor { } result_data.WriteValue(state->hist->size()); } - result.Verify(count); + result.Verify(); } }; @@ -207,7 +208,7 @@ void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vector &res auto &result_validity = FlatVector::ValidityMutable(result); if (lists.GetType().id() == LogicalTypeId::SQLNULL) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(count)); return; } @@ -221,15 +222,14 @@ void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vector &res D_ASSERT(aggr.function.HasStateUpdateCallback()); - auto lists_size = ListVector::GetListSize(lists); auto &child_vector = ListVector::GetChildMutable(lists); - child_vector.Flatten(lists_size); + child_vector.Flatten(); 
UnifiedVectorFormat child_data; - child_vector.ToUnifiedFormat(lists_size, child_data); + child_vector.ToUnifiedFormat(child_data); UnifiedVectorFormat lists_data; - lists.ToUnifiedFormat(count, lists_data); + lists.ToUnifiedFormat(lists_data); auto list_entries = UnifiedVectorFormat::GetData(lists_data); // state_buffer holds the state for each list of this chunk @@ -378,9 +378,10 @@ void ListUniqueFunction(DataChunk &args, ExpressionState &state, Vector &result) } template -unique_ptr -ListAggregatesBindFunction(ClientContext &context, ScalarFunction &bound_function, const LogicalType &list_child_type, - AggregateFunction &aggr_function, vector> &arguments) { +unique_ptr ListAggregatesBindFunction(ClientContext &context, BoundScalarFunction &bound_function, + const LogicalType &list_child_type, + const AggregateFunction &aggr_function, + vector> &arguments) { // create the child expression and its type vector> children; auto expr = make_uniq(Value(list_child_type)); @@ -417,17 +418,17 @@ unique_ptr ListAggregatesBind(BindScalarFunctionInput &input) { auto &arguments = input.GetArguments(); arguments[0] = BoundCastExpression::AddArrayCastToList(context, std::move(arguments[0])); - if (arguments[0]->return_type.id() == LogicalTypeId::SQLNULL) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::SQLNULL) { return ListAggregatesBindFailure(bound_function); } - bool is_parameter = arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN; + bool is_parameter = arguments[0]->GetReturnType().id() == LogicalTypeId::UNKNOWN; LogicalType child_type; if (is_parameter) { child_type = LogicalType::ANY; - } else if (arguments[0]->return_type.id() == LogicalTypeId::LIST || - arguments[0]->return_type.id() == LogicalTypeId::MAP) { - child_type = ListType::GetChildType(arguments[0]->return_type); + } else if (arguments[0]->GetReturnType().id() == LogicalTypeId::LIST || + arguments[0]->GetReturnType().id() == LogicalTypeId::MAP) { + child_type = 
ListType::GetChildType(arguments[0]->GetReturnType()); } else { // Unreachable throw InvalidInputException("First argument of list aggregate must be a list, map or array"); @@ -460,7 +461,7 @@ unique_ptr ListAggregatesBind(BindScalarFunctionInput &input) { types.push_back(child_type); // push any extra arguments into the type list for (idx_t i = 2; i < arguments.size(); i++) { - types.push_back(arguments[i]->return_type); + types.push_back(arguments[i]->GetReturnType()); } FunctionBinder function_binder(context); @@ -470,14 +471,14 @@ unique_ptr ListAggregatesBind(BindScalarFunctionInput &input) { } // found a matching function, bind it as an aggregate - auto best_function = func.functions.GetFunctionByOffset(best_function_idx.GetIndex()); + const auto &best_function = func.functions.GetFunctionByOffset(best_function_idx.GetIndex()); if (IS_AGGR) { bound_function.SetErrorMode(best_function.GetErrorMode()); return ListAggregatesBindFunction(context, bound_function, child_type, best_function, arguments); } // create the unordered map histogram function - D_ASSERT(best_function.GetArguments().size() == 1); + D_ASSERT(best_function.GetSignature().GetParameterCount() == 1); auto aggr_function = HistogramFun::GetHistogramUnorderedMap(child_type); return ListAggregatesBindFunction(context, bound_function, child_type, aggr_function, arguments); } diff --git a/src/duckdb/extension/core_functions/scalar/list/list_distance.cpp b/src/duckdb/extension/core_functions/scalar/list/list_distance.cpp index ab9248eef..810728a8b 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_distance.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_distance.cpp @@ -15,7 +15,7 @@ template static void ListGenericFold(DataChunk &args, ExpressionState &state, Vector &result) { const auto &lstate = state.Cast(); const auto &expr = lstate.expr.Cast(); - const auto &func_name = expr.function.name; + const auto &func_name = expr.function.GetName(); auto count = args.size(); @@ 
-28,8 +28,8 @@ static void ListGenericFold(DataChunk &args, ExpressionState &state, Vector &res auto &lhs_child = ListVector::GetChildMutable(lhs_vec); auto &rhs_child = ListVector::GetChildMutable(rhs_vec); - lhs_child.Flatten(lhs_count); - rhs_child.Flatten(rhs_count); + lhs_child.Flatten(); + rhs_child.Flatten(); D_ASSERT(lhs_child.GetVectorType() == VectorType::FLAT_VECTOR); D_ASSERT(rhs_child.GetVectorType() == VectorType::FLAT_VECTOR); @@ -45,9 +45,8 @@ static void ListGenericFold(DataChunk &args, ExpressionState &state, Vector &res auto lhs_data = FlatVector::GetData(lhs_child); auto rhs_data = FlatVector::GetData(rhs_child); - BinaryExecutor::ExecuteWithNulls( - lhs_vec, rhs_vec, result, count, - [&](const list_entry_t &left, const list_entry_t &right, ValidityMask &mask, idx_t row_idx) { + BinaryExecutor::Execute( + lhs_vec, rhs_vec, result, count, [&](const list_entry_t &left, const list_entry_t &right) -> optional { if (left.length != right.length) { throw InvalidInputException( "%s: list dimensions must be equal, got left length '%d' and right length '%d'", func_name, @@ -55,8 +54,7 @@ static void ListGenericFold(DataChunk &args, ExpressionState &state, Vector &res } if (!OP::ALLOW_EMPTY && left.length == 0) { - mask.SetInvalid(row_idx); - return TYPE(); + return nullopt; } return OP::Operation(lhs_data + left.offset, rhs_data + right.offset, left.length); diff --git a/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp b/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp index bbe51b2d7..73914598b 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp @@ -19,7 +19,7 @@ static unique_ptr ListFilterBind(BindScalarFunctionInput &input) { auto &bound_lambda_expr = arguments[1]->Cast(); // try to cast to boolean, if the return type of the lambda filter expression is not already boolean - if (bound_lambda_expr.lambda_expr->return_type != 
LogicalType::BOOLEAN) { + if (bound_lambda_expr.lambda_expr->GetReturnType() != LogicalType::BOOLEAN) { auto cast_lambda_expr = BoundCastExpression::AddCastToType(context, std::move(bound_lambda_expr.lambda_expr), LogicalType::BOOLEAN); bound_lambda_expr.lambda_expr = std::move(cast_lambda_expr); @@ -27,7 +27,7 @@ static unique_ptr ListFilterBind(BindScalarFunctionInput &input) { arguments[0] = BoundCastExpression::AddArrayCastToList(context, std::move(arguments[0])); - bound_function.SetReturnType(arguments[0]->return_type); + bound_function.SetReturnType(arguments[0]->GetReturnType()); auto has_index = bound_lambda_expr.parameter_count == 2; return LambdaFunctions::ListLambdaBind(context, bound_function, arguments, has_index); } diff --git a/src/duckdb/extension/core_functions/scalar/list/list_has_any_or_all.cpp b/src/duckdb/extension/core_functions/scalar/list/list_has_any_or_all.cpp index a38c127ae..9e1144bfc 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_has_any_or_all.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_has_any_or_all.cpp @@ -27,8 +27,8 @@ static void ListHasAnyFunction(DataChunk &args, ExpressionState &, Vector &resul UnifiedVectorFormat l_child_format; UnifiedVectorFormat r_child_format; - l_child.ToUnifiedFormat(l_size, l_child_format); - r_child.ToUnifiedFormat(r_size, r_child_format); + l_child.ToUnifiedFormat(l_child_format); + r_child.ToUnifiedFormat(r_child_format); // Create the sort keys for the list elements Vector l_sortkey_vec(LogicalType::BLOB, l_size); @@ -95,7 +95,7 @@ static void ListHasAnyFunction(DataChunk &args, ExpressionState &, Vector &resul static void ListHasAllFunction(DataChunk &args, ExpressionState &state, Vector &result) { const auto &func_expr = state.expr.Cast(); - const auto swap = func_expr.function.name == "<@"; + const auto swap = func_expr.function.GetName() == "<@"; auto &l_vec = args.data[swap ? 1 : 0]; auto &r_vec = args.data[swap ? 
0 : 1]; @@ -117,8 +117,8 @@ static void ListHasAllFunction(DataChunk &args, ExpressionState &state, Vector & UnifiedVectorFormat build_format; UnifiedVectorFormat probe_format; - l_child.ToUnifiedFormat(l_size, build_format); - r_child.ToUnifiedFormat(r_size, probe_format); + l_child.ToUnifiedFormat(build_format); + r_child.ToUnifiedFormat(probe_format); // Create the sort keys for the list elements Vector l_sortkey_vec(LogicalType::BLOB, l_size); diff --git a/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp b/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp index 3be3275b8..0ede2c95b 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp @@ -48,7 +48,7 @@ struct ReduceExecuteInfo { input_types.push_back(LogicalType::BIGINT); } input_types.push_back(info.child_vector->GetType()); - input_types.push_back(info.lambda_expr->return_type); + input_types.push_back(info.lambda_expr->GetReturnType()); // info.column_infos includes the list column plus captured args (and the initial value if present). // skip the first entry if there is an initial value @@ -56,7 +56,7 @@ struct ReduceExecuteInfo { input_types.push_back(info.column_infos[i].vector.get().GetType()); } - accumulator_cast = make_uniq(info.lambda_expr->return_type, info.row_count); + accumulator_cast = make_uniq(info.lambda_expr->GetReturnType(), info.row_count); expr_executor = make_uniq(context, *info.lambda_expr); }; @@ -146,7 +146,7 @@ bool ExecuteReduce(const idx_t loops, ReduceExecuteInfo &execute_info, LambdaFun } // create the index vector, where the index is that of the current node. 
- Vector index_vector(Value::BIGINT(UnsafeNumericCast(loops_offset + 1))); + Vector index_vector(Value::BIGINT(UnsafeNumericCast(loops_offset + 1)), count_t(reduced_row_idx)); // slice the left and right slice execute_info.left_slice->Slice(*execute_info.left_slice, execute_info.left_sel, reduced_row_idx); @@ -230,15 +230,15 @@ unique_ptr ListReduceBind(BindScalarFunctionInput &input) { bool has_initial = arguments.size() == 3; LogicalType accumulator_type; if (has_initial) { - const auto &initial_value_type = arguments[2]->return_type; + const auto &initial_value_type = arguments[2]->GetReturnType(); auto &bound_lambda_expr = arguments[1]->Cast(); - auto &lambda_return_type = bound_lambda_expr.lambda_expr->return_type; + auto &lambda_return_type = bound_lambda_expr.lambda_expr->GetReturnType(); accumulator_type = ResolveReduceAccumulatorType(context, initial_value_type, lambda_return_type); arguments[2] = BoundCastExpression::AddCastToType(context, std::move(arguments[2]), accumulator_type); } else { auto &bound_lambda_expr = arguments[1]->Cast(); - auto list_child_type = LambdaFunctions::DetermineListChildType(arguments[0]->return_type); - auto &lambda_return_type = bound_lambda_expr.lambda_expr->return_type; + auto list_child_type = LambdaFunctions::DetermineListChildType(arguments[0]->GetReturnType()); + auto &lambda_return_type = bound_lambda_expr.lambda_expr->GetReturnType(); if (!LogicalType::TryGetMaxLogicalType(context, list_child_type, lambda_return_type, accumulator_type)) { throw BinderException("No common super type between list element type %s and lambda return type %s", list_child_type.ToString(), lambda_return_type.ToString()); @@ -251,14 +251,14 @@ unique_ptr ListReduceBind(BindScalarFunctionInput &input) { } auto has_index = bound_lambda_expr.parameter_count == 3; - const auto lambda_return_type = bound_lambda_expr.lambda_expr->return_type; + const auto lambda_return_type = bound_lambda_expr.lambda_expr->GetReturnType(); auto cast_lambda_expr = 
BoundCastExpression::AddCastToType(context, std::move(bound_lambda_expr.lambda_expr), accumulator_type); if (!cast_lambda_expr) { throw BinderException("Could not cast lambda return type %s to accumulator type %s", lambda_return_type.ToString(), accumulator_type.ToString()); } - bound_function.SetReturnType(cast_lambda_expr->return_type); + bound_function.SetReturnType(cast_lambda_expr->GetReturnType()); return make_uniq(bound_function.GetReturnType(), std::move(cast_lambda_expr), has_index, has_initial); } @@ -301,10 +301,10 @@ void LambdaFunctions::ListReduceFunction(DataChunk &args, ExpressionState &state // This means there is always an empty result chunk for the next iteration, // without the referenced chunk having to be reset until the current iteration is complete. DataChunk odd_result_chunk; - odd_result_chunk.Initialize(Allocator::DefaultAllocator(), {info.lambda_expr->return_type}); + odd_result_chunk.Initialize(Allocator::DefaultAllocator(), {info.lambda_expr->GetReturnType()}); DataChunk even_result_chunk; - even_result_chunk.Initialize(Allocator::DefaultAllocator(), {info.lambda_expr->return_type}); + even_result_chunk.Initialize(Allocator::DefaultAllocator(), {info.lambda_expr->GetReturnType()}); // Execute reduce until all rows are finished. 
idx_t loops = 0; @@ -334,7 +334,7 @@ ScalarFunctionSet ListReduceFun::GetFunctions() { ScalarFunctionSet set; set.AddFunction(fun); - fun.GetArguments().push_back(LogicalType::ANY); + fun.GetSignature().AddParameter(LogicalType::ANY); set.AddFunction(fun); return set; } diff --git a/src/duckdb/extension/core_functions/scalar/list/list_sort.cpp b/src/duckdb/extension/core_functions/scalar/list/list_sort.cpp index ffb4a7dd4..1a595a9f3 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_sort.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_sort.cpp @@ -80,7 +80,7 @@ static void SinkDataChunk(const Sort &sort, ExecutionContext &context, OperatorS chunk.data[0].Reference(lists_indices); chunk.data[1].Reference(slice); chunk.data[2].Reference(payload_vector); - chunk.SetCardinality(offset_lists_indices); + chunk.SetChildCardinality(offset_lists_indices); chunk.Verify(context.client.db); // sink @@ -244,16 +244,16 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re } } else { child_vector.Slice(sel_sorted, sel_sorted_idx); - child_vector.Flatten(sel_sorted_idx); + child_vector.Flatten(); } } } -static unique_ptr ListSortBind(ClientContext &context, ScalarFunction &bound_function, +static unique_ptr ListSortBind(ClientContext &context, BoundScalarFunction &bound_function, vector> &arguments, OrderType &order, OrderByNullType &null_order) { LogicalType child_type; - if (arguments[0]->return_type == LogicalTypeId::UNKNOWN) { + if (arguments[0]->GetReturnType() == LogicalTypeId::UNKNOWN) { bound_function.GetArguments()[0] = LogicalTypeId::UNKNOWN; bound_function.SetReturnType(LogicalType::SQLNULL); child_type = bound_function.GetReturnType(); @@ -262,10 +262,10 @@ static unique_ptr ListSortBind(ClientContext &context, ScalarFunct } arguments[0] = BoundCastExpression::AddArrayCastToList(context, std::move(arguments[0])); - child_type = ListType::GetChildType(arguments[0]->return_type); + child_type = 
ListType::GetChildType(arguments[0]->GetReturnType()); - bound_function.GetArguments()[0] = arguments[0]->return_type; - bound_function.SetReturnType(arguments[0]->return_type); + bound_function.GetArguments()[0] = arguments[0]->GetReturnType(); + bound_function.SetReturnType(arguments[0]->GetReturnType()); return make_uniq(order, null_order, false, bound_function.GetReturnType(), child_type, context); } @@ -302,9 +302,9 @@ static unique_ptr ListGradeUpBind(BindScalarFunctionInput &input) arguments[0] = BoundCastExpression::AddArrayCastToList(context, std::move(arguments[0])); - bound_function.GetArguments()[0] = arguments[0]->return_type; + bound_function.GetArguments()[0] = arguments[0]->GetReturnType(); bound_function.SetReturnType(LogicalType::LIST(LogicalTypeId::BIGINT)); - auto child_type = ListType::GetChildType(arguments[0]->return_type); + auto child_type = ListType::GetChildType(arguments[0]->GetReturnType()); return make_uniq(order, null_order, true, bound_function.GetReturnType(), child_type, context); } diff --git a/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp b/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp index ed5137490..fb3ef0a66 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp @@ -19,7 +19,7 @@ static unique_ptr ListTransformBind(BindScalarFunctionInput &input arguments[0] = BoundCastExpression::AddArrayCastToList(context, std::move(arguments[0])); auto &bound_lambda_expr = arguments[1]->Cast(); - bound_function.SetReturnType(LogicalType::LIST(bound_lambda_expr.lambda_expr->return_type)); + bound_function.SetReturnType(LogicalType::LIST(bound_lambda_expr.lambda_expr->GetReturnType())); auto has_index = bound_lambda_expr.parameter_count == 2; return LambdaFunctions::ListLambdaBind(context, bound_function, arguments, has_index); } diff --git a/src/duckdb/extension/core_functions/scalar/list/list_value.cpp 
b/src/duckdb/extension/core_functions/scalar/list/list_value.cpp index eadd641fe..f01d54a70 100644 --- a/src/duckdb/extension/core_functions/scalar/list/list_value.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/list_value.cpp @@ -185,7 +185,7 @@ bool StructFunction(DataChunk &args, Vector &result) { for (idx_t col = 0; col < column_count; col++) { auto &struct_vector = args.data[col]; if (struct_vector.GetVectorType() != VectorType::CONSTANT_VECTOR) { - struct_vector.Flatten(args.size()); + struct_vector.Flatten(); } auto &struct_vector_members = StructVector::GetEntries(struct_vector); chunk.data[col].Reference(struct_vector_members[member_idx]); @@ -258,7 +258,7 @@ unique_ptr UnpivotBind(BindScalarFunctionInput &input) { if (k == i) { error_index = list_arguments.size(); } - list_arguments += arguments[k]->ToString() + " " + arguments[k]->return_type.ToString(); + list_arguments += arguments[k]->ToString() + " " + arguments[k]->GetReturnType().ToString(); } auto error = StringUtil::Format("Cannot unpivot columns of types %s and %s - an explicit cast is required", child_type.ToString(), arg_type.ToString()); @@ -268,8 +268,6 @@ unique_ptr UnpivotBind(BindScalarFunctionInput &input) { } child_type = LogicalType::NormalizeType(child_type); - // this is more for completeness reasons - bound_function.SetVarArgs(child_type); bound_function.SetReturnType(LogicalType::LIST(child_type)); return make_uniq(bound_function.GetReturnType()); } @@ -277,7 +275,7 @@ unique_ptr UnpivotBind(BindScalarFunctionInput &input) { unique_ptr ListValueStats(ClientContext &context, FunctionStatisticsInput &input) { auto &child_stats = input.child_stats; auto &expr = input.expr; - auto list_stats = ListStats::CreateEmpty(expr.return_type); + auto list_stats = ListStats::CreateEmpty(expr.GetReturnType()); auto &list_child_stats = ListStats::GetChildStats(list_stats); for (idx_t i = 0; i < child_stats.size(); i++) { list_child_stats.Merge(child_stats[i]); diff --git 
a/src/duckdb/extension/core_functions/scalar/list/range.cpp b/src/duckdb/extension/core_functions/scalar/list/range.cpp index 7ce02cc04..6a01159d5 100644 --- a/src/duckdb/extension/core_functions/scalar/list/range.cpp +++ b/src/duckdb/extension/core_functions/scalar/list/range.cpp @@ -1,7 +1,5 @@ #include "duckdb/common/vector/flat_vector.hpp" -#include "duckdb/common/vector/list_vector.hpp" #include "core_functions/scalar/list_functions.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/common/types/data_chunk.hpp" #include "duckdb/common/types/vector.hpp" #include "duckdb/common/types/timestamp.hpp" @@ -118,16 +116,16 @@ class RangeInfoStruct { explicit RangeInfoStruct(DataChunk &args_p) : args(args_p) { switch (args.ColumnCount()) { case 1: - args.data[0].ToUnifiedFormat(args.size(), vdata[0]); + args.data[0].ToUnifiedFormat(vdata[0]); break; case 2: - args.data[0].ToUnifiedFormat(args.size(), vdata[0]); - args.data[1].ToUnifiedFormat(args.size(), vdata[1]); + args.data[0].ToUnifiedFormat(vdata[0]); + args.data[1].ToUnifiedFormat(vdata[1]); break; case 3: - args.data[0].ToUnifiedFormat(args.size(), vdata[0]); - args.data[1].ToUnifiedFormat(args.size(), vdata[1]); - args.data[2].ToUnifiedFormat(args.size(), vdata[2]); + args.data[0].ToUnifiedFormat(vdata[0]); + args.data[1].ToUnifiedFormat(vdata[1]); + args.data[2].ToUnifiedFormat(vdata[2]); break; default: throw InternalException("Unsupported number of parameters for range"); @@ -196,49 +194,27 @@ void ListRangeFunction(DataChunk &args, ExpressionState &state, Vector &result) D_ASSERT(result.GetType().id() == LogicalTypeId::LIST); RangeInfoStruct info(args); - idx_t args_size = 1; - auto result_type = VectorType::CONSTANT_VECTOR; - for (idx_t i = 0; i < args.ColumnCount(); i++) { - if (args.data[i].GetVectorType() != VectorType::CONSTANT_VECTOR) { - args_size = args.size(); - result_type = VectorType::FLAT_VECTOR; - break; - } - } - auto result_data = 
FlatVector::Writer(result, args_size); - uint64_t total_size = 0; - vector list_lengths(args_size, 0); + auto args_size = args.size(); + auto list_writer = FlatVector::Writer>(result, args_size); for (idx_t i = 0; i < args_size; i++) { if (!info.RowIsValid(i)) { - result_data.WriteNull(list_entry_t(total_size, 0)); - } else { - const auto length = info.ListLength(i); - list_lengths[i] = length; - result_data.WriteValue(list_entry_t(total_size, length)); - total_size += length; + list_writer.WriteNull(); + continue; } - } - - // now construct the child vector of the list - ListVector::Reserve(result, total_size); - auto range_data = FlatVector::Writer(ListVector::GetChildMutable(result), total_size); - for (idx_t i = 0; i < args_size; i++) { typename OP::TYPE start_value = info.StartListValue(i); typename OP::INCREMENT_TYPE increment = info.ListIncrementValue(i); + const auto length = info.ListLength(i); typename OP::TYPE range_value = start_value; - for (idx_t range_idx = 0; range_idx < list_lengths[i]; range_idx++) { - if (range_idx > 0) { + bool seen_value = false; + for (auto &child_writer : list_writer.WriteList(length)) { + if (seen_value) { OP::Increment(range_value, increment); } - range_data.WriteValue(range_value); + child_writer.WriteValue(range_value); + seen_value = true; } } - - ListVector::SetListSize(result, total_size); - result.SetVectorType(result_type); - - result.Verify(args.size()); } } // namespace diff --git a/src/duckdb/extension/core_functions/scalar/map/cardinality.cpp b/src/duckdb/extension/core_functions/scalar/map/cardinality.cpp index b2ce37177..ab70027fc 100644 --- a/src/duckdb/extension/core_functions/scalar/map/cardinality.cpp +++ b/src/duckdb/extension/core_functions/scalar/map/cardinality.cpp @@ -29,7 +29,7 @@ static unique_ptr CardinalityBind(BindScalarFunctionInput &input) throw BinderException("Cardinality must have exactly one arguments"); } - if (arguments[0]->return_type.id() != LogicalTypeId::MAP) { + if 
(arguments[0]->GetReturnType().id() != LogicalTypeId::MAP) { throw BinderException("Cardinality can only operate on MAPs"); } diff --git a/src/duckdb/extension/core_functions/scalar/map/map.cpp b/src/duckdb/extension/core_functions/scalar/map/map.cpp index 5eae9262e..48bbaa551 100644 --- a/src/duckdb/extension/core_functions/scalar/map/map.cpp +++ b/src/duckdb/extension/core_functions/scalar/map/map.cpp @@ -18,7 +18,7 @@ static void MapFunctionEmptyInput(Vector &result, const idx_t row_count) { auto result_data = ConstantVector::GetData(result); result_data[0] = list_entry_t(); - result.Verify(row_count); + result.Verify(); } static bool MapIsNull(DataChunk &chunk) { @@ -63,23 +63,23 @@ static void MapFunction(DataChunk &args, ExpressionState &, Vector &result) { // a LIST vector, where each row contains a LIST of KEYS UnifiedVectorFormat keys_data; - keys.ToUnifiedFormat(row_count, keys_data); + keys.ToUnifiedFormat(keys_data); auto keys_entries = UnifiedVectorFormat::GetData(keys_data); // the KEYs child vector auto &keys_child_vector = ListVector::GetChildMutable(keys); UnifiedVectorFormat keys_child_data; - keys_child_vector.ToUnifiedFormat(ListVector::GetListSize(keys), keys_child_data); + keys_child_vector.ToUnifiedFormat(keys_child_data); // a LIST vector, where each row contains a LIST of VALUES UnifiedVectorFormat values_data; - values.ToUnifiedFormat(row_count, values_data); + values.ToUnifiedFormat(values_data); auto values_entries = UnifiedVectorFormat::GetData(values_data); // the VALUEs child vector auto &values_child_vector = ListVector::GetChildMutable(values); UnifiedVectorFormat values_child_data; - values_child_vector.ToUnifiedFormat(ListVector::GetListSize(values), values_child_data); + values_child_vector.ToUnifiedFormat(values_child_data); // a LIST vector, where each row contains a MAP (LIST of STRUCTs) auto result_entries = FlatVector::Writer(result, row_count); @@ -152,11 +152,11 @@ static void MapFunction(DataChunk &args, ExpressionState 
&, Vector &result) { ListVector::SetListSize(result, offset); result_key_vector.Slice(keys_child_vector, sel_keys, offset); - result_key_vector.Flatten(offset); + result_key_vector.Flatten(); result_value_vector.Slice(values_child_vector, sel_values, offset); - result_value_vector.Flatten(offset); + result_value_vector.Flatten(); FlatVector::ValidityMutable(ListVector::GetChildMutable(result)).Resize(result_child_size); - result.Verify(row_count); + result.Verify(); } ScalarFunctionSet MapFun::GetFunctions() { diff --git a/src/duckdb/extension/core_functions/scalar/map/map_concat.cpp b/src/duckdb/extension/core_functions/scalar/map/map_concat.cpp index ea5981618..caff21735 100644 --- a/src/duckdb/extension/core_functions/scalar/map/map_concat.cpp +++ b/src/duckdb/extension/core_functions/scalar/map/map_concat.cpp @@ -49,7 +49,7 @@ void MapConcatFunction(DataChunk &args, ExpressionState &state, Vector &result) vector map_formats(map_count); for (idx_t i = 0; i < map_count; i++) { auto &map = args.data[i]; - map.ToUnifiedFormat(count, map_formats[i]); + map.ToUnifiedFormat(map_formats[i]); } auto result_data = FlatVector::Writer(result, count); for (idx_t i = 0; i < count; i++) { @@ -131,7 +131,7 @@ unique_ptr MapConcatBind(BindScalarFunctionInput &input) { throw InvalidInputException("The provided amount of arguments is incorrect, please provide 2 or more maps"); } - if (arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::UNKNOWN) { // Prepared statement bound_function.GetArguments().emplace_back(LogicalTypeId::UNKNOWN); bound_function.SetReturnType(LogicalTypeId::SQLNULL); @@ -144,7 +144,7 @@ unique_ptr MapConcatBind(BindScalarFunctionInput &input) { // Check and verify that all the maps are of the same type for (idx_t i = 0; i < arg_count; i++) { auto &arg = arguments[i]; - auto &map = arg->return_type; + auto &map = arg->GetReturnType(); if (map.id() == LogicalTypeId::UNKNOWN) { // Prepared 
statement bound_function.GetArguments().emplace_back(LogicalTypeId::UNKNOWN); diff --git a/src/duckdb/extension/core_functions/scalar/map/map_entries.cpp b/src/duckdb/extension/core_functions/scalar/map/map_entries.cpp index 03b99f2c0..d1a7fe730 100644 --- a/src/duckdb/extension/core_functions/scalar/map/map_entries.cpp +++ b/src/duckdb/extension/core_functions/scalar/map/map_entries.cpp @@ -14,7 +14,7 @@ static void MapEntriesFunction(DataChunk &args, ExpressionState &state, Vector & auto &map = args.data[0]; if (map.GetType().id() == LogicalTypeId::SQLNULL) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(count)); return; } MapUtil::ReinterpretMap(result, map, count); diff --git a/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp b/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp index 08e8c5961..e1daa9bf0 100644 --- a/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp +++ b/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp @@ -23,8 +23,8 @@ static void MapExtractValueFunc(DataChunk &args, ExpressionState &state, Vector UnifiedVectorFormat pos_format; UnifiedVectorFormat lst_format; - pos_vec.ToUnifiedFormat(count, pos_format); - map_vec.ToUnifiedFormat(count, lst_format); + pos_vec.ToUnifiedFormat(pos_format); + map_vec.ToUnifiedFormat(lst_format); const auto pos_data = UnifiedVectorFormat::GetData(pos_format); const auto inc_list_data = UnifiedVectorFormat::GetData(lst_format); @@ -52,7 +52,7 @@ static void MapExtractValueFunc(DataChunk &args, ExpressionState &state, Vector result.SetVectorType(VectorType::CONSTANT_VECTOR); } - result.Verify(count); + result.Verify(); } static void MapExtractListFunc(DataChunk &args, ExpressionState &state, Vector &result) { @@ -68,41 +68,31 @@ static void MapExtractListFunc(DataChunk &args, ExpressionState &state, Vector & Vector pos_vec(LogicalType::INTEGER, count); ListSearchOp(map_vec, key_vec, arg_vec, pos_vec, args.size()); - 
UnifiedVectorFormat pos_format; - UnifiedVectorFormat lst_format; - - pos_vec.ToUnifiedFormat(count, pos_format); - map_vec.ToUnifiedFormat(count, lst_format); - - const auto pos_data = UnifiedVectorFormat::GetData(pos_format); - const auto inc_list_data = UnifiedVectorFormat::GetData(lst_format); + auto pos_entries = pos_vec.Values(count); + auto map_entries = map_vec.Values(count); + const auto val_size = ListVector::GetListSize(map_vec); auto out_list_data = FlatVector::Writer(result, count); - idx_t offset = 0; - vector val_idx_data; for (idx_t row_idx = 0; row_idx < count; row_idx++) { - const auto lst_idx = lst_format.sel->get_index(row_idx); - if (!lst_format.validity.RowIsValid(lst_idx)) { + auto map_entry = map_entries[row_idx]; + if (!map_entry.IsValid()) { out_list_data.WriteNull(); continue; } - const auto pos_idx = pos_format.sel->get_index(row_idx); - if (!pos_format.validity.RowIsValid(pos_idx)) { - // We didnt find the key in the map, so return empty list - out_list_data.WriteValue(list_entry_t(offset, 0)); + auto list = out_list_data.WriteDynamicList(); + auto pos_entry = pos_entries[row_idx]; + if (!pos_entry.IsValid()) { + // key not found: return empty list continue; } - auto &inc_list = inc_list_data[lst_idx]; - // Compute the actual position of the value in the map value vector - const auto pos = inc_list.offset + UnsafeNumericCast(pos_data[pos_idx] - 1); - out_list_data.WriteValue(list_entry_t(offset, 1)); - val_idx_data.emplace_back(pos); - offset++; + const auto &inc_list = map_entry.GetValue(); + const auto pos = inc_list.offset + UnsafeNumericCast(pos_entry.GetValue() - 1); + SelectionVector sel(1); + sel.set_index(0, pos); + list.Append(val_vec, sel, val_size, 0, 1); } - SelectionVector val_sel(val_idx_data.data(), val_idx_data.size()); - ListVector::Append(result, val_vec, val_sel, val_idx_data.size()); } ScalarFunction MapExtractValueFun::GetFunction() { diff --git a/src/duckdb/extension/core_functions/scalar/map/map_keys_values.cpp 
b/src/duckdb/extension/core_functions/scalar/map/map_keys_values.cpp index c05e75091..f6b186d42 100644 --- a/src/duckdb/extension/core_functions/scalar/map/map_keys_values.cpp +++ b/src/duckdb/extension/core_functions/scalar/map/map_keys_values.cpp @@ -15,11 +15,11 @@ static void MapKeyValueFunction(DataChunk &args, ExpressionState &state, Vector D_ASSERT(result.GetType().id() == LogicalTypeId::LIST); if (map.GetType().id() == LogicalTypeId::SQLNULL) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(args.size())); return; } auto count = args.size(); - map.Flatten(count); + map.Flatten(); D_ASSERT(map.GetType().id() == LogicalTypeId::MAP); auto &child = get_child_vector(map); @@ -27,11 +27,11 @@ static void MapKeyValueFunction(DataChunk &args, ExpressionState &state, Vector auto &entries = ListVector::GetChildMutable(result); entries.Reference(child); - FlatVector::SetData(result, FlatVector::GetDataMutable(map), count); + FlatVector::SetData(result, FlatVector::GetDataMutable(map), count_t(count)); FlatVector::SetValidity(result, FlatVector::ValidityMutable(map)); auto list_size = ListVector::GetListSize(map); ListVector::SetListSize(result, list_size); - result.Verify(count); + result.Verify(); } static void MapKeysFunction(DataChunk &args, ExpressionState &state, Vector &result) { diff --git a/src/duckdb/extension/core_functions/scalar/map/switch.cpp b/src/duckdb/extension/core_functions/scalar/map/switch.cpp index 6a8d0a660..71b419af6 100644 --- a/src/duckdb/extension/core_functions/scalar/map/switch.cpp +++ b/src/duckdb/extension/core_functions/scalar/map/switch.cpp @@ -34,7 +34,7 @@ struct SwitchFunctionBindData : FunctionData { idx_t FindMapArgumentIndex(const vector> &arguments) { for (idx_t i = 0; i < arguments.size(); i++) { - if (arguments[i]->return_type.id() == LogicalTypeId::MAP) { + if (arguments[i]->GetReturnType().id() == LogicalTypeId::MAP) { return i; } } @@ -53,7 +53,7 @@ unique_ptr 
SwitchBindReturnType(BindScalarFunctionInput &input) { throw BinderException("SWITCH expected a constant map for the cases"); } auto &func = cases->Cast(); - if (func.function.name != "map") { + if (func.function.GetName() != "map") { throw BinderException("SWITCH expected a constant map for the cases"); } auto map_value = ExpressionExecutor::EvaluateScalar(context, *cases); @@ -66,7 +66,7 @@ void ExtractConstantExprFromList(unique_ptr &expr, vectorCast(); - if (list_function.function.name != "list_value") { + if (list_function.function.GetName() != "list_value") { throw BinderException("Expected a list function"); } if (list_function.children.empty()) { @@ -119,8 +119,8 @@ unique_ptr SwitchBindExpression(FunctionBindExpressionInput &input) for (idx_t i = 0; i < keys_unpacked.size(); i++) { BoundCaseCheck case_check; if (base_expr) { - auto max_type = - LogicalType::MaxLogicalType(input.context, base_expr->return_type, keys_unpacked[i]->return_type); + auto max_type = LogicalType::MaxLogicalType(input.context, base_expr->GetReturnType(), + keys_unpacked[i]->GetReturnType()); case_check.when_expr = make_uniq( ExpressionType::COMPARE_EQUAL, base_expr->Copy(), BoundCastExpression::AddCastToType(input.context, std::move(keys_unpacked[i]), max_type)); @@ -128,7 +128,7 @@ unique_ptr SwitchBindExpression(FunctionBindExpressionInput &input) case_check.when_expr = BoundCastExpression::AddCastToType(input.context, std::move(keys_unpacked[i]), LogicalType::BOOLEAN); } - auto then_type = values_unpacked[i]->return_type; + auto then_type = values_unpacked[i]->GetReturnType(); if (!LogicalType::TryGetMaxLogicalType(input.context, function_data.return_type, then_type, function_data.return_type)) { throw BinderException( diff --git a/src/duckdb/extension/core_functions/scalar/math/numeric.cpp b/src/duckdb/extension/core_functions/scalar/math/numeric.cpp index d6bc3babc..065a854a5 100644 --- a/src/duckdb/extension/core_functions/scalar/math/numeric.cpp +++ 
b/src/duckdb/extension/core_functions/scalar/math/numeric.cpp @@ -109,7 +109,7 @@ static unique_ptr PropagateAbsStats(ClientContext &context, Func Value new_min, new_max; bool potential_overflow = true; if (NumericStats::HasMinMax(lstats)) { - switch (expr.return_type.InternalType()) { + switch (expr.GetReturnType().InternalType()) { case PhysicalType::INT8: potential_overflow = NumericStats::Min(lstats).GetValue() == NumericLimits::Minimum(); break; @@ -127,8 +127,8 @@ static unique_ptr PropagateAbsStats(ClientContext &context, Func } } if (potential_overflow) { - new_min = Value(expr.return_type); - new_max = Value(expr.return_type); + new_min = Value(expr.GetReturnType()); + new_max = Value(expr.GetReturnType()); } else { // no potential overflow @@ -152,11 +152,11 @@ static unique_ptr PropagateAbsStats(ClientContext &context, Func *input.expr_ptr = std::move(input.expr.children[0]); return child_stats[0].ToUnique(); } - new_min = Value::Numeric(expr.return_type, min_val); - new_max = Value::Numeric(expr.return_type, max_val); - expr.function.SetFunctionCallback(ScalarFunction::GetScalarUnaryFunction(expr.return_type)); + new_min = Value::Numeric(expr.GetReturnType(), min_val); + new_max = Value::Numeric(expr.GetReturnType(), max_val); + expr.function.SetFunctionCallback(ScalarFunction::GetScalarUnaryFunction(expr.GetReturnType())); } - auto stats = NumericStats::CreateEmpty(expr.return_type); + auto stats = NumericStats::CreateEmpty(expr.GetReturnType()); NumericStats::SetMin(stats, new_min); NumericStats::SetMax(stats, new_max); stats.CopyValidity(lstats); @@ -167,7 +167,7 @@ template static unique_ptr DecimalUnaryOpBind(BindScalarFunctionInput &input) { auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - auto decimal_type = arguments[0]->return_type; + auto decimal_type = arguments[0]->GetReturnType(); switch (decimal_type.InternalType()) { case PhysicalType::INT16: 
bound_function.SetFunctionCallback(ScalarFunction::GetScalarUnaryFunction(LogicalTypeId::SMALLINT)); @@ -351,7 +351,8 @@ struct CeilOperator { template static void GenericRoundFunctionDecimal(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); - OP::template Operation(input, DecimalType::GetScale(func_expr.children[0]->return_type), result); + OP::template Operation(input, DecimalType::GetScale(func_expr.children[0]->GetReturnType()), + result); } template @@ -359,7 +360,7 @@ static unique_ptr BindGenericRoundFunctionDecimal(BindScalarFuncti auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); // ceil essentially removes the scale - auto &decimal_type = arguments[0]->return_type; + auto &decimal_type = arguments[0]->GetReturnType(); auto scale = DecimalType::GetScale(decimal_type); auto width = DecimalType::GetWidth(decimal_type); if (scale == 0) { @@ -427,6 +428,7 @@ ScalarFunctionSet CeilFun::GetFunctions() { } ceil.AddFunction(ScalarFunction({type}, type, func, bind_func)); } + ceil.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return ceil; } @@ -482,6 +484,7 @@ ScalarFunctionSet FloorFun::GetFunctions() { } floor.AddFunction(ScalarFunction({type}, type, func, bind_func)); } + floor.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return floor; } @@ -516,11 +519,11 @@ unique_ptr BindDecimalRoundPrecision(BindScalarFunctionInput &inpu auto &context = input.GetClientContext(); auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - auto &decimal_type = arguments[0]->return_type; + auto &decimal_type = arguments[0]->GetReturnType(); if (arguments[1]->HasParameter()) { throw ParameterNotResolvedException(); } - auto fname = StringUtil::Upper(bound_function.name); + auto fname = StringUtil::Upper(bound_function.GetName()); if (!arguments[1]->IsFoldable()) { throw NotImplementedException("%s(DECIMAL, INTEGER) with non-constant precision 
is not supported", fname); } @@ -626,8 +629,8 @@ struct TruncDecimalNegativePrecisionOperator { static void Operation(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); - auto source_scale = DecimalType::GetScale(func_expr.children[0]->return_type); - auto width = DecimalType::GetWidth(func_expr.children[0]->return_type); + auto source_scale = DecimalType::GetScale(func_expr.children[0]->GetReturnType()); + auto width = DecimalType::GetWidth(func_expr.children[0]->GetReturnType()); if (info.target_scale <= -int32_t(width - source_scale)) { // scale too big for width result.SetVectorType(VectorType::CONSTANT_VECTOR); @@ -649,7 +652,7 @@ struct TruncDecimalPositivePrecisionOperator { static void Operation(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); - auto source_scale = DecimalType::GetScale(func_expr.children[0]->return_type); + auto source_scale = DecimalType::GetScale(func_expr.children[0]->GetReturnType()); T power_of_ten = UnsafeNumericCast(POWERS_OF_TEN_CLASS::POWERS_OF_TEN[source_scale - info.target_scale]); UnaryExecutor::Execute(input.data[0], result, input.size(), [&](T input) { return UnsafeNumericCast(input / power_of_ten); }); @@ -750,6 +753,7 @@ ScalarFunctionSet TruncFun::GetFunctions() { trunc.AddFunction(ScalarFunction({type}, type, trunc_func, bind_func)); trunc.AddFunction(ScalarFunction({type, LogicalType::INTEGER}, type, trunc_prec_func, bind_prec_func)); } + trunc.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return trunc; } @@ -847,8 +851,8 @@ struct DecimalRoundNegativePrecisionOperator { static void Operation(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); - auto source_scale = DecimalType::GetScale(func_expr.children[0]->return_type); - auto width = 
DecimalType::GetWidth(func_expr.children[0]->return_type); + auto source_scale = DecimalType::GetScale(func_expr.children[0]->GetReturnType()); + auto width = DecimalType::GetWidth(func_expr.children[0]->GetReturnType()); if (info.target_scale <= -int32_t(width - source_scale)) { // scale too big for width result.SetVectorType(VectorType::CONSTANT_VECTOR); @@ -876,7 +880,7 @@ struct DecimalRoundPositivePrecisionOperator { static void Operation(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); - auto source_scale = DecimalType::GetScale(func_expr.children[0]->return_type); + auto source_scale = DecimalType::GetScale(func_expr.children[0]->GetReturnType()); T power_of_ten = UnsafeNumericCast(POWERS_OF_TEN_CLASS::POWERS_OF_TEN[source_scale - info.target_scale]); T addition = power_of_ten / 2; UnaryExecutor::Execute(input.data[0], result, input.size(), [&](T input) { @@ -941,6 +945,7 @@ ScalarFunctionSet RoundFun::GetFunctions() { round.AddFunction(ScalarFunction({type}, type, round_func, bind_func)); round.AddFunction(ScalarFunction({type, LogicalType::INTEGER}, type, round_prec_func, bind_prec_func)); } + round.SetUnaryArgProperties(ArgProperties().NonDecreasing()); return round; } @@ -959,8 +964,10 @@ struct ExpOperator { } // namespace ScalarFunction ExpFun::GetFunction() { - return ScalarFunction({LogicalType::DOUBLE}, LogicalType::DOUBLE, - ScalarFunction::UnaryFunction); + ScalarFunction func({LogicalType::DOUBLE}, LogicalType::DOUBLE, + ScalarFunction::UnaryFunction); + func.SetUnaryArgProperties(ArgProperties().StrictlyIncreasing()); + return func; } //===--------------------------------------------------------------------===// @@ -1025,8 +1032,10 @@ struct CbRtOperator { } // namespace ScalarFunction CbrtFun::GetFunction() { - return ScalarFunction({LogicalType::DOUBLE}, LogicalType::DOUBLE, - ScalarFunction::UnaryFunction); + ScalarFunction func({LogicalType::DOUBLE}, 
LogicalType::DOUBLE, + ScalarFunction::UnaryFunction); + func.SetUnaryArgProperties(ArgProperties().StrictlyIncreasing()); + return func; } //===--------------------------------------------------------------------===// @@ -1171,7 +1180,7 @@ ScalarFunction Log2Fun::GetFunction() { static void PiFunction(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.ColumnCount() == 0); Value pi_value = Value::DOUBLE(PI); - result.Reference(pi_value); + result.Reference(pi_value, count_t(args.size())); } ScalarFunction PiFun::GetFunction() { diff --git a/src/duckdb/extension/core_functions/scalar/random/setseed.cpp b/src/duckdb/extension/core_functions/scalar/random/setseed.cpp index 29f8c86d4..54d1d5cd8 100644 --- a/src/duckdb/extension/core_functions/scalar/random/setseed.cpp +++ b/src/duckdb/extension/core_functions/scalar/random/setseed.cpp @@ -31,7 +31,7 @@ void SetSeedFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); auto &input = args.data[0]; - input.Flatten(args.size()); + input.Flatten(); auto input_seeds = FlatVector::GetData(input); uint32_t half_max = NumericLimits::Maximum() / 2; @@ -45,7 +45,7 @@ void SetSeedFunction(DataChunk &args, ExpressionState &state, Vector &result) { random_engine.SetSeed(norm_seed); } - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(args.size())); } unique_ptr SetSeedBind(BindScalarFunctionInput &input) { diff --git a/src/duckdb/extension/core_functions/scalar/string/parse_path.cpp b/src/duckdb/extension/core_functions/scalar/string/parse_path.cpp index 8fa84af4a..aaa61dbe3 100644 --- a/src/duckdb/extension/core_functions/scalar/string/parse_path.cpp +++ b/src/duckdb/extension/core_functions/scalar/string/parse_path.cpp @@ -1,5 +1,4 @@ #include "duckdb/common/vector/flat_vector.hpp" -#include "duckdb/common/vector/list_vector.hpp" #include "core_functions/scalar/string_functions.hpp" #include 
"duckdb/function/scalar/string_common.hpp" #include "duckdb/common/local_file_system.hpp" @@ -27,26 +26,6 @@ static string GetSeparator(const string_t &input) { return separator; } -struct SplitInput { - SplitInput(Vector &result_list, Vector &result_child, idx_t offset) - : result_list(result_list), result_child(result_child), offset(offset) { - } - - Vector &result_list; - Vector &result_child; - idx_t offset; - - void AddSplit(const char *split_data, idx_t split_size, idx_t list_idx) { - auto list_entry = offset + list_idx; - if (list_entry >= ListVector::GetListCapacity(result_list)) { - ListVector::SetListSize(result_list, offset + list_idx); - ListVector::Reserve(result_list, ListVector::GetListCapacity(result_list) * 2); - } - FlatVector::GetDataMutable(result_child)[list_entry] = - StringVector::AddString(result_child, split_data, split_size); - } -}; - static bool IsIdxValid(const idx_t &i, const idx_t &sentence_size) { if (i > sentence_size || i == DConstants::INVALID_INDEX) { return false; @@ -88,7 +67,8 @@ static idx_t FindLast(const char *data_ptr, idx_t input_size, const string &sep_ return start - 1; } -static idx_t SplitPath(string_t input, const string &sep, SplitInput &state) { +template +static idx_t SplitPath(string_t input, const string &sep, CB &&emit) { auto input_data = input.GetData(); auto input_size = input.GetSize(); if (!input_size) { @@ -104,21 +84,21 @@ static idx_t SplitPath(string_t input, const string &sep, SplitInput &state) { D_ASSERT(input_size >= pos); if (pos == 0) { if (list_idx == 0) { // first character in path is separator - state.AddSplit(input_data, 1, list_idx); + emit(input_data, 1); list_idx++; if (input_size == 1) { // special case: the only character in path is a separator return list_idx; } } // else: separator is in the path } else { - state.AddSplit(input_data, pos, list_idx); + emit(input_data, pos); list_idx++; } input_data += (pos + 1); input_size -= (pos + 1); } if (input_size > 0) { - 
state.AddSplit(input_data, input_size, list_idx); + emit(input_data, input_size); list_idx++; } return list_idx; @@ -132,7 +112,7 @@ static void ReadOptionalArgs(DataChunk &args, Vector &sep, Vector &trim, const b } case 2: { UnifiedVectorFormat sec_arg; - args.data[1].ToUnifiedFormat(args.size(), sec_arg); + args.data[1].ToUnifiedFormat(sec_arg); if (sec_arg.validity.RowIsValid(0)) { // if not NULL switch (args.data[1].GetType().id()) { case LogicalTypeId::VARCHAR: { @@ -153,12 +133,12 @@ static void ReadOptionalArgs(DataChunk &args, Vector &sep, Vector &trim, const b if (!front_trim) { // set trim_extension UnifiedVectorFormat sec_arg; - args.data[1].ToUnifiedFormat(args.size(), sec_arg); + args.data[1].ToUnifiedFormat(sec_arg); if (sec_arg.validity.RowIsValid(0)) { trim.Reinterpret(args.data[1]); } UnifiedVectorFormat third_arg; - args.data[2].ToUnifiedFormat(args.size(), third_arg); + args.data[2].ToUnifiedFormat(third_arg); if (third_arg.validity.RowIsValid(0)) { sep.Reinterpret(args.data[2]); } @@ -176,8 +156,8 @@ template static void TrimPathFunction(DataChunk &args, ExpressionState &state, Vector &result) { // set default values Vector &path = args.data[0]; - Vector separator(string_t("default")); - Vector trim_extension(Value::BOOLEAN(false)); + Vector separator(string_t("default"), count_t(args.size())); + Vector trim_extension(Value::BOOLEAN(false), count_t(args.size())); ReadOptionalArgs(args, separator, trim_extension, FRONT_TRIM); TernaryExecutor::Execute( @@ -222,8 +202,8 @@ static void TrimPathFunction(DataChunk &args, ExpressionState &state, Vector &re static void ParseDirpathFunction(DataChunk &args, ExpressionState &state, Vector &result) { // set default values Vector &path = args.data[0]; - Vector separator(string_t("default")); - Vector trim_extension(false); + Vector separator(string_t("default"), count_t(args.size())); + Vector trim_extension(Value::BOOLEAN(false), count_t(args.size())); ReadOptionalArgs(args, separator, trim_extension, 
true); auto &heap = StringVector::GetStringHeap(result); @@ -250,14 +230,14 @@ static void ParseDirpathFunction(DataChunk &args, ExpressionState &state, Vector static void ParsePathFunction(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.ColumnCount() == 1 || args.ColumnCount() == 2); UnifiedVectorFormat input_data; - args.data[0].ToUnifiedFormat(args.size(), input_data); + args.data[0].ToUnifiedFormat(input_data); auto inputs = UnifiedVectorFormat::GetData(input_data); // set the separator string input_sep = "default"; if (args.ColumnCount() == 2) { UnifiedVectorFormat sep_data; - args.data[1].ToUnifiedFormat(args.size(), sep_data); + args.data[1].ToUnifiedFormat(sep_data); if (sep_data.validity.RowIsValid(0)) { input_sep = UnifiedVectorFormat::GetData(sep_data)->GetString(); } @@ -266,25 +246,19 @@ static void ParsePathFunction(DataChunk &args, ExpressionState &state, Vector &r D_ASSERT(result.GetType().id() == LogicalTypeId::LIST); result.SetVectorType(VectorType::FLAT_VECTOR); - ListVector::SetListSize(result, 0); - // set up the list entries - auto result_data = FlatVector::Writer(result, args.size()); - auto &child_entry = ListVector::GetChildMutable(result); - idx_t total_splits = 0; + auto list_writer = FlatVector::Writer>(result, args.size()); for (idx_t i = 0; i < args.size(); i++) { auto input_idx = input_data.sel->get_index(i); if (!input_data.validity.RowIsValid(input_idx)) { - result_data.WriteNull(); + list_writer.WriteNull(); continue; } - SplitInput split_input(result, child_entry, total_splits); - auto list_length = SplitPath(inputs[input_idx], sep, split_input); - result_data.WriteValue(list_entry_t(total_splits, list_length)); - total_splits += list_length; + auto list = list_writer.WriteDynamicList(); + SplitPath(inputs[input_idx], sep, [&](const char *split_data, idx_t split_size) { + list.WriteElement().WriteValue(string_t(split_data, UnsafeNumericCast(split_size))); + }); } - ListVector::SetListSize(result, 
total_splits); - D_ASSERT(ListVector::GetListSize(result) == total_splits); } ScalarFunctionSet ParseDirnameFun::GetFunctions() { @@ -293,7 +267,7 @@ ScalarFunctionSet ParseDirnameFun::GetFunctions() { LogicalType::INVALID, FunctionStability::CONSISTENT, FunctionNullHandling::SPECIAL_HANDLING); parse_dirname.AddFunction(func); // separator options - func.GetArguments().emplace_back(LogicalType::VARCHAR); + func.GetSignature().AddParameter(LogicalType::VARCHAR); parse_dirname.AddFunction(func); return parse_dirname; } @@ -304,7 +278,7 @@ ScalarFunctionSet ParseDirpathFun::GetFunctions() { LogicalType::INVALID, FunctionStability::CONSISTENT, FunctionNullHandling::SPECIAL_HANDLING); parse_dirpath.AddFunction(func); // separator options - func.GetArguments().emplace_back(LogicalType::VARCHAR); + func.GetSignature().AddParameter(LogicalType::VARCHAR); parse_dirpath.AddFunction(func); return parse_dirpath; } @@ -334,7 +308,7 @@ ScalarFunctionSet ParsePathFun::GetFunctions() { LogicalType::INVALID, FunctionStability::CONSISTENT, FunctionNullHandling::SPECIAL_HANDLING); parse_path.AddFunction(func); // separator options - func.GetArguments().emplace_back(LogicalType::VARCHAR); + func.GetSignature().AddParameter(LogicalType::VARCHAR); parse_path.AddFunction(func); return parse_path; } diff --git a/src/duckdb/extension/core_functions/scalar/string/printf.cpp b/src/duckdb/extension/core_functions/scalar/string/printf.cpp index f072ee729..b841b06d8 100644 --- a/src/duckdb/extension/core_functions/scalar/string/printf.cpp +++ b/src/duckdb/extension/core_functions/scalar/string/printf.cpp @@ -22,50 +22,50 @@ struct FMTFormat { } }; -unique_ptr BindPrintfFunction(BindScalarFunctionInput &input) { +static unique_ptr BindPrintfFunction(BindScalarFunctionInput &input) { auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); for (idx_t i = 1; i < arguments.size(); i++) { - switch (arguments[i]->return_type.id()) { + switch 
(arguments[i]->GetReturnType().id()) { case LogicalTypeId::BOOLEAN: - bound_function.GetArguments().emplace_back(LogicalType::BOOLEAN); + bound_function.GetArguments()[i] = LogicalType::BOOLEAN; break; case LogicalTypeId::TINYINT: case LogicalTypeId::SMALLINT: case LogicalTypeId::INTEGER: case LogicalTypeId::BIGINT: - bound_function.GetArguments().emplace_back(LogicalType::BIGINT); + bound_function.GetArguments()[i] = LogicalType::BIGINT; break; case LogicalTypeId::UTINYINT: case LogicalTypeId::USMALLINT: case LogicalTypeId::UINTEGER: case LogicalTypeId::UBIGINT: - bound_function.GetArguments().emplace_back(LogicalType::UBIGINT); + bound_function.GetArguments()[i] = LogicalType::UBIGINT; break; case LogicalTypeId::HUGEINT: - bound_function.GetArguments().emplace_back(LogicalType::HUGEINT); + bound_function.GetArguments()[i] = LogicalType::HUGEINT; break; case LogicalTypeId::UHUGEINT: - bound_function.GetArguments().emplace_back(LogicalType::UHUGEINT); + bound_function.GetArguments()[i] = LogicalType::UHUGEINT; break; case LogicalTypeId::FLOAT: case LogicalTypeId::DOUBLE: - bound_function.GetArguments().emplace_back(LogicalType::DOUBLE); + bound_function.GetArguments()[i] = LogicalType::DOUBLE; break; case LogicalTypeId::VARCHAR: - bound_function.GetArguments().push_back(LogicalType::VARCHAR); + bound_function.GetArguments()[i] = LogicalType::VARCHAR; break; case LogicalTypeId::DECIMAL: // decimal type: add cast to double - bound_function.GetArguments().emplace_back(LogicalType::DOUBLE); + bound_function.GetArguments()[i] = LogicalType::DOUBLE; break; case LogicalTypeId::UNKNOWN: // parameter: accept any input and rebind later - bound_function.GetArguments().emplace_back(LogicalType::ANY); + bound_function.GetArguments()[i] = LogicalType::ANY; break; default: // all other types: add cast to string - bound_function.GetArguments().emplace_back(LogicalType::VARCHAR); + bound_function.GetArguments()[i] = LogicalType::VARCHAR; break; } } @@ -88,8 +88,8 @@ struct 
StringConstructArgument { }; template -void ConvertArguments(const Vector &input, idx_t count, idx_t arg_idx, - vector>> &result_args) { +static void ConvertArguments(const Vector &input, idx_t count, idx_t arg_idx, + vector>> &result_args) { auto result = input.Values(count); for (idx_t i = 0; i < count; i++) { auto &args = result_args[i]; diff --git a/src/duckdb/extension/core_functions/scalar/string/repeat.cpp b/src/duckdb/extension/core_functions/scalar/string/repeat.cpp index e2a087082..1fa551d2d 100644 --- a/src/duckdb/extension/core_functions/scalar/string/repeat.cpp +++ b/src/duckdb/extension/core_functions/scalar/string/repeat.cpp @@ -36,37 +36,38 @@ static void RepeatFunction(DataChunk &args, ExpressionState &, Vector &result) { static void RepeatListFunction(DataChunk &args, ExpressionState &, Vector &result) { auto &list_vector = args.data[0]; auto &cnt_vector = args.data[1]; - auto &source_child = ListVector::GetChildMutable(list_vector); + auto count = args.size(); - idx_t current_size = ListVector::GetListSize(result); - BinaryExecutor::Execute( - list_vector, cnt_vector, result, args.size(), [&](list_entry_t list_input, int64_t cnt) { - idx_t copy_count = cnt <= 0 || list_input.length == 0 ? 
0 : UnsafeNumericCast(cnt); - idx_t result_length; - if (!TryMultiplyOperator::Operation(list_input.length, copy_count, result_length)) { - throw OutOfRangeException("Cannot create a list of size: '%d' * '%d', the result is too large", - list_input.length, copy_count); - } - idx_t new_size; - if (!TryAddOperator::Operation(current_size, result_length, new_size)) { - throw OutOfRangeException("Cannot create a list of size: '%d' + '%d', the result is too large", - current_size, result_length); - } - ListVector::Reserve(result, new_size); - auto &result_child = ListVector::GetChildMutable(result); - list_entry_t result_list; - result_list.offset = current_size; - result_list.length = result_length; - for (idx_t i = 0; i < copy_count; i++) { - // repeat the list contents "cnt" times - VectorOperations::Copy(source_child, result_child, list_input.offset + list_input.length, - list_input.offset, current_size); - current_size += list_input.length; - } - return result_list; - }); - ListVector::SetListSize(result, current_size); + auto list_entries = list_vector.Values(count); + auto cnt_entries = cnt_vector.Values(count); + + auto result_writer = FlatVector::Writer(result, count); + for (idx_t i = 0; i < count; i++) { + auto list_entry = list_entries[i]; + auto cnt_entry = cnt_entries[i]; + if (!list_entry.IsValid() || !cnt_entry.IsValid()) { + result_writer.WriteNull(); + continue; + } + const auto &list_input = list_entry.GetValue(); + const auto cnt = cnt_entry.GetValue(); + const idx_t copy_count = cnt <= 0 || list_input.length == 0 ? 
0 : UnsafeNumericCast(cnt); + idx_t result_length; + if (!TryMultiplyOperator::Operation(list_input.length, copy_count, result_length)) { + throw OutOfRangeException("Cannot create a list of size: '%d' * '%d', the result is too large", + list_input.length, copy_count); + } + // reserve the worst-case child capacity up front so an absurd target + // size fails before we enter a 10^N-iteration Append loop + ListVector::Reserve(result, ListVector::GetListSize(result) + result_length); + auto list = result_writer.WriteDynamicList(); + for (idx_t j = 0; j < copy_count; j++) { + list.Append(source_child, *FlatVector::IncrementalSelectionVector(), list_input.offset + list_input.length, + list_input.offset, list_input.length); + } + } + result.Verify(); } ScalarFunctionSet RepeatFun::GetFunctions() { diff --git a/src/duckdb/extension/core_functions/scalar/string/to_base.cpp b/src/duckdb/extension/core_functions/scalar/string/to_base.cpp index 21ee96cb4..740fbcc9f 100644 --- a/src/duckdb/extension/core_functions/scalar/string/to_base.cpp +++ b/src/duckdb/extension/core_functions/scalar/string/to_base.cpp @@ -12,6 +12,7 @@ static unique_ptr ToBaseBind(BindScalarFunctionInput &input) { D_ASSERT(arguments.size() == 2 || arguments.size() == 3); if (arguments.size() == 2) { arguments.push_back(make_uniq_base(Value::INTEGER(0))); + input.GetBoundFunction().GetArguments().push_back(LogicalType::INTEGER); } return nullptr; } diff --git a/src/duckdb/extension/core_functions/scalar/struct/struct_insert.cpp b/src/duckdb/extension/core_functions/scalar/struct/struct_insert.cpp index 3074cf2cf..cef63e9c3 100644 --- a/src/duckdb/extension/core_functions/scalar/struct/struct_insert.cpp +++ b/src/duckdb/extension/core_functions/scalar/struct/struct_insert.cpp @@ -13,7 +13,7 @@ namespace duckdb { static void StructInsertFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &starting_vec = args.data[0]; - starting_vec.Verify(args.size()); + starting_vec.Verify(); auto 
&starting_child_entries = StructVector::GetEntries(starting_vec); auto &result_child_entries = StructVector::GetEntries(result); @@ -36,7 +36,7 @@ static unique_ptr StructInsertBind(BindScalarFunctionInput &input) if (arguments.empty()) { throw InvalidInputException("Missing required arguments for struct_insert function."); } - if (LogicalTypeId::STRUCT != arguments[0]->return_type.id()) { + if (LogicalTypeId::STRUCT != arguments[0]->GetReturnType().id()) { throw InvalidInputException("The first argument to struct_insert must be a STRUCT"); } if (arguments.size() < 2) { @@ -45,7 +45,7 @@ static unique_ptr StructInsertBind(BindScalarFunctionInput &input) case_insensitive_set_t name_collision_set; child_list_t new_children; - auto &existing_children = StructType::GetChildTypes(arguments[0]->return_type); + auto &existing_children = StructType::GetChildTypes(arguments[0]->GetReturnType()); for (idx_t i = 0; i < existing_children.size(); i++) { auto &child = existing_children[i]; @@ -63,7 +63,7 @@ static unique_ptr StructInsertBind(BindScalarFunctionInput &input) throw BinderException("Duplicate struct entry name \"%s\"", child->GetAlias()); } name_collision_set.insert(child->GetAlias()); - new_children.push_back(make_pair(child->GetAlias(), arguments[i]->return_type)); + new_children.push_back(make_pair(child->GetAlias(), arguments[i]->GetReturnType())); } bound_function.SetReturnType(LogicalType::STRUCT(new_children)); @@ -73,7 +73,7 @@ static unique_ptr StructInsertBind(BindScalarFunctionInput &input) static unique_ptr StructInsertStats(ClientContext &context, FunctionStatisticsInput &input) { auto &child_stats = input.child_stats; auto &expr = input.expr; - auto new_stats = StructStats::CreateUnknown(expr.return_type); + auto new_stats = StructStats::CreateUnknown(expr.GetReturnType()); auto existing_count = StructType::GetChildCount(child_stats[0].GetType()); auto existing_stats = StructStats::GetChildStats(child_stats[0]); @@ -81,7 +81,7 @@ static unique_ptr 
StructInsertStats(ClientContext &context, Func StructStats::SetChildStats(new_stats, i, existing_stats[i]); } - auto new_count = StructType::GetChildCount(expr.return_type); + auto new_count = StructType::GetChildCount(expr.GetReturnType()); auto offset = new_count - child_stats.size(); for (idx_t i = 1; i < child_stats.size(); i++) { StructStats::SetChildStats(new_stats, offset + i, child_stats[i]); diff --git a/src/duckdb/extension/core_functions/scalar/struct/struct_keys.cpp b/src/duckdb/extension/core_functions/scalar/struct/struct_keys.cpp index 7dff6e9ad..4dd3de084 100644 --- a/src/duckdb/extension/core_functions/scalar/struct/struct_keys.cpp +++ b/src/duckdb/extension/core_functions/scalar/struct/struct_keys.cpp @@ -1,4 +1,3 @@ -#include "duckdb/common/vector/list_vector.hpp" #include "duckdb/common/types/vector.hpp" #include "duckdb/execution/expression_executor_state.hpp" #include "core_functions/scalar/struct_functions.hpp" @@ -15,17 +14,12 @@ struct StructKeysBindData : public FunctionData { const auto &child_types = StructType::GetChildTypes(type); const auto count = child_types.size(); - ListVector::Reserve(keys_vector, count); - auto &list_child = ListVector::GetChildMutable(keys_vector); - auto child_data = FlatVector::Writer(list_child, count); - for (idx_t i = 0; i < count; i++) { - child_data.WriteValue(string_t(child_types[i].first)); + auto list_writer = FlatVector::Writer>(keys_vector, 2); + idx_t idx = 0; + for (auto &child_writer : list_writer.WriteList(count)) { + child_writer.WriteValue(string_t(child_types[idx++].first)); } - ListVector::SetListSize(keys_vector, count); - - auto list_entries = FlatVector::Writer(keys_vector, 2); - list_entries.WriteValue(list_entry_t(0, count)); - list_entries.WriteNull(); + list_writer.WriteNull(); } bool Equals(const FunctionData &other) const override { @@ -48,7 +42,7 @@ static void StructKeysFunction(DataChunk &args, ExpressionState &state, Vector & // If the input is a constant, we must return a 
CONSTANT_VECTOR if (args.AllConstant()) { - ConstantVector::Reference(result, keys_vector, 0, count); + ConstantVector::Reference(result, count_t(count), keys_vector, 0, 1); return; } @@ -65,16 +59,16 @@ static void StructKeysFunction(DataChunk &args, ExpressionState &state, Vector & static unique_ptr StructKeysBind(BindScalarFunctionInput &input) { auto &arguments = input.GetArguments(); - auto return_type = arguments[0]->return_type; + auto return_type = arguments[0]->GetReturnType(); if (return_type.id() != LogicalTypeId::STRUCT && !return_type.IsAggregateStateStructType()) { throw InvalidInputException("struct_keys() expects a STRUCT argument"); } - const bool is_unnamed = StructType::IsUnnamed(arguments[0]->return_type); + const bool is_unnamed = StructType::IsUnnamed(arguments[0]->GetReturnType()); if (is_unnamed) { throw InvalidInputException("struct_keys() cannot be applied to an unnamed STRUCT"); } - return make_uniq(arguments[0]->return_type); + return make_uniq(arguments[0]->GetReturnType()); } ScalarFunction StructKeysFun::GetFunction() { diff --git a/src/duckdb/extension/core_functions/scalar/struct/struct_update.cpp b/src/duckdb/extension/core_functions/scalar/struct/struct_update.cpp index 441bf2ab3..eb867000f 100644 --- a/src/duckdb/extension/core_functions/scalar/struct/struct_update.cpp +++ b/src/duckdb/extension/core_functions/scalar/struct/struct_update.cpp @@ -13,7 +13,7 @@ namespace duckdb { static void StructUpdateFunction(DataChunk &args, ExpressionState &state, Vector &result) { auto &starting_vec = args.data[0]; - starting_vec.Verify(args.size()); + starting_vec.Verify(); auto &starting_child_entries = StructVector::GetEntries(starting_vec); auto &result_child_entries = StructVector::GetEntries(result); @@ -26,7 +26,7 @@ static void StructUpdateFunction(DataChunk &args, ExpressionState &state, Vector for (idx_t arg_idx = 1; arg_idx < func_args.size(); arg_idx++) { auto &new_child = func_args[arg_idx]; - 
new_entries.emplace(new_child->alias, arg_idx); + new_entries.emplace(new_child->GetAlias(), arg_idx); } // Assign the original child entries to the STRUCT. @@ -59,7 +59,7 @@ static unique_ptr StructUpdateBind(BindScalarFunctionInput &input) if (arguments.empty()) { throw InvalidInputException("Missing required arguments for struct_update function."); } - if (LogicalTypeId::STRUCT != arguments[0]->return_type.id()) { + if (LogicalTypeId::STRUCT != arguments[0]->GetReturnType().id()) { throw InvalidInputException("The first argument to struct_update must be a STRUCT"); } if (arguments.size() < 2) { @@ -67,7 +67,7 @@ static unique_ptr StructUpdateBind(BindScalarFunctionInput &input) } child_list_t new_children; - auto &existing_children = StructType::GetChildTypes(arguments[0]->return_type); + auto &existing_children = StructType::GetChildTypes(arguments[0]->GetReturnType()); auto incoming_children = case_insensitive_tree_t(); auto is_new_field = vector(arguments.size(), true); @@ -75,12 +75,12 @@ static unique_ptr StructUpdateBind(BindScalarFunctionInput &input) // Validate incoming arguments and record names for (idx_t arg_idx = 1; arg_idx < arguments.size(); arg_idx++) { auto &child = arguments[arg_idx]; - if (child->alias.empty()) { + if (child->GetAlias().empty()) { throw BinderException("Need named argument for struct update, e.g., a := b"); - } else if (incoming_children.find(child->alias) != incoming_children.end()) { - throw InvalidInputException("Duplicate named argument provided for %s", child->alias.c_str()); + } else if (incoming_children.find(child->GetAlias()) != incoming_children.end()) { + throw InvalidInputException("Duplicate named argument provided for %s", child->GetAlias().c_str()); } - incoming_children.emplace(child->alias, arg_idx); + incoming_children.emplace(child->GetAlias(), arg_idx); } for (idx_t field_idx = 0; field_idx < existing_children.size(); field_idx++) { @@ -93,7 +93,7 @@ static unique_ptr 
StructUpdateBind(BindScalarFunctionInput &input) // Update the struct with the new data of the same name auto arg_idx = update->second; auto &new_child = arguments[arg_idx]; - new_children.push_back(make_pair(new_child->alias, new_child->return_type)); + new_children.push_back(make_pair(new_child->GetAlias(), new_child->GetReturnType())); is_new_field[arg_idx] = false; } } @@ -102,7 +102,7 @@ static unique_ptr StructUpdateBind(BindScalarFunctionInput &input) for (idx_t arg_idx = 1; arg_idx < arguments.size(); arg_idx++) { if (is_new_field[arg_idx]) { auto &child = arguments[arg_idx]; - new_children.push_back(make_pair(child->alias, child->return_type)); + new_children.push_back(make_pair(child->GetAlias(), child->GetReturnType())); } } @@ -110,17 +110,17 @@ static unique_ptr StructUpdateBind(BindScalarFunctionInput &input) return make_uniq(bound_function.GetReturnType()); } -unique_ptr StructUpdateStats(ClientContext &context, FunctionStatisticsInput &input) { +static unique_ptr StructUpdateStats(ClientContext &context, FunctionStatisticsInput &input) { auto &child_stats = input.child_stats; auto &expr = input.expr; auto incoming_children = case_insensitive_tree_t(); auto is_new_field = vector(expr.children.size(), true); - auto new_stats = StructStats::CreateUnknown(expr.return_type); + auto new_stats = StructStats::CreateUnknown(expr.GetReturnType()); for (idx_t arg_idx = 1; arg_idx < expr.children.size(); arg_idx++) { auto &new_child = expr.children[arg_idx]; - incoming_children.emplace(new_child->alias, arg_idx); + incoming_children.emplace(new_child->GetAlias(), arg_idx); } auto existing_type = child_stats[0].GetType(); diff --git a/src/duckdb/extension/core_functions/scalar/struct/struct_values.cpp b/src/duckdb/extension/core_functions/scalar/struct/struct_values.cpp index 33156cec0..aedae1293 100644 --- a/src/duckdb/extension/core_functions/scalar/struct/struct_values.cpp +++ b/src/duckdb/extension/core_functions/scalar/struct/struct_values.cpp @@ -30,7 
+30,7 @@ static void StructValuesFunction(DataChunk &args, ExpressionState &state, Vector if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) { if (ConstantVector::IsNull(input)) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(count)); } } else { // set only the struct buffer's type - do not propagate to children @@ -56,7 +56,7 @@ static void StructValuesFunction(DataChunk &args, ExpressionState &state, Vector static unique_ptr StructValuesBind(BindScalarFunctionInput &input) { auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); - const auto arg_type = arguments[0]->return_type; + const auto arg_type = arguments[0]->GetReturnType(); if (arg_type == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } @@ -67,7 +67,7 @@ static unique_ptr StructValuesBind(BindScalarFunctionInput &input) // Build unnamed children list using only types, with empty names child_list_t unnamed_children; - auto &children = StructType::GetChildTypes(arguments[0]->return_type); + auto &children = StructType::GetChildTypes(arguments[0]->GetReturnType()); unnamed_children.reserve(children.size()); for (auto &child : children) { unnamed_children.emplace_back("", child.second); diff --git a/src/duckdb/extension/core_functions/scalar/union/union_extract.cpp b/src/duckdb/extension/core_functions/scalar/union/union_extract.cpp index fa97be4a7..784de169e 100644 --- a/src/duckdb/extension/core_functions/scalar/union/union_extract.cpp +++ b/src/duckdb/extension/core_functions/scalar/union/union_extract.cpp @@ -34,12 +34,12 @@ void UnionExtractFunction(DataChunk &args, ExpressionState &state, Vector &resul // this should be guaranteed by the binder auto &vec = args.data[0]; - vec.Verify(args.size()); + vec.Verify(); D_ASSERT(info.index < UnionType::GetMemberCount(vec.GetType())); auto &member = UnionVector::GetMember(vec, info.index); result.Reference(member); - result.Verify(args.size()); + result.Verify(); } 
unique_ptr UnionExtractBind(BindScalarFunctionInput &input) { @@ -47,24 +47,24 @@ unique_ptr UnionExtractBind(BindScalarFunctionInput &input) { auto &bound_function = input.GetBoundFunction(); auto &arguments = input.GetArguments(); D_ASSERT(bound_function.GetArguments().size() == 2); - if (arguments[0]->return_type.id() == LogicalTypeId::UNKNOWN) { + if (arguments[0]->GetReturnType().id() == LogicalTypeId::UNKNOWN) { throw ParameterNotResolvedException(); } - if (arguments[0]->return_type.id() != LogicalTypeId::UNION) { + if (arguments[0]->GetReturnType().id() != LogicalTypeId::UNION) { throw BinderException("union_extract can only take a union parameter"); } - idx_t union_member_count = UnionType::GetMemberCount(arguments[0]->return_type); + idx_t union_member_count = UnionType::GetMemberCount(arguments[0]->GetReturnType()); if (union_member_count == 0) { throw InternalException("Can't extract something from an empty union"); } - bound_function.GetArguments()[0] = arguments[0]->return_type; + bound_function.GetArguments()[0] = arguments[0]->GetReturnType(); auto &key_child = arguments[1]; if (key_child->HasParameter()) { throw ParameterNotResolvedException(); } - if (key_child->return_type.id() != LogicalTypeId::VARCHAR || !key_child->IsFoldable()) { + if (key_child->GetReturnType().id() != LogicalTypeId::VARCHAR || !key_child->IsFoldable()) { throw BinderException("Key name for union_extract needs to be a constant string"); } Value key_val = ExpressionExecutor::EvaluateScalar(context, *key_child); @@ -80,11 +80,11 @@ unique_ptr UnionExtractBind(BindScalarFunctionInput &input) { bool found_key = false; for (size_t i = 0; i < union_member_count; i++) { - auto &member_name = UnionType::GetMemberName(arguments[0]->return_type, i); + auto &member_name = UnionType::GetMemberName(arguments[0]->GetReturnType(), i); if (StringUtil::Lower(member_name) == key) { found_key = true; key_index = i; - return_type = UnionType::GetMemberType(arguments[0]->return_type, i); + 
return_type = UnionType::GetMemberType(arguments[0]->GetReturnType(), i); break; } } @@ -93,7 +93,7 @@ unique_ptr UnionExtractBind(BindScalarFunctionInput &input) { vector candidates; candidates.reserve(union_member_count); for (idx_t i = 0; i < union_member_count; i++) { - candidates.push_back(UnionType::GetMemberName(arguments[0]->return_type, i)); + candidates.push_back(UnionType::GetMemberName(arguments[0]->GetReturnType(), i)); } auto closest_settings = StringUtil::TopNJaroWinkler(candidates, key); auto message = StringUtil::CandidatesMessage(closest_settings, "Candidate Entries"); diff --git a/src/duckdb/extension/core_functions/scalar/union/union_tag.cpp b/src/duckdb/extension/core_functions/scalar/union/union_tag.cpp index 115d61bd5..579c7e5d8 100644 --- a/src/duckdb/extension/core_functions/scalar/union/union_tag.cpp +++ b/src/duckdb/extension/core_functions/scalar/union/union_tag.cpp @@ -16,11 +16,11 @@ unique_ptr UnionTagBind(BindScalarFunctionInput &input) { throw BinderException("Missing required arguments for union_tag function."); } - if (LogicalTypeId::UNKNOWN == arguments[0]->return_type.id()) { + if (LogicalTypeId::UNKNOWN == arguments[0]->GetReturnType().id()) { throw ParameterNotResolvedException(); } - if (LogicalTypeId::UNION != arguments[0]->return_type.id()) { + if (LogicalTypeId::UNION != arguments[0]->GetReturnType().id()) { throw BinderException("First argument to union_tag function must be a union type."); } @@ -28,18 +28,18 @@ unique_ptr UnionTagBind(BindScalarFunctionInput &input) { throw BinderException("Too many arguments, union_tag takes at most one argument."); } - auto member_count = UnionType::GetMemberCount(arguments[0]->return_type); + auto member_count = UnionType::GetMemberCount(arguments[0]->GetReturnType()); if (member_count == 0) { // this should never happen, empty unions are not allowed throw InternalException("Can't get tags from an empty union"); } - bound_function.GetArguments()[0] = arguments[0]->return_type; + 
bound_function.GetArguments()[0] = arguments[0]->GetReturnType(); auto varchar_vector = Vector(LogicalType::VARCHAR, member_count); auto result_data = FlatVector::Writer(varchar_vector, member_count); for (idx_t i = 0; i < member_count; i++) { - result_data.WriteValue(string_t(UnionType::GetMemberName(arguments[0]->return_type, i))); + result_data.WriteValue(string_t(UnionType::GetMemberName(arguments[0]->GetReturnType(), i))); } auto enum_type = LogicalType::ENUM(varchar_vector, member_count); bound_function.SetReturnType(enum_type); diff --git a/src/duckdb/extension/core_functions/scalar/union/union_value.cpp b/src/duckdb/extension/core_functions/scalar/union/union_value.cpp index aab7d8326..5f9eba8d5 100644 --- a/src/duckdb/extension/core_functions/scalar/union/union_value.cpp +++ b/src/duckdb/extension/core_functions/scalar/union/union_value.cpp @@ -47,7 +47,7 @@ unique_ptr UnionValueBind(BindScalarFunctionInput &input) { child_list_t union_members; - union_members.push_back(make_pair(child->GetAlias(), child->return_type)); + union_members.push_back(make_pair(child->GetAlias(), child->GetReturnType())); bound_function.SetReturnType(LogicalType::UNION(std::move(union_members))); return make_uniq(bound_function.GetReturnType()); diff --git a/src/duckdb/extension/icu/icu-current.cpp b/src/duckdb/extension/icu/icu-current.cpp index 76a7ae0f3..6446ab66f 100644 --- a/src/duckdb/extension/icu/icu-current.cpp +++ b/src/duckdb/extension/icu/icu-current.cpp @@ -3,8 +3,6 @@ #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/common/types/time.hpp" #include "duckdb/common/types/timestamp.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/transaction/meta_transaction.hpp" #include "include/icu-current.hpp" #include "include/icu-casts.hpp" @@ -23,15 +21,15 @@ static void CurrentTimeFunction(DataChunk &input, ExpressionState &state, Vector 
dtime_tz_t result_time(dtime_t(0), 0); ICUToTimeTZ::ToTimeTZ(data.calendar.get(), instant, result_time); auto val = Value::TIMETZ(result_time); - result.Reference(val); + result.Reference(val, count_t(input.size())); } static void CurrentDateFunction(DataChunk &input, ExpressionState &state, Vector &result) { D_ASSERT(input.ColumnCount() == 0); auto instant = GetTransactionTimestamp(state); - auto val = Value::DATE(ICUMakeDate::ToDate(state.GetContext(), instant)); - result.Reference(val); + auto val = Value::DATE(ICUMakeDate::ToDate(state.GetContext(), timestamp_tz_t(instant))); + result.Reference(val, count_t(input.size())); } ScalarFunction GetCurrentTimeFun() { diff --git a/src/duckdb/extension/icu/icu-datefunc.cpp b/src/duckdb/extension/icu/icu-datefunc.cpp index 71ae62eb5..af7dc0ee1 100644 --- a/src/duckdb/extension/icu/icu-datefunc.cpp +++ b/src/duckdb/extension/icu/icu-datefunc.cpp @@ -130,6 +130,24 @@ timestamp_t ICUDateFunc::GetTime(icu::Calendar *calendar, uint64_t micros) { return result; } +bool ICUDateFunc::TryGetTimeNS(icu::Calendar *calendar, uint64_t nanos, timestamp_ns_t &result) { + timestamp_t ts_micros; + if (!TryGetTime(calendar, nanos / Interval::NANOS_PER_MICRO, ts_micros)) { + return false; + } + + nanos %= Interval::NANOS_PER_MICRO; + return Timestamp::TryFromTimestampNanos(ts_micros, nanos, result); +} + +timestamp_ns_t ICUDateFunc::GetTimeNS(icu::Calendar *calendar, uint64_t nanos) { + timestamp_ns_t result; + if (!TryGetTimeNS(calendar, nanos, result)) { + throw ConversionException("ICU date overflows timestamp_ns range"); + } + return result; +} + uint64_t ICUDateFunc::SetTime(icu::Calendar *calendar, timestamp_t date) { int64_t millis = date.value / Interval::MICROS_PER_MSEC; int64_t micros = date.value % Interval::MICROS_PER_MSEC; @@ -147,6 +165,23 @@ uint64_t ICUDateFunc::SetTime(icu::Calendar *calendar, timestamp_t date) { return uint64_t(micros); } +uint64_t ICUDateFunc::SetTimeNS(icu::Calendar *calendar, timestamp_ns_t date) { + 
int64_t millis = date.value / Interval::NANOS_PER_MSEC; + int64_t nanos = date.value % Interval::NANOS_PER_MSEC; + if (nanos < 0) { + --millis; + nanos += Interval::NANOS_PER_MSEC; + } + + const auto udate = UDate(millis); + UErrorCode status = U_ZERO_ERROR; + calendar->setTime(udate, status); + if (U_FAILURE(status)) { + throw InternalException("Unable to set ICU calendar time."); + } + return uint64_t(nanos); +} + int32_t ICUDateFunc::ExtractField(icu::Calendar *calendar, UCalendarDateFields field) { UErrorCode status = U_ZERO_ERROR; const auto result = calendar->get(field, status); diff --git a/src/duckdb/extension/icu/icu-datepart.cpp b/src/duckdb/extension/icu/icu-datepart.cpp index 2b2d450a1..d76cbeb92 100644 --- a/src/duckdb/extension/icu/icu-datepart.cpp +++ b/src/duckdb/extension/icu/icu-datepart.cpp @@ -288,16 +288,15 @@ struct ICUDatePart : public ICUDateFunc { CalendarPtr calendar_ptr(info.calendar->clone()); auto calendar = calendar_ptr.get(); - UnaryExecutor::ExecuteWithNulls(date_arg, result, args.size(), - [&](INPUT_TYPE input, ValidityMask &mask, idx_t idx) { - if (Timestamp::IsFinite(input)) { - const auto micros = SetTime(calendar, input); - return info.adapters[0](calendar, micros); - } else { - mask.SetInvalid(idx); - return RESULT_TYPE(); - } - }); + UnaryExecutor::Execute(date_arg, result, args.size(), + [&](INPUT_TYPE input) -> optional { + if (Timestamp::IsFinite(input)) { + const auto micros = SetTime(calendar, input); + return info.adapters[0](calendar, micros); + } else { + return nullopt; + } + }); } template @@ -312,16 +311,15 @@ struct ICUDatePart : public ICUDateFunc { CalendarPtr calendar_ptr(info.calendar->clone()); auto calendar = calendar_ptr.get(); - BinaryExecutor::ExecuteWithNulls( + BinaryExecutor::Execute( part_arg, date_arg, result, args.size(), - [&](string_t specifier, INPUT_TYPE input, ValidityMask &mask, idx_t idx) { + [&](string_t specifier, INPUT_TYPE input) -> optional { if (Timestamp::IsFinite(input)) { const auto
micros = SetTime(calendar, input); auto adapter = PartCodeBigintFactory(GetDatePartSpecifier(specifier.GetString())); return adapter(calendar, micros); } else { - mask.SetInvalid(idx); - return RESULT_TYPE(0); + return nullopt; } }); } @@ -422,11 +420,11 @@ struct ICUDatePart : public ICUDateFunc { } } - result.Verify(count); + result.Verify(); } template - static duckdb::unique_ptr BindAdapter(ClientContext &context, ScalarFunction &bound_function, + static duckdb::unique_ptr BindAdapter(ClientContext &context, BoundScalarFunction &bound_function, vector> &arguments, typename BIND_TYPE::adapter_t adapter) { return make_uniq(context, adapter); @@ -437,7 +435,7 @@ struct ICUDatePart : public ICUDateFunc { auto &context = input.GetClientContext(); auto &arguments = input.GetArguments(); - const auto part_code = GetDatePartSpecifier(bound_function.name); + const auto part_code = GetDatePartSpecifier(bound_function.GetName()); if (IsBigintDatepart(part_code)) { using data_t = BindAdapterData; auto adapter = PartCodeBigintFactory(part_code); @@ -473,7 +471,7 @@ struct ICUDatePart : public ICUDateFunc { arguments.erase(arguments.begin()); bound_function.GetArguments().erase(bound_function.GetArguments().begin()); - bound_function.name = part_name; + bound_function.SetName(part_name); bound_function.SetReturnType(LogicalType::DOUBLE); bound_function.SetFunctionCallback(UnaryTimestampFunction); @@ -494,7 +492,7 @@ struct ICUDatePart : public ICUDateFunc { throw ParameterNotResolvedException(); } if (!arguments[0]->IsFoldable()) { - throw BinderException("%s can only take constant lists of part names", bound_function.name); + throw BinderException("%s can only take constant lists of part names", bound_function.GetName()); } case_insensitive_set_t name_collision_set; @@ -505,18 +503,19 @@ struct ICUDatePart : public ICUDateFunc { if (parts_list.type().id() == LogicalTypeId::LIST) { auto &list_children = ListValue::GetChildren(parts_list); if (list_children.empty()) { - throw 
BinderException("%s requires non-empty lists of part names", bound_function.name); + throw BinderException("%s requires non-empty lists of part names", bound_function.GetName()); } for (size_t col = 0; col < list_children.size(); ++col) { const auto &part_value = list_children[col]; if (part_value.IsNull()) { - throw BinderException("NULL struct entry name in %s", bound_function.name); + throw BinderException("NULL struct entry name in %s", bound_function.GetName()); } const auto part_name = part_value.ToString(); const auto part_code = GetDatePartSpecifier(part_name); if (name_collision_set.find(part_name) != name_collision_set.end()) { - throw BinderException("Duplicate struct entry name \"%s\" in %s", part_name, bound_function.name); + throw BinderException("Duplicate struct entry name \"%s\" in %s", part_name, + bound_function.GetName()); } name_collision_set.insert(part_name); part_codes.emplace_back(part_code); @@ -527,7 +526,7 @@ struct ICUDatePart : public ICUDateFunc { } } } else { - throw BinderException("%s can only take constant lists of part names", bound_function.name); + throw BinderException("%s can only take constant lists of part names", bound_function.GetName()); } Function::EraseArgument(bound_function, arguments, 0); @@ -536,7 +535,7 @@ struct ICUDatePart : public ICUDateFunc { } static void SerializeStructFunction(Serializer &serializer, const optional_ptr bind_data, - const ScalarFunction &function) { + const BoundScalarFunction &function) { D_ASSERT(bind_data); auto &info = bind_data->Cast(); serializer.WriteProperty(100, "tz_setting", info.tz_setting); @@ -545,7 +544,7 @@ struct ICUDatePart : public ICUDateFunc { } static duckdb::unique_ptr DeserializeStructFunction(Deserializer &deserializer, - ScalarFunction &bound_function) { + BoundScalarFunction &bound_function) { auto tz_setting = deserializer.ReadProperty(100, "tz_setting"); auto cal_setting = deserializer.ReadProperty(101, "cal_setting"); auto part_codes = 
deserializer.ReadProperty>(102, "part_codes"); @@ -561,9 +560,11 @@ struct ICUDatePart : public ICUDateFunc { template static void AddUnaryPartCodeFunctions(const string &name, ExtensionLoader &loader, - const LogicalType &result_type = LogicalType::BIGINT) { + const LogicalType &result_type = LogicalType::BIGINT, + ArgProperties unary_arg0_props = {}) { ScalarFunctionSet set(name); set.AddFunction(GetUnaryPartCodeFunction(LogicalType::TIMESTAMP_TZ, result_type)); + set.SetUnaryArgProperties(unary_arg0_props); loader.RegisterFunction(set); } @@ -655,14 +656,17 @@ struct ICUDatePart : public ICUDateFunc { void RegisterICUDatePartFunctions(ExtensionLoader &loader) { // register the individual operators + // year/decade use UCAL_YEAR (year-of-era, positive in both BC and AD), which is non-monotonic + // across the BC/AD flip; leave them unannotated. era/century/millennium/isoyear are signed. + // BIGINTs - ICUDatePart::AddUnaryPartCodeFunctions("era", loader); + ICUDatePart::AddUnaryPartCodeFunctions("era", loader, LogicalType::BIGINT, ArgProperties().NonDecreasing()); ICUDatePart::AddUnaryPartCodeFunctions("year", loader); ICUDatePart::AddUnaryPartCodeFunctions("month", loader); ICUDatePart::AddUnaryPartCodeFunctions("day", loader); ICUDatePart::AddUnaryPartCodeFunctions("decade", loader); - ICUDatePart::AddUnaryPartCodeFunctions("century", loader); - ICUDatePart::AddUnaryPartCodeFunctions("millennium", loader); + ICUDatePart::AddUnaryPartCodeFunctions("century", loader, LogicalType::BIGINT, ArgProperties().NonDecreasing()); + ICUDatePart::AddUnaryPartCodeFunctions("millennium", loader, LogicalType::BIGINT, ArgProperties().NonDecreasing()); ICUDatePart::AddUnaryPartCodeFunctions("microsecond", loader); ICUDatePart::AddUnaryPartCodeFunctions("millisecond", loader); ICUDatePart::AddUnaryPartCodeFunctions("second", loader); @@ -673,7 +677,7 @@ void RegisterICUDatePartFunctions(ExtensionLoader &loader) { ICUDatePart::AddUnaryPartCodeFunctions("week", loader); // Note 
that WeekOperator is ISO-8601, not US ICUDatePart::AddUnaryPartCodeFunctions("dayofyear", loader); ICUDatePart::AddUnaryPartCodeFunctions("quarter", loader); - ICUDatePart::AddUnaryPartCodeFunctions("isoyear", loader); + ICUDatePart::AddUnaryPartCodeFunctions("isoyear", loader, LogicalType::BIGINT, ArgProperties().NonDecreasing()); ICUDatePart::AddUnaryPartCodeFunctions("timezone", loader); ICUDatePart::AddUnaryPartCodeFunctions("timezone_hour", loader); ICUDatePart::AddUnaryPartCodeFunctions("timezone_minute", loader); @@ -683,7 +687,8 @@ void RegisterICUDatePartFunctions(ExtensionLoader &loader) { ICUDatePart::AddUnaryPartCodeFunctions("julian", loader, LogicalType::DOUBLE); // register combinations - ICUDatePart::AddUnaryPartCodeFunctions("yearweek", loader); // Note this is ISO year and week + ICUDatePart::AddUnaryPartCodeFunctions("yearweek", loader, LogicalType::BIGINT, + ArgProperties().NonDecreasing()); // ISO year and week // register various aliases ICUDatePart::AddUnaryPartCodeFunctions("dayofmonth", loader); diff --git a/src/duckdb/extension/icu/icu-datesub.cpp b/src/duckdb/extension/icu/icu-datesub.cpp index 9e0099641..73ad5b5e8 100644 --- a/src/duckdb/extension/icu/icu-datesub.cpp +++ b/src/duckdb/extension/icu/icu-datesub.cpp @@ -4,7 +4,7 @@ #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/common/enums/date_part_specifier.hpp" #include "duckdb/common/types/timestamp.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" namespace duckdb { @@ -106,26 +106,23 @@ struct ICUCalendarSub : public ICUDateFunc { } const auto specifier = ConstantVector::GetData(part_arg)->GetString(); auto part_func = SubtractFactory(GetDatePartSpecifier(specifier)); - BinaryExecutor::ExecuteWithNulls( - startdate_arg, enddate_arg, result, args.size(), - [&](T start_date, T end_date, ValidityMask &mask, idx_t idx) { + BinaryExecutor::Execute( + startdate_arg, 
enddate_arg, result, args.size(), [&](T start_date, T end_date) -> optional { if (Timestamp::IsFinite(start_date) && Timestamp::IsFinite(end_date)) { return part_func(calendar.get(), start_date, end_date); } else { - mask.SetInvalid(idx); - return int64_t(0); + return nullopt; } }); } else { - TernaryExecutor::ExecuteWithNulls( + TernaryExecutor::Execute( part_arg, startdate_arg, enddate_arg, result, args.size(), - [&](string_t specifier, T start_date, T end_date, ValidityMask &mask, idx_t idx) { + [&](string_t specifier, T start_date, T end_date) -> optional { if (Timestamp::IsFinite(start_date) && Timestamp::IsFinite(end_date)) { auto part_func = SubtractFactory(GetDatePartSpecifier(specifier.GetString())); return part_func(calendar.get(), start_date, end_date); } else { - mask.SetInvalid(idx); - return int64_t(0); + return nullopt; } }); } @@ -139,6 +136,8 @@ struct ICUCalendarSub : public ICUDateFunc { static void AddFunctions(const string &name, ExtensionLoader &loader) { ScalarFunctionSet set(name); set.AddFunction(GetFunction(LogicalType::TIMESTAMP_TZ)); + set.SetArgProperties(1, ArgProperties().NonIncreasing()); + set.SetArgProperties(2, ArgProperties().NonDecreasing()); loader.RegisterFunction(set); } }; @@ -231,34 +230,33 @@ struct ICUCalendarDiff : public ICUDateFunc { if (part_arg.GetVectorType() == VectorType::CONSTANT_VECTOR) { // Common case of constant part. 
if (ConstantVector::IsNull(part_arg)) { - throw InternalException("ICUDateSub called with constant NULL bucket width"); + ConstantVector::SetNull(result, true); + } else { + const auto specifier = ConstantVector::GetData(part_arg)->GetString(); + const auto part = GetDatePartSpecifier(specifier); + auto trunc_func = DiffTruncationFactory(part); + auto sub_func = SubtractFactory(part); + BinaryExecutor::Execute( + startdate_arg, enddate_arg, result, args.size(), + [&](T start_date, T end_date) -> optional { + if (Timestamp::IsFinite(start_date) && Timestamp::IsFinite(end_date)) { + return DifferenceFunc(calendar, start_date, end_date, trunc_func, sub_func); + } else { + return nullopt; + } + }); } - const auto specifier = ConstantVector::GetData(part_arg)->GetString(); - const auto part = GetDatePartSpecifier(specifier); - auto trunc_func = DiffTruncationFactory(part); - auto sub_func = SubtractFactory(part); - BinaryExecutor::ExecuteWithNulls( - startdate_arg, enddate_arg, result, args.size(), - [&](T start_date, T end_date, ValidityMask &mask, idx_t idx) { - if (Timestamp::IsFinite(start_date) && Timestamp::IsFinite(end_date)) { - return DifferenceFunc(calendar, start_date, end_date, trunc_func, sub_func); - } else { - mask.SetInvalid(idx); - return int64_t(0); - } - }); } else { - TernaryExecutor::ExecuteWithNulls( + TernaryExecutor::Execute( part_arg, startdate_arg, enddate_arg, result, args.size(), - [&](string_t specifier, T start_date, T end_date, ValidityMask &mask, idx_t idx) { + [&](string_t specifier, T start_date, T end_date) -> optional { if (Timestamp::IsFinite(start_date) && Timestamp::IsFinite(end_date)) { const auto part = GetDatePartSpecifier(specifier.GetString()); auto trunc_func = DiffTruncationFactory(part); auto sub_func = SubtractFactory(part); return DifferenceFunc(calendar, start_date, end_date, trunc_func, sub_func); } else { - mask.SetInvalid(idx); - return int64_t(0); + return nullopt; } }); } @@ -272,6 +270,8 @@ struct ICUCalendarDiff : 
public ICUDateFunc { static void AddFunctions(const string &name, ExtensionLoader &loader) { ScalarFunctionSet set(name); set.AddFunction(GetFunction(LogicalType::TIMESTAMP_TZ)); + set.SetArgProperties(1, ArgProperties().NonIncreasing()); + set.SetArgProperties(2, ArgProperties().NonDecreasing()); loader.RegisterFunction(set); } }; diff --git a/src/duckdb/extension/icu/icu-datetrunc.cpp b/src/duckdb/extension/icu/icu-datetrunc.cpp index 1c116ee20..13779d456 100644 --- a/src/duckdb/extension/icu/icu-datetrunc.cpp +++ b/src/duckdb/extension/icu/icu-datetrunc.cpp @@ -5,6 +5,7 @@ #include "duckdb/common/vector_operations/binary_executor.hpp" #include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/main/extension/extension_loader.hpp" +#include "duckdb/transaction/meta_transaction.hpp" namespace duckdb { @@ -172,6 +173,7 @@ struct ICUDateTrunc : public ICUDateFunc { static void AddBinaryTimestampFunction(const string &name, ExtensionLoader &loader) { ScalarFunctionSet set(name); set.AddFunction(GetDateTruncFunction(LogicalType::TIMESTAMP_TZ)); + set.SetArgProperties(1, ArgProperties().NonDecreasing()); loader.RegisterFunction(set); } }; diff --git a/src/duckdb/extension/icu/icu-list-range.cpp b/src/duckdb/extension/icu/icu-list-range.cpp index bbde04ee2..4bb2c48ec 100644 --- a/src/duckdb/extension/icu/icu-list-range.cpp +++ b/src/duckdb/extension/icu/icu-list-range.cpp @@ -6,9 +6,8 @@ #include "duckdb/common/types/vector.hpp" #include "duckdb/function/function_set.hpp" #include "duckdb/function/scalar_function.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" #include "include/icu-datefunc.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "duckdb/main/extension/extension_loader.hpp" namespace duckdb { @@ -17,42 +16,38 @@ struct ICUListRange : public ICUDateFunc { template class RangeInfoStruct { public: - explicit RangeInfoStruct(DataChunk 
&args_p) : args(args_p) { - if (args.ColumnCount() == 3) { - args.data[0].ToUnifiedFormat(args.size(), vdata[0]); - args.data[1].ToUnifiedFormat(args.size(), vdata[1]); - args.data[2].ToUnifiedFormat(args.size(), vdata[2]); - } else { - throw InternalException("Unsupported number of parameters for range"); - } + explicit RangeInfoStruct(DataChunk &args_p) + : args(args_p), start_value_data(args.data[0].template Values(args.size())), + end_value_data(args.data[1].template Values(args.size())), + increment_value_data(args.data[2].template Values(args.size())) { } bool RowIsValid(idx_t row_idx) { - for (idx_t i = 0; i < args.ColumnCount(); i++) { - auto idx = vdata[i].sel->get_index(row_idx); - if (!vdata[i].validity.RowIsValid(idx)) { - return false; - } + if (!start_value_data[row_idx].IsValid()) { + return false; + } + if (!end_value_data[row_idx].IsValid()) { + return false; + } + if (!increment_value_data[row_idx].IsValid()) { + return false; } return true; } timestamp_t StartListValue(idx_t row_idx) { - auto data = (timestamp_t *)vdata[0].data; - auto idx = vdata[0].sel->get_index(row_idx); - return data[idx]; + auto start_val = start_value_data[row_idx]; + return start_val.GetValue(); } timestamp_t EndListValue(idx_t row_idx) { - auto data = (timestamp_t *)vdata[1].data; - auto idx = vdata[1].sel->get_index(row_idx); - return data[idx]; + auto end_val = end_value_data[row_idx]; + return end_val.GetValue(); } interval_t ListIncrementValue(idx_t row_idx) { - auto data = (interval_t *)vdata[2].data; - auto idx = vdata[2].sel->get_index(row_idx); - return data[idx]; + auto increment_val = increment_value_data[row_idx]; + return increment_val.GetValue(); } void GetListValues(idx_t row_idx, timestamp_t &start_value, timestamp_t &end_value, @@ -76,7 +71,9 @@ struct ICUListRange : public ICUDateFunc { private: DataChunk &args; - UnifiedVectorFormat vdata[3]; + VectorIterator start_value_data; + VectorIterator end_value_data; + VectorIterator increment_value_data; 
uint64_t ListLength(timestamp_t start_value, timestamp_t end_value, interval_t increment_value, bool inclusive_bound, TZCalendar &calendar) { @@ -135,48 +132,25 @@ struct ICUListRange : public ICUDateFunc { TZCalendar calendar(*bind_info.calendar, bind_info.cal_setting); RangeInfoStruct info(args); - idx_t args_size = 1; - auto result_type = VectorType::CONSTANT_VECTOR; - for (idx_t i = 0; i < args.ColumnCount(); i++) { - if (args.data[i].GetVectorType() != VectorType::CONSTANT_VECTOR) { - args_size = args.size(); - result_type = VectorType::FLAT_VECTOR; - break; - } - } - auto result_data = FlatVector::Writer(result, args_size); - int64_t total_size = 0; - vector list_lengths(args_size, 0); + idx_t args_size = args.size(); + auto list_writer = FlatVector::Writer>(result, args_size); for (idx_t i = 0; i < args_size; i++) { if (!info.RowIsValid(i)) { - result_data.WriteNull(list_entry_t(NumericCast(total_size), 0)); - } else { - const auto length = info.ListLength(i, calendar); - list_lengths[i] = length; - result_data.WriteValue(list_entry_t(NumericCast(total_size), NumericCast(length))); - total_size += length; + list_writer.WriteNull(); + continue; } - } + const auto length = info.ListLength(i, calendar); + auto list = list_writer.WriteDynamicList(); - // now construct the child vector of the list - ListVector::Reserve(result, total_size); - auto range_data = FlatVector::Writer(ListVector::GetChildMutable(result), total_size); - for (idx_t i = 0; i < args_size; i++) { - timestamp_t start_value = info.StartListValue(i); + timestamp_t range_value = info.StartListValue(i); interval_t increment = info.ListIncrementValue(i); - - timestamp_t range_value = start_value; - for (idx_t range_idx = 0; range_idx < NumericCast(list_lengths[i]); range_idx++) { + for (idx_t range_idx = 0; range_idx < NumericCast(length); range_idx++) { if (range_idx > 0) { info.Increment(range_value, increment, calendar); } - range_data.WriteValue(range_value); + 
list.WriteElement().WriteValue(range_value); } } - - ListVector::SetListSize(result, total_size); - result.SetVectorType(result_type); - result.Verify(args.size()); } diff --git a/src/duckdb/extension/icu/icu-makedate.cpp b/src/duckdb/extension/icu/icu-makedate.cpp index 433d2fee1..ca8fd5f85 100644 --- a/src/duckdb/extension/icu/icu-makedate.cpp +++ b/src/duckdb/extension/icu/icu-makedate.cpp @@ -3,11 +3,10 @@ #include "duckdb/common/operator/subtract.hpp" #include "duckdb/common/types/date.hpp" #include "duckdb/common/types/timestamp.hpp" -#include "duckdb/common/vector_operations/senary_executor.hpp" -#include "duckdb/common/vector_operations/septenary_executor.hpp" -#include "duckdb/function/cast/cast_function_set.hpp" +#include "duckdb/common/vector_operations/variadic_executor.hpp" #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/main/settings.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "include/icu-casts.hpp" #include "include/icu-datefunc.hpp" #include "include/icu-datetrunc.hpp" @@ -115,25 +114,25 @@ struct ICUMakeTimestampTZFunc : public ICUDateFunc { auto calendar = calendar_ptr.get(); // Three cases: no TZ, constant TZ, variable TZ - if (input.ColumnCount() == SenaryExecutor::NCOLS) { - SenaryExecutor::Execute( + if (input.ColumnCount() == 6) { + VariadicExecutor::Execute( input, result, [&](T yyyy, T mm, T dd, T hr, T mn, double ss) { return Operation(calendar, yyyy, mm, dd, hr, mn, ss); }); } else { - D_ASSERT(input.ColumnCount() == SeptenaryExecutor::NCOLS); + D_ASSERT(input.ColumnCount() == 7); auto &tz_vec = input.data.back(); if (tz_vec.GetVectorType() == VectorType::CONSTANT_VECTOR) { if (ConstantVector::IsNull(tz_vec)) { throw InternalException("ICUMakeTimestamp called with constant NULL tz"); } SetTimeZone(calendar, *ConstantVector::GetData(tz_vec)); - SenaryExecutor::Execute( + VariadicExecutor::Execute( input, result, [&](T yyyy, T mm, T dd, T hr, T mn, double ss) { return 
Operation(calendar, yyyy, mm, dd, hr, mn, ss); }); } else { - SeptenaryExecutor::Execute( + VariadicExecutor::Execute( input, result, [&](T yyyy, T mm, T dd, T hr, T mn, double ss, string_t tz_id) { SetTimeZone(calendar, tz_id); return Operation(calendar, yyyy, mm, dd, hr, mn, ss); diff --git a/src/duckdb/extension/icu/icu-strptime.cpp b/src/duckdb/extension/icu/icu-strptime.cpp index a71cdd2ed..5be41d65c 100644 --- a/src/duckdb/extension/icu/icu-strptime.cpp +++ b/src/duckdb/extension/icu/icu-strptime.cpp @@ -18,9 +18,9 @@ namespace duckdb { -TimestampComponents ICUHelpers::GetComponents(timestamp_tz_t ts, icu::Calendar *calendar) { +TimestampComponents ICUHelpers::GetComponents(timestamp_t ts, icu::Calendar *calendar) { // Get the parts in the given time zone - uint64_t micros = ICUDateFunc::SetTime(calendar, timestamp_t(ts.value)); + uint64_t micros = ICUDateFunc::SetTime(calendar, ts); TimestampComponents ts_data; ts_data.year = ICUDateFunc::ExtractField(calendar, UCAL_EXTENDED_YEAR); @@ -32,6 +32,14 @@ TimestampComponents ICUHelpers::GetComponents(timestamp_tz_t ts, icu::Calendar * ts_data.second = ICUDateFunc::ExtractField(calendar, UCAL_SECOND); ts_data.microsecond = UnsafeNumericCast( ICUDateFunc::ExtractField(calendar, UCAL_MILLISECOND) * Interval::MICROS_PER_MSEC + micros); + ts_data.nanosecond = 0; + return ts_data; +} + +TimestampComponents ICUHelpers::GetComponents(timestamp_ns_t tsns, icu::Calendar *calendar) { + // Get the parts in the given time zone + auto ts_data = GetComponents(timestamp_t(tsns.value / Interval::NANOS_PER_MICRO), calendar); + ts_data.nanosecond = tsns.value % Interval::NANOS_PER_MICRO; return ts_data; } @@ -101,6 +109,73 @@ struct ICUStrptime : public ICUDateFunc { return micros; } + static uint64_t ToNanos(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) { + // Get the parts in the current time zone + uint64_t nanos = parsed.data[6]; + calendar->set(UCAL_EXTENDED_YEAR, parsed.data[0]); // 
strptime doesn't understand eras + calendar->set(UCAL_MONTH, parsed.data[1] - 1); + calendar->set(UCAL_DATE, parsed.data[2]); + calendar->set(UCAL_HOUR_OF_DAY, parsed.data[3]); + calendar->set(UCAL_MINUTE, parsed.data[4]); + calendar->set(UCAL_SECOND, parsed.data[5]); + calendar->set(UCAL_MILLISECOND, UnsafeNumericCast(nanos / Interval::NANOS_PER_MSEC)); + nanos %= Interval::NANOS_PER_MSEC; + + // This overrides the TZ setting, so only use it if an offset was parsed. + // Note that we don't bother/worry about the DST setting because the two just combine. + if (format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET)) { + calendar->set(UCAL_ZONE_OFFSET, UnsafeNumericCast(parsed.data[7] * Interval::MSECS_PER_SEC)); + } + + return nanos; + } + + static inline void ParseOne(icu::Calendar *calendar, string_t input, vector &formats, + timestamp_t &result) { + ParseResult parsed; + for (auto &format : formats) { + if (format.Parse(input, parsed)) { + if (parsed.is_special) { + result = parsed.ToTimestamp(); + return; + } else { + // Set TZ first, if any. + if (!parsed.tz.empty()) { + SetTimeZone(calendar, parsed.tz); + } + + result = GetTime(calendar, ToMicros(calendar, parsed, format)); + return; + } + } + } + + throw InvalidInputException(parsed.FormatError(input, formats[0].format_specifier)); + } + + static inline void ParseOne(icu::Calendar *calendar, string_t input, vector &formats, + timestamp_ns_t &result) { + ParseResult parsed; + for (auto &format : formats) { + if (format.Parse(input, parsed)) { + if (parsed.is_special) { + result = timestamp_ns_t(parsed.ToTimestamp().value); + return; + } else { + // Set TZ first, if any. 
+ if (!parsed.tz.empty()) { + SetTimeZone(calendar, parsed.tz); + } + result = GetTimeNS(calendar, ToNanos(calendar, parsed, format)); + return; + } + } + } + + throw InvalidInputException(parsed.FormatError(input, formats[0].format_specifier)); + } + + template static void Parse(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.ColumnCount() == 2); auto &str_arg = args.data[0]; @@ -112,27 +187,52 @@ struct ICUStrptime : public ICUDateFunc { auto calendar = calendar_ptr.get(); D_ASSERT(fmt_arg.GetVectorType() == VectorType::CONSTANT_VECTOR); - UnaryExecutor::Execute(str_arg, result, args.size(), [&](string_t input) { - ParseResult parsed; - for (auto &format : info.formats) { - if (format.Parse(input, parsed)) { - if (parsed.is_special) { - return parsed.ToTimestamp(); - } else { - // Set TZ first, if any. - if (!parsed.tz.empty()) { - SetTimeZone(calendar, parsed.tz); - } - - return GetTime(calendar, ToMicros(calendar, parsed, format)); + UnaryExecutor::Execute(str_arg, result, args.size(), [&](string_t input) { + T parsed; + ParseOne(calendar, input, info.formats, parsed); + return parsed; + }); + } + + static inline bool TryParseOne(icu::Calendar *calendar, string_t input, vector &formats, + timestamp_t &result) { + ParseResult parsed; + for (auto &format : formats) { + if (format.Parse(input, parsed)) { + if (parsed.is_special) { + result = timestamp_ns_t(parsed.ToTimestamp().value); + return true; + } else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) { + if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) { + return true; } } } + } - throw InvalidInputException(parsed.FormatError(input, info.formats[0].format_specifier)); - }); + return false; + } + + static inline bool TryParseOne(icu::Calendar *calendar, string_t input, vector &formats, + timestamp_ns_t &result) { + ParseResult parsed; + for (auto &format : formats) { + if (format.Parse(input, parsed)) { + if (parsed.is_special) { + result = 
timestamp_ns_t(parsed.ToTimestamp().value); + return true; + } else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) { + if (TryGetTimeNS(calendar, ToNanos(calendar, parsed, format), result)) { + return true; + } + } + } + } + + return false; } + template static void TryParse(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.ColumnCount() == 2); auto &str_arg = args.data[0]; @@ -146,27 +246,15 @@ struct ICUStrptime : public ICUDateFunc { D_ASSERT(fmt_arg.GetVectorType() == VectorType::CONSTANT_VECTOR); if (ConstantVector::IsNull(fmt_arg)) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(args.size())); } else { - UnaryExecutor::ExecuteWithNulls( - str_arg, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) { - ParseResult parsed; - for (auto &format : info.formats) { - if (format.Parse(input, parsed)) { - if (parsed.is_special) { - return parsed.ToTimestamp(); - } else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) { - timestamp_t result; - if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) { - return result; - } - } - } - } - - mask.SetInvalid(idx); - return timestamp_t(); - }); + UnaryExecutor::Execute(str_arg, result, args.size(), [&](string_t input) -> optional { + T result; + if (TryParseOne(calendar, input, info.formats, result)) { + return result; + } + return nullopt; + }); } } @@ -183,10 +271,13 @@ struct ICUStrptime : public ICUDateFunc { if (!arguments[1]->IsFoldable()) { throw InvalidInputException("strptime format must be a constant"); } - scalar_function_t function = (bound_function.name == "try_strptime") ? TryParse : Parse; + const bool is_try = (bound_function.GetName() == "try_strptime"); + scalar_function_t function = is_try ? 
TryParse : Parse; Value format_value = ExpressionExecutor::EvaluateScalar(context, *arguments[1]); string format_string; StrpTimeFormat format; + bool has_tz = false; + bool has_ns = false; if (format_value.IsNull()) { ; } else if (format_value.type().id() == LogicalTypeId::VARCHAR) { @@ -198,9 +289,14 @@ struct ICUStrptime : public ICUDateFunc { } // If we have a time zone, we should use ICU for parsing and return a TSTZ instead. - if (format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME)) { + has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME); + has_ns = has_ns || format.HasFormatSpecifier(StrTimeSpecifier::NANOSECOND_PADDED); + if (has_tz) { + if (has_ns) { + function = is_try ? TryParse : Parse; + } bound_function.SetFunctionCallback(function); - bound_function.SetReturnType(LogicalType::TIMESTAMP_TZ); + bound_function.SetReturnType(has_ns ? LogicalType::TIMESTAMP_TZ_NS : LogicalType::TIMESTAMP_TZ); return make_uniq(context, format); } } else if (format_value.type() == LogicalType::LIST(LogicalType::VARCHAR)) { @@ -209,7 +305,6 @@ struct ICUStrptime : public ICUDateFunc { throw InvalidInputException("strptime format list must not be empty"); } vector formats; - bool has_tz = false; for (const auto &child : children) { format_string = child.ToString(); format.format_specifier = format_string; @@ -220,18 +315,24 @@ struct ICUStrptime : public ICUDateFunc { // If any format has UTC offsets or names, then we have to produce TSTZ has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME); has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET); + has_ns = has_ns || format.HasFormatSpecifier(StrTimeSpecifier::NANOSECOND_PADDED); formats.emplace_back(format); } if (has_tz) { + if (has_ns) { + function = is_try ? TryParse : Parse; + } bound_function.SetFunctionCallback(function); - bound_function.SetReturnType(LogicalType::TIMESTAMP_TZ); + bound_function.SetReturnType(has_ns ? 
LogicalType::TIMESTAMP_TZ_NS : LogicalType::TIMESTAMP_TZ); return make_uniq(context, formats); } } // Fall back to faster, non-TZ parsing bound_function.SetBindCallback(bind_strptime); - return bound_function.Bind(context, arguments); + BindScalarFunctionInput new_input(context, bound_function, arguments, + input.HasBinder() ? &input.GetBinder() : nullptr); + return bound_function.GetBindCallback()(new_input); } static void TailPatch(const string &name, ExtensionLoader &loader, const vector &types) { @@ -240,11 +341,24 @@ struct ICUStrptime : public ICUDateFunc { auto &functions = scalar_function.functions.functions; optional_idx best_index; for (idx_t i = 0; i < functions.size(); i++) { - auto &function = functions[i]; - if (types == function.GetArguments()) { - best_index = i; - break; + const auto &sig = functions[i].GetSignature(); + if (sig.GetParameterCount() != types.size()) { + continue; } + + auto match = true; + for (idx_t j = 0; j < sig.GetParameterCount(); j++) { + if (sig.GetParameter(j).GetType() != types[j]) { + match = false; + break; + } + } + if (!match) { + continue; + } + + best_index = i; + break; } if (!best_index.IsValid()) { throw InternalException("ICU - Function for TailPatch not found"); @@ -262,44 +376,71 @@ struct ICUStrptime : public ICUDateFunc { TailPatch(name, loader, types); } + static optional VarcharToTimestampTZUS(CalendarPtr &cal, string_t input, CastParameters ¶meters, + optional_ptr nanos = nullptr) { + timestamp_tz_t result; + const auto str = input.GetData(); + const auto len = input.GetSize(); + string_t tz(nullptr, 0); + bool has_offset = false; + auto success = Timestamp::TryConvertTimestampTZ(str, len, result, true, has_offset, tz, nanos); + if (success != TimestampCastResult::SUCCESS) { + string msg; + if (success == TimestampCastResult::ERROR_RANGE) { + msg = Timestamp::RangeError(string(str, len)); + } else { + msg = Timestamp::FormatError(string(str, len)); + } + HandleCastError::AssignError(msg, parameters); + 
return nullopt; + } else if (!has_offset) { + // Convert parts to a TZ (default or parsed) if no offset was provided + auto calendar = cal.get(); + + // Change TZ if one was provided. + if (tz.GetSize()) { + string error_msg; + SetTimeZone(calendar, tz, &error_msg); + if (!error_msg.empty()) { + HandleCastError::AssignError(error_msg, parameters); + return nullopt; + } + } + + // Now get the parts in the given time zone + result = timestamp_tz_t(FromNaive(calendar, result)); + } + + return result; + } + static bool VarcharToTimestampTZ(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { auto &cast_data = parameters.cast_data->Cast(); auto &info = cast_data.info->Cast(); CalendarPtr cal(info.calendar->clone()); - UnaryExecutor::ExecuteWithNulls( - source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) { - timestamp_tz_t result; - const auto str = input.GetData(); - const auto len = input.GetSize(); - string_t tz(nullptr, 0); - bool has_offset = false; - auto success = Timestamp::TryConvertTimestampTZ(str, len, result, true, has_offset, tz); - if (success != TimestampCastResult::SUCCESS) { - string msg; - if (success == TimestampCastResult::ERROR_RANGE) { - msg = Timestamp::RangeError(string(str, len)); - } else { - msg = Timestamp::FormatError(string(str, len)); - } - HandleCastError::AssignError(msg, parameters); - mask.SetInvalid(idx); - } else if (!has_offset) { - // Convert parts to a TZ (default or parsed) if no offset was provided - auto calendar = cal.get(); + UnaryExecutor::Execute( + source, result, count, [&](string_t input) { return VarcharToTimestampTZUS(cal, input, parameters); }); + return true; + } - // Change TZ if one was provided. 
- if (tz.GetSize()) { - string error_msg; - SetTimeZone(calendar, tz, &error_msg); - if (!error_msg.empty()) { - HandleCastError::AssignError(error_msg, parameters); - mask.SetInvalid(idx); - } - } + static bool VarcharToTimestampTZNS(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &cast_data = parameters.cast_data->Cast(); + auto &info = cast_data.info->Cast(); + CalendarPtr cal(info.calendar->clone()); + + UnaryExecutor::Execute( + source, result, count, [&](string_t input) -> optional { + int32_t nanos = 0; + auto ts_us = VarcharToTimestampTZUS(cal, input, parameters, &nanos); + if (!ts_us) { + return nullopt; + } - // Now get the parts in the given time zone - result = timestamp_tz_t(FromNaive(calendar, result)); + timestamp_tz_ns_t result; + if (!Timestamp::TryFromTimestampNanos(*ts_us, nanos, result)) { + HandleCastError::AssignError(Timestamp::RangeError(input), parameters); + return nullopt; } return result; @@ -312,8 +453,8 @@ struct ICUStrptime : public ICUDateFunc { auto &info = cast_data.info->Cast(); CalendarPtr cal(info.calendar->clone()); - UnaryExecutor::ExecuteWithNulls( - source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) { + UnaryExecutor::Execute( + source, result, count, [&](string_t input) -> optional { dtime_tz_t result; const auto str = input.GetData(); const auto len = input.GetSize(); @@ -322,7 +463,7 @@ struct ICUStrptime : public ICUDateFunc { if (!Time::TryConvertTimeTZ(str, len, pos, result, has_offset, false)) { auto msg = Time::ConversionError(string(str, len)); HandleCastError::AssignError(msg, parameters); - mask.SetInvalid(idx); + return nullopt; } else if (!has_offset) { // Convert parts to a TZ (default or parsed) if no offset was provided auto calendar = cal.get(); @@ -352,6 +493,8 @@ struct ICUStrptime : public ICUDateFunc { switch (target.id()) { case LogicalTypeId::TIMESTAMP_TZ: return BoundCastInfo(VarcharToTimestampTZ, std::move(cast_data)); + case 
LogicalTypeId::TIMESTAMP_TZ_NS: + return BoundCastInfo(VarcharToTimestampTZNS, std::move(cast_data)); case LogicalTypeId::TIME_TZ: return BoundCastInfo(VarcharToTimeTZ, std::move(cast_data)); default: @@ -361,6 +504,7 @@ struct ICUStrptime : public ICUDateFunc { static void AddCasts(ExtensionLoader &loader) { loader.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIMESTAMP_TZ, BindCastFromVarchar); + loader.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIMESTAMP_TZ_NS, BindCastFromVarchar); loader.RegisterCastFunction(LogicalType::VARCHAR, LogicalType::TIME_TZ, BindCastFromVarchar); } }; @@ -378,11 +522,6 @@ struct ICUStrftime : public ICUDateFunc { static string_t Operation(icu::Calendar *calendar, timestamp_t input, const char *tz_name, StrfTimeFormat &format, Vector &result) { - // Infinity is always formatted the same way - if (!Timestamp::IsFinite(input)) { - return StringVector::AddString(result, Timestamp::ToString(input)); - } - // Get the parts in the given time zone uint64_t micros = SetTime(calendar, input); @@ -410,6 +549,35 @@ struct ICUStrftime : public ICUDateFunc { return target; } + static string_t Operation(icu::Calendar *calendar, timestamp_ns_t input, const char *tz_name, + StrfTimeFormat &format, Vector &result) { + // Get the parts in the given time zone + uint64_t nanos = SetTimeNS(calendar, input); + + int32_t data[8]; + data[0] = ExtractField(calendar, UCAL_EXTENDED_YEAR); // strftime doesn't understand eras. 
+ data[1] = ExtractField(calendar, UCAL_MONTH) + 1; + data[2] = ExtractField(calendar, UCAL_DATE); + data[3] = ExtractField(calendar, UCAL_HOUR_OF_DAY); + data[4] = ExtractField(calendar, UCAL_MINUTE); + data[5] = ExtractField(calendar, UCAL_SECOND); + data[6] = + UnsafeNumericCast(ExtractField(calendar, UCAL_MILLISECOND) * Interval::NANOS_PER_MSEC + nanos); + + data[7] = ExtractField(calendar, UCAL_ZONE_OFFSET) + ExtractField(calendar, UCAL_DST_OFFSET); + data[7] /= Interval::MSECS_PER_SEC; + + const auto date = Date::FromDate(data[0], data[1], data[2]); + + const auto len = format.GetLength(date, data, tz_name); + string_t target = StringVector::EmptyString(result, len); + format.FormatStringNS(date, data, tz_name, target.GetDataWriteable()); + target.Finalize(); + + return target; + } + + template static void ICUStrftimeFunction(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.ColumnCount() == 2); auto &src_arg = args.data[0]; @@ -428,18 +596,16 @@ struct ICUStrftime : public ICUDateFunc { StrfTimeFormat format; ParseFormatSpecifier(*ConstantVector::GetData(fmt_arg), format); - UnaryExecutor::ExecuteWithNulls( - src_arg, result, args.size(), [&](timestamp_t input, ValidityMask &mask, idx_t idx) { - if (Timestamp::IsFinite(input)) { - return Operation(calendar.get(), input, tz_name, format, result); - } else { - return StringVector::AddString(result, Timestamp::ToString(input)); - } - }); + UnaryExecutor::Execute(src_arg, result, args.size(), [&](T input) { + if (Timestamp::IsFinite(input)) { + return Operation(calendar.get(), input, tz_name, format, result); + } else { + return StringVector::AddString(result, Timestamp::ToString(input)); + } + }); } else { - BinaryExecutor::ExecuteWithNulls( - src_arg, fmt_arg, result, args.size(), - [&](timestamp_t input, string_t format_specifier, ValidityMask &mask, idx_t idx) { + BinaryExecutor::Execute( + src_arg, fmt_arg, result, args.size(), [&](T input, string_t format_specifier) { if 
(Timestamp::IsFinite(input)) { StrfTimeFormat format; ParseFormatSpecifier(format_specifier, format); @@ -455,25 +621,28 @@ struct ICUStrftime : public ICUDateFunc { static void AddBinaryTimestampFunction(const string &name, ExtensionLoader &loader) { ScalarFunctionSet set(name); set.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_TZ, LogicalType::VARCHAR}, LogicalType::VARCHAR, - ICUStrftimeFunction, Bind)); + ICUStrftimeFunction, Bind)); + set.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_TZ_NS, LogicalType::VARCHAR}, LogicalType::VARCHAR, + ICUStrftimeFunction, Bind)); loader.RegisterFunction(set); } - static string_t CastOperation(icu::Calendar *calendar, timestamp_t input, Vector &result) { + template + static string_t CastOperation(icu::Calendar *calendar, T input, Vector &result) { // Infinity is always formatted the same way if (!Timestamp::IsFinite(input)) { return StringVector::AddString(result, Timestamp::ToString(input)); } // decompose the timestamp - auto ts_data = ICUHelpers::GetComponents(timestamp_tz_t(input.value), calendar); + auto ts_data = ICUHelpers::GetComponents(input, calendar); idx_t year_length; bool add_bc; const auto date_len = DateToStringCast::YearLength(ts_data.year, year_length, add_bc); - char micro_buffer[6]; - const auto time_len = TimeToStringCast::MicrosLength(ts_data.microsecond, micro_buffer); + char micro_buffer[9]; + const auto time_len = TimeToStringCast::MicrosLength(ts_data.microsecond, micro_buffer, ts_data.nanosecond); auto offset = ExtractField(calendar, UCAL_ZONE_OFFSET) + ExtractField(calendar, UCAL_DST_OFFSET); offset /= Interval::MSECS_PER_SEC; @@ -491,8 +660,7 @@ struct ICUStrftime : public ICUDateFunc { buffer += date_len; *buffer++ = ' '; - TimeToStringCast::Format(buffer, time_len, ts_data.hour, ts_data.minute, ts_data.second, ts_data.microsecond, - micro_buffer); + TimeToStringCast::Format(buffer, time_len, ts_data.hour, ts_data.minute, ts_data.second, 0, micro_buffer); buffer += time_len; 
memcpy(buffer, offset_str.c_str(), offset_len); @@ -503,15 +671,14 @@ struct ICUStrftime : public ICUDateFunc { return target; } + template static bool CastToVarchar(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { auto &cast_data = parameters.cast_data->Cast(); auto &info = cast_data.info->Cast(); CalendarPtr calendar(info.calendar->clone()); - UnaryExecutor::ExecuteWithNulls(source, result, count, - [&](timestamp_t input, ValidityMask &mask, idx_t idx) { - return CastOperation(calendar.get(), input, result); - }); + UnaryExecutor::Execute(source, result, count, + [&](T input) { return CastOperation(calendar.get(), input, result); }); return true; } @@ -522,11 +689,19 @@ struct ICUStrftime : public ICUDateFunc { auto cast_data = make_uniq(make_uniq(*input.context)); - return BoundCastInfo(CastToVarchar, std::move(cast_data)); + switch (source.id()) { + case LogicalTypeId::TIMESTAMP_TZ: + return BoundCastInfo(CastToVarchar, std::move(cast_data)); + case LogicalTypeId::TIMESTAMP_TZ_NS: + return BoundCastInfo(CastToVarchar, std::move(cast_data)); + default: + throw InternalException("Unexpected TIMESTAMPTZ type to VARCHAR cast."); + } } static void AddCasts(ExtensionLoader &loader) { loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::VARCHAR, BindCastToVarchar); + loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ_NS, LogicalType::VARCHAR, BindCastToVarchar); } }; diff --git a/src/duckdb/extension/icu/icu-table-range.cpp b/src/duckdb/extension/icu/icu-table-range.cpp index 002e62691..b63f1528f 100644 --- a/src/duckdb/extension/icu/icu-table-range.cpp +++ b/src/duckdb/extension/icu/icu-table-range.cpp @@ -2,6 +2,7 @@ #include "duckdb/common/operator/subtract.hpp" #include "duckdb/common/types/interval.hpp" #include "duckdb/common/types/timestamp.hpp" +#include "duckdb/main/client_context.hpp" #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/function/function_set.hpp" #include 
"duckdb/function/table_function.hpp" @@ -225,7 +226,7 @@ struct ICUTableRange { state.initialized_row = false; continue; } - output.SetCardinality(size); + output.SetChildCardinality(size); return OperatorResultType::HAVE_MORE_OUTPUT; } } diff --git a/src/duckdb/extension/icu/icu-timebucket.cpp b/src/duckdb/extension/icu/icu-timebucket.cpp index 3cb3d9dbe..76f8af2cd 100644 --- a/src/duckdb/extension/icu/icu-timebucket.cpp +++ b/src/duckdb/extension/icu/icu-timebucket.cpp @@ -3,14 +3,12 @@ #include "duckdb/common/operator/cast_operators.hpp" #include "duckdb/common/operator/subtract.hpp" #include "duckdb/common/types/interval.hpp" -#include "duckdb/common/types/time.hpp" #include "duckdb/common/types/timestamp.hpp" #include "duckdb/common/types/value.hpp" #include "duckdb/main/extension/extension_loader.hpp" #include "duckdb/common/vector_operations/binary_executor.hpp" #include "duckdb/common/vector_operations/ternary_executor.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "include/icu-datefunc.hpp" namespace duckdb { @@ -289,11 +287,10 @@ struct ICUTimeBucket : public ICUDateFunc { }; struct OriginTernaryOperator { - static inline timestamp_t Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin, - ValidityMask &mask, idx_t idx, TZCalendar &calendar) { + static inline optional Operation(interval_t bucket_width, timestamp_t ts, timestamp_t origin, + TZCalendar &calendar) { if (!Value::IsFinite(origin)) { - mask.SetInvalid(idx); - return timestamp_t(0); + return nullopt; } BucketWidthType bucket_width_type = ClassifyBucketWidthErrorThrow(bucket_width); switch (bucket_width_type) { @@ -494,7 +491,7 @@ struct ICUTimeBucket : public ICUDateFunc { origin_arg.GetVectorType() == VectorType::CONSTANT_VECTOR) { if (ConstantVector::IsNull(bucket_width_arg) || ConstantVector::IsNull(origin_arg) || 
!Value::IsFinite(*ConstantVector::GetData(origin_arg))) { - ConstantVector::SetNull(result); + ConstantVector::SetNull(result, count_t(args.size())); } else { interval_t bucket_width = *ConstantVector::GetData(bucket_width_arg); BucketWidthType bucket_width_type = ClassifyBucketWidth(bucket_width); @@ -524,11 +521,10 @@ struct ICUTimeBucket : public ICUDateFunc { }); break; case BucketWidthType::UNCLASSIFIED: - TernaryExecutor::ExecuteWithNulls( + TernaryExecutor::Execute( bucket_width_arg, ts_arg, origin_arg, result, args.size(), - [&](interval_t bucket_width, timestamp_t ts, timestamp_t origin, ValidityMask &mask, - idx_t idx) { - return OriginTernaryOperator::Operation(bucket_width, ts, origin, mask, idx, calendar); + [&](interval_t bucket_width, timestamp_t ts, timestamp_t origin) -> optional { + return OriginTernaryOperator::Operation(bucket_width, ts, origin, calendar); }); break; default: @@ -536,10 +532,10 @@ struct ICUTimeBucket : public ICUDateFunc { } } } else { - TernaryExecutor::ExecuteWithNulls( + TernaryExecutor::Execute( bucket_width_arg, ts_arg, origin_arg, result, args.size(), - [&](interval_t bucket_width, timestamp_t ts, timestamp_t origin, ValidityMask &mask, idx_t idx) { - return OriginTernaryOperator::Operation(bucket_width, ts, origin, mask, idx, calendar); + [&](interval_t bucket_width, timestamp_t ts, timestamp_t origin) -> optional { + return OriginTernaryOperator::Operation(bucket_width, ts, origin, calendar); }); } } @@ -623,6 +619,7 @@ struct ICUTimeBucket : public ICUDateFunc { LogicalType::TIMESTAMP_TZ, ICUTimeBucketTimeZoneFunction, Bind)); for (auto &func : set.functions) { func.SetFallible(); + func.SetArgProperties(1, ArgProperties().NonDecreasing()); } loader.RegisterFunction(set); } diff --git a/src/duckdb/extension/icu/icu-timezone.cpp b/src/duckdb/extension/icu/icu-timezone.cpp index 211d01d34..4fb77a509 100644 --- a/src/duckdb/extension/icu/icu-timezone.cpp +++ b/src/duckdb/extension/icu/icu-timezone.cpp @@ -5,6 +5,7 @@ 
#include "duckdb/function/cast/cast_function_set.hpp" #include "duckdb/function/cast_rules.hpp" #include "duckdb/main/extension/extension_loader.hpp" +#include "duckdb/planner/expression/bound_function_expression.hpp" #include "include/icu-casts.hpp" #include "include/icu-datefunc.hpp" #include "duckdb/transaction/meta_transaction.hpp" @@ -23,6 +24,16 @@ bool ICUIsFinite(const timestamp_t &t) { return Timestamp::IsFinite(t); } +template <> +bool ICUIsFinite(const timestamp_tz_t &t) { + return Timestamp::IsFinite(t); +} + +template <> +bool ICUIsFinite(const timestamp_tz_ns_t &t) { + return Timestamp::IsFinite(t); +} + struct ICUTimeZoneData : public GlobalTableFunctionState { ICUTimeZoneData() : tzs(icu::TimeZone::createEnumeration()) { UErrorCode status = U_ZERO_ERROR; @@ -114,10 +125,77 @@ static void ICUTimeZoneFunction(ClientContext &context, TableFunctionInput &data output.SetCardinality(index); } +// Wrap the multiply-named and non-type-safe cast utilities. +struct ICUCast { + template + static inline DST Operation(SRC input) { + throw NotImplementedException("Naive timezone cast could not be performed!"); + } +}; + +// From naive types to TIMESTAMP_TZ +template <> +timestamp_tz_t ICUCast::Operation(timestamp_t src) { + return timestamp_tz_t(src.value); +} + +template <> +timestamp_tz_t ICUCast::Operation(timestamp_ms_t src) { + return timestamp_tz_t(CastTimestampMsToUs::Operation(src).value); +} + +template <> +timestamp_tz_t ICUCast::Operation(timestamp_ns_t src) { + return timestamp_tz_t(CastTimestampNsToUs::Operation(timestamp_t(src)).value); +} + +template <> +timestamp_tz_t ICUCast::Operation(timestamp_sec_t src) { + return timestamp_tz_t(CastTimestampSecToUs::Operation(src).value); +} + +template <> +timestamp_tz_t ICUCast::Operation(date_t src) { + return timestamp_tz_t(Cast::Operation(src).value); +} + +// From TIMESTAMP_TZ to naive types +template <> +timestamp_sec_t ICUCast::Operation(timestamp_tz_t src) { + return 
timestamp_sec_t(CastTimestampUsToSec::Operation(src).value); +} + +template <> +timestamp_ms_t ICUCast::Operation(timestamp_tz_t src) { + return timestamp_ms_t(CastTimestampUsToMs::Operation(src).value); +} + +template <> +timestamp_t ICUCast::Operation(timestamp_tz_t src) { + return src; +} + +template <> +timestamp_ns_t ICUCast::Operation(timestamp_tz_t src) { + return timestamp_ns_t(CastTimestampUsToNs::Operation(src).value); +} + +// From TIMESTAMP_TZ_NS +template <> +timestamp_ns_t ICUCast::Operation(timestamp_tz_ns_t src) { + return src; +} + +// From TIME_TZ +template <> +dtime_tz_t ICUCast::Operation(dtime_tz_t src) { + return src; +} + struct ICUFromNaiveTimestamp : public ICUDateFunc { - static inline timestamp_t Operation(icu::Calendar *calendar, timestamp_t naive) { + static inline timestamp_tz_t Operation(icu::Calendar *calendar, timestamp_t naive) { if (!ICUIsFinite(naive)) { - return naive; + return timestamp_tz_t(naive.value); } // Extract the parts from the "instant" @@ -147,29 +225,50 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc { calendar->set(UCAL_SECOND, secs); calendar->set(UCAL_MILLISECOND, millis); - return GetTime(calendar, micros); + return timestamp_tz_t(GetTime(calendar, micros).value); } - struct CastTimestampUsToUs { - template - static inline DST Operation(SRC input) { - // no-op - return input; + static inline timestamp_tz_ns_t Operation(icu::Calendar *calendar, timestamp_ns_t naive) { + if (!ICUIsFinite(naive)) { + return timestamp_tz_ns_t(naive.value); } - }; - template + auto nanos = naive.value % Interval::NANOS_PER_MICRO; + timestamp_t micros(naive.value / Interval::NANOS_PER_MICRO); + auto cast = Operation(calendar, micros); + + timestamp_tz_ns_t result; + if (!Timestamp::TryFromTimestampNanos(cast, nanos, result)) { + throw ConversionException("ICU date overflows timestamp_ns range"); + } + return result; + } + + template static bool CastFromNaive(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { 
auto &cast_data = parameters.cast_data->Cast(); auto &info = cast_data.info->Cast(); CalendarPtr calendar(info.calendar->clone()); - UnaryExecutor::Execute(source, result, count, [&](T input) { - return Operation(calendar.get(), OP::template Operation(input)); + UnaryExecutor::Execute(source, result, count, [&](SRC input) { + return Operation(calendar.get(), ICUCast::Operation(input)); }); return true; } + template + static BoundCastInfo BindCastFromNaiveType(BindCastInput &input, const LogicalType &target) { + auto cast_data = make_uniq(make_uniq(*input.context)); + switch (target.id()) { + case LogicalTypeId::TIMESTAMP_TZ: + return BoundCastInfo(CastFromNaive, std::move(cast_data)); + case LogicalTypeId::TIMESTAMP_TZ_NS: + return BoundCastInfo(CastFromNaive, std::move(cast_data)); + default: + throw InternalException("Type %s not handled in BindCastFromNaiveType", LogicalTypeIdToString(target.id())); + } + } + static BoundCastInfo BindCastFromNaive(BindCastInput &input, const LogicalType &source, const LogicalType &target) { if (!input.context) { throw InternalException("Missing context for TIMESTAMP to TIMESTAMPTZ cast."); @@ -179,18 +278,17 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc { "has been disabled - use \"AT TIME ZONE ...\""); } - auto cast_data = make_uniq(make_uniq(*input.context)); switch (source.id()) { case LogicalTypeId::TIMESTAMP: - return BoundCastInfo(CastFromNaive, std::move(cast_data)); + return BindCastFromNaiveType(input, target); case LogicalTypeId::TIMESTAMP_MS: - return BoundCastInfo(CastFromNaive, std::move(cast_data)); + return BindCastFromNaiveType(input, target); case LogicalTypeId::TIMESTAMP_NS: - return BoundCastInfo(CastFromNaive, std::move(cast_data)); + return BindCastFromNaiveType(input, target); case LogicalTypeId::TIMESTAMP_SEC: - return BoundCastInfo(CastFromNaive, std::move(cast_data)); + return BindCastFromNaiveType(input, target); case LogicalTypeId::DATE: - return BoundCastInfo(CastFromNaive, 
std::move(cast_data)); + return BindCastFromNaiveType(input, target); default: throw InternalException("Type %s not handled in BindCastFromNaive", LogicalTypeIdToString(source.id())); } @@ -213,7 +311,7 @@ struct ICUFromNaiveTimestamp : public ICUDateFunc { }; struct ICUToNaiveTimestamp : public ICUDateFunc { - static inline timestamp_t Operation(icu::Calendar *calendar, timestamp_t instant) { + static inline timestamp_tz_t Operation(icu::Calendar *calendar, timestamp_tz_t instant) { if (!ICUIsFinite(instant)) { return instant; } @@ -239,7 +337,7 @@ struct ICUToNaiveTimestamp : public ICUDateFunc { micros += millis * int32_t(Interval::MICROS_PER_MSEC); dtime_t local_time = Time::FromTime(hr, mn, secs, micros); - timestamp_t naive; + timestamp_tz_t naive; if (!Timestamp::TryFromDatetime(local_date, local_time, naive)) { throw ConversionException("Unable to convert TIMESTAMPTZ to local TIMESTAMP"); } @@ -247,32 +345,83 @@ struct ICUToNaiveTimestamp : public ICUDateFunc { return naive; } + static inline timestamp_tz_ns_t Operation(icu::Calendar *calendar, timestamp_tz_ns_t instant) { + if (!ICUIsFinite(instant)) { + return instant; + } + + auto nanos = instant.value % Interval::NANOS_PER_MICRO; + timestamp_t micros(instant.value / Interval::NANOS_PER_MICRO); + auto cast = Operation(calendar, instant); + + return timestamp_tz_ns_t(cast.value * Interval::NANOS_PER_MICRO + nanos); + } + + template static bool CastToNaive(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { auto &cast_data = parameters.cast_data->Cast(); auto &info = cast_data.info->Cast(); CalendarPtr calendar(info.calendar->clone()); - UnaryExecutor::Execute( - source, result, count, [&](timestamp_t input) { return Operation(calendar.get(), input); }); + UnaryExecutor::Execute(source, result, count, [&](SRC input) { + return ICUCast::Operation(Operation(calendar.get(), input)); + }); return true; } static BoundCastInfo BindCastToNaive(BindCastInput &input, const LogicalType &source, 
const LogicalType &target) { if (!input.context) { - throw InternalException("Missing context for TIMESTAMPTZ to TIMESTAMP cast."); + throw InternalException("Missing context for TIMESTAMPTZ to %s cast.", LogicalTypeIdToString(target.id())); } if (Settings::Get(*input.context)) { - throw BinderException("Casting from TIMESTAMP WITH TIME ZONE to TIMESTAMP without an explicit time zone " - "has been disabled - use \"AT TIME ZONE ...\""); + throw BinderException("Casting from TIMESTAMP WITH TIME ZONE to %s without an explicit time zone " + "has been disabled - use \"AT TIME ZONE ...\"", + LogicalTypeIdToString(target.id())); } auto cast_data = make_uniq(make_uniq(*input.context)); - return BoundCastInfo(CastToNaive, std::move(cast_data)); + switch (source.id()) { + case LogicalTypeId::TIMESTAMP_TZ: + switch (target.id()) { + case LogicalType::TIMESTAMP: + return BoundCastInfo(CastToNaive, std::move(cast_data)); + case LogicalType::TIMESTAMP_MS: + return BoundCastInfo(CastToNaive, std::move(cast_data)); + case LogicalType::TIMESTAMP_NS: + return BoundCastInfo(CastToNaive, std::move(cast_data)); + case LogicalType::TIMESTAMP_S: + return BoundCastInfo(CastToNaive, std::move(cast_data)); + default: + throw InternalException("Type %s not handled in BindCastToNaive", LogicalTypeIdToString(target.id())); + } + case LogicalTypeId::TIMESTAMP_TZ_NS: + switch (target.id()) { + case LogicalType::TIMESTAMP_NS: + return BoundCastInfo(CastToNaive, std::move(cast_data)); + default: + throw InternalException("Type %s not handled in BindCastToNaive", LogicalTypeIdToString(target.id())); + } + default: + throw InternalException("Type %s not handled in BindCastToNaive", LogicalTypeIdToString(source.id())); + } + } + + static void AddCast(CastFunctionSet &casts, const LogicalType &source, const LogicalType &target) { + const auto implicit_cost = CastRules::ImplicitCast(source, target); + casts.RegisterCastFunction(source, target, BindCastToNaive, implicit_cost); } static void 
AddCasts(ExtensionLoader &loader) { - loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP, BindCastToNaive); + auto &config = DBConfig::GetConfig(loader.GetDatabaseInstance()); + auto &casts = config.GetCastFunctions(); + + AddCast(casts, LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP); + AddCast(casts, LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_MS); + AddCast(casts, LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_NS); + AddCast(casts, LogicalType::TIMESTAMP_TZ, LogicalType::TIMESTAMP_S); + + AddCast(casts, LogicalType::TIMESTAMP_TZ_NS, LogicalType::TIMESTAMP_NS); } }; @@ -311,7 +460,7 @@ struct ICULocalTimestampFunc : public ICUDateFunc { CalendarPtr calendar_ptr(info.calendar->clone()); auto calendar = calendar_ptr.get(); - const auto now = info.now; + const auto now = timestamp_tz_t(info.now.value); return ICUToNaiveTimestamp::Operation(calendar, now); } @@ -389,16 +538,15 @@ bool ICUToTimeTZ::CastToTimeTZ(Vector &source, Vector &result, idx_t count, Cast auto &info = cast_data.info->Cast(); CalendarPtr calendar(info.calendar->clone()); - UnaryExecutor::ExecuteWithNulls(source, result, count, - [&](timestamp_t input, ValidityMask &mask, idx_t idx) { - dtime_tz_t output; - if (ToTimeTZ(calendar.get(), input, output)) { - return output; - } else { - mask.SetInvalid(idx); - return dtime_tz_t(); - } - }); + UnaryExecutor::Execute(source, result, count, + [&](timestamp_t input) -> optional { + dtime_tz_t output; + if (ToTimeTZ(calendar.get(), input, output)) { + return output; + } else { + return nullopt; + } + }); return true; } @@ -413,13 +561,45 @@ BoundCastInfo ICUToTimeTZ::BindCastToTimeTZ(BindCastInput &input, const LogicalT return BoundCastInfo(CastToTimeTZ, std::move(cast_data)); } +bool ICUToTimeTZ::CastFromTime(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) { + auto &cast_data = parameters.cast_data->Cast(); + auto &info = cast_data.info->Cast(); + CalendarPtr calendar_ptr(info.calendar->clone()); 
+ auto calendar = calendar_ptr.get(); + + // Read the session UTC offset (with DST) from the calendar. + // This mirrors the no-offset branch in ICUStrptime::VarcharToTimeTZ so that + // '00:00:00'::TIME::TIMETZ matches '00:00:00'::TIMETZ. + auto offset = ExtractField(calendar, UCAL_ZONE_OFFSET); + offset += ExtractField(calendar, UCAL_DST_OFFSET); + offset /= Interval::MSECS_PER_SEC; + + UnaryExecutor::Execute(source, result, count, + [&](dtime_t input) { return dtime_tz_t(input, offset); }); + return true; +} + +BoundCastInfo ICUToTimeTZ::BindCastFromTime(BindCastInput &input, const LogicalType &source, + const LogicalType &target) { + if (!input.context) { + throw InternalException("Missing context for TIME to TIMETZ cast."); + } + + auto cast_data = make_uniq(make_uniq(*input.context)); + + return BoundCastInfo(CastFromTime, std::move(cast_data)); +} + void ICUToTimeTZ::AddCasts(ExtensionLoader &loader) { const auto implicit_cost = CastRules::ImplicitCast(LogicalType::TIMESTAMP_TZ, LogicalType::TIME_TZ); loader.RegisterCastFunction(LogicalType::TIMESTAMP_TZ, LogicalType::TIME_TZ, BindCastToTimeTZ, implicit_cost); + + const auto time_implicit_cost = CastRules::ImplicitCast(LogicalType::TIME, LogicalType::TIME_TZ); + loader.RegisterCastFunction(LogicalType::TIME, LogicalType::TIME_TZ, BindCastFromTime, time_implicit_cost); } struct ICUTimeZoneFunc : public ICUDateFunc { - template + template static void Execute(DataChunk &input, ExpressionState &state, Vector &result) { auto &func_expr = state.expr.Cast(); auto &info = func_expr.bind_info->Cast(); @@ -435,28 +615,29 @@ struct ICUTimeZoneFunc : public ICUDateFunc { throw InternalException("ICUTimeZone called with constant NULL tz"); } SetTimeZone(calendar, *ConstantVector::GetData(tz_vec)); - UnaryExecutor::Execute(ts_vec, result, input.size(), - [&](T ts) { return OP::Operation(calendar, ts); }); + UnaryExecutor::Execute(ts_vec, result, input.size(), + [&](SRC ts) { return OP::Operation(calendar, ts); }); } else 
{ - BinaryExecutor::Execute(tz_vec, ts_vec, result, input.size(), [&](string_t tz_id, T ts) { - if (ICUIsFinite(ts)) { - SetTimeZone(calendar, tz_id); - return OP::Operation(calendar, ts); - } else { - return ts; - } - }); + BinaryExecutor::Execute( + tz_vec, ts_vec, result, input.size(), [&](string_t tz_id, SRC ts) { + if (ICUIsFinite(ts)) { + SetTimeZone(calendar, tz_id); + return ICUCast::Operation(OP::Operation(calendar, ts)); + } else { + return ICUCast::Operation(ts); + } + }); } } static void AddFunction(const string &name, ExtensionLoader &loader) { ScalarFunctionSet set(name); set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP}, LogicalType::TIMESTAMP_TZ, - Execute, Bind)); + Execute, Bind)); set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIMESTAMP_TZ}, LogicalType::TIMESTAMP, - Execute, Bind)); + Execute, Bind)); set.AddFunction(ScalarFunction({LogicalType::VARCHAR, LogicalType::TIME_TZ}, LogicalType::TIME_TZ, - Execute, Bind)); + Execute, Bind)); for (auto &func : set.functions) { func.SetFallible(); } diff --git a/src/duckdb/extension/icu/icu_extension.cpp b/src/duckdb/extension/icu/icu_extension.cpp index fd1684032..3ec3ff696 100644 --- a/src/duckdb/extension/icu/icu_extension.cpp +++ b/src/duckdb/extension/icu/icu_extension.cpp @@ -83,14 +83,14 @@ struct IcuBindData : public FunctionData { } static void Serialize(Serializer &serializer, const optional_ptr bind_data_p, - const ScalarFunction &function) { + const BoundScalarFunction &function) { auto &bind_data = bind_data_p->Cast(); serializer.WriteProperty(100, "language", bind_data.language); serializer.WriteProperty(101, "country", bind_data.country); serializer.WritePropertyWithDefault(102, "tag", bind_data.tag); } - static unique_ptr Deserialize(Deserializer &deserializer, ScalarFunction &function) { + static unique_ptr Deserialize(Deserializer &deserializer, BoundScalarFunction &function) { string language; string country; string tag; @@ -157,11 
+157,11 @@ static duckdb::unique_ptr ICUCollateBind(BindScalarFunctionInput & auto &bound_function = input.GetBoundFunction(); //! Return a tagged collator - if (!bound_function.extra_info.empty()) { - return make_uniq(bound_function.extra_info); + if (!bound_function.GetExtraInfo().empty()) { + return make_uniq(bound_function.GetExtraInfo()); } - const auto collation = IcuBindData::DecodeFunctionName(bound_function.name); + const auto collation = IcuBindData::DecodeFunctionName(bound_function.GetName()); auto splits = StringUtil::Split(collation, "_"); if (splits.size() == 1) { return make_uniq(splits[0], ""); @@ -185,8 +185,8 @@ static duckdb::unique_ptr ICUSortKeyBind(BindScalarFunctionInput & throw NotImplementedException("ICU_SORT_KEY(VARCHAR, VARCHAR) expected a non-null collation"); } //! Verify tagged collation - if (!bound_function.extra_info.empty()) { - return make_uniq(bound_function.extra_info); + if (!bound_function.GetExtraInfo().empty()) { + return make_uniq(bound_function.GetExtraInfo()); } auto splits = StringUtil::Split(StringValue::Get(val), "_"); if (splits.size() == 1) { @@ -219,12 +219,13 @@ unique_ptr GetKnownTimeZone(const string &tz_str) { return nullptr; } -static string NormalizeTimeZone(const string &tz_str) { - if (GetKnownTimeZone(tz_str)) { - return tz_str; +unique_ptr GetNormalizedTimeZone(string &tz_str) { + duckdb::unique_ptr tz; + if (tz = GetKnownTimeZone(tz_str)) { + return tz; } - // Map UTC±NN00 to Etc/UTC±N + // Map UTC±NN00 to Etc/GMT±N do { if (tz_str.size() <= 4) { break; @@ -233,53 +234,46 @@ static string NormalizeTimeZone(const string &tz_str) { break; } + // Parse the offset, allowing single digits idx_t pos = 3; - const auto utc = tz_str[pos++]; - // Invert the sign (UTC and Etc use opposite sign conventions) - // https://en.wikipedia.org/wiki/Tz_database#Area - auto sign = utc; - if (utc == '+') { - sign = '-'; - ; - } else if (utc == '-') { - sign = '+'; - } else { + int hh, mm, ss; + if 
(!Timestamp::TryParseUTCOffset(tz_str.data(), pos, tz_str.size(), hh, mm, ss, false)) { break; } + if (pos < tz_str.size() || mm || ss) { + break; + } + + // Invert the sign (UTC and Etc use opposite sign conventions) + // https://en.wikipedia.org/wiki/Tz_database#Area + hh = -hh; + // Build the mapped timezone string with single digit hour offsets string mapped = "Etc/GMT"; - mapped += sign; - const auto base_len = mapped.size(); - for (; pos < tz_str.size(); ++pos) { - const auto digit = tz_str[pos]; - // We could get fancy here and count colons and their locations, but I doubt anyone cares. - if (digit == '0' || digit == ':') { - continue; - } - if (!StringUtil::CharacterIsDigit(digit)) { - break; - } - mapped += digit; - } - if (pos < tz_str.size()) { - break; + if (hh < 0) { + mapped += "-"; + hh = -hh; + } else { + mapped += "+"; } - // If we didn't add anything, then make it +0 - if (mapped.size() == base_len) { - mapped.back() = '+'; - mapped += '0'; + if (hh >= 10) { + mapped += UnsafeNumericCast('0' + hh / 10); + hh %= 10; } + mapped += UnsafeNumericCast('0' + hh); + // Final sanity check - if (GetKnownTimeZone(mapped)) { - return mapped; + if (tz = GetKnownTimeZone(mapped)) { + tz_str = mapped; + return tz; } } while (false); - return tz_str; + return nullptr; } unique_ptr GetTimeZoneInternal(string &tz_str, vector &candidates) { - auto tz = GetKnownTimeZone(tz_str); + auto tz = GetNormalizedTimeZone(tz_str); if (tz) { return tz; } @@ -336,7 +330,6 @@ unique_ptr ICUHelpers::GetTimeZone(string &tz_str, string *error_ static void SetICUTimeZone(ClientContext &context, SetScope scope, Value ¶meter) { auto tz_str = StringValue::Get(parameter); - tz_str = NormalizeTimeZone(tz_str); ICUHelpers::GetTimeZone(tz_str); parameter = Value(tz_str); } @@ -478,8 +471,8 @@ static void LoadInternal(ExtensionLoader &loader) { std::string tz_string; tz->getID(tz_id).toUTF8String(tz_string); // If the environment TZ is invalid, look for some alternatives - tz_string = 
NormalizeTimeZone(tz_string); - if (!GetKnownTimeZone(tz_string)) { + tz = GetNormalizedTimeZone(tz_string); + if (!tz) { tz_string = "UTC"; } config.AddExtensionOption("TimeZone", "The current time zone", LogicalType::VARCHAR, Value(tz_string), diff --git a/src/duckdb/extension/icu/include/icu-casts.hpp b/src/duckdb/extension/icu/include/icu-casts.hpp index 04595bc45..1559e85b2 100644 --- a/src/duckdb/extension/icu/include/icu-casts.hpp +++ b/src/duckdb/extension/icu/include/icu-casts.hpp @@ -30,8 +30,10 @@ struct ICUToTimeTZ : public ICUDateFunc { static bool ToTimeTZ(icu::Calendar *calendar, timestamp_t instant, dtime_tz_t &result); static bool CastToTimeTZ(Vector &source, Vector &result, idx_t count, CastParameters ¶meters); + static bool CastFromTime(Vector &source, Vector &result, idx_t count, CastParameters ¶meters); static BoundCastInfo BindCastToTimeTZ(BindCastInput &input, const LogicalType &source, const LogicalType &target); + static BoundCastInfo BindCastFromTime(BindCastInput &input, const LogicalType &source, const LogicalType &target); static void AddCasts(ExtensionLoader &loader); }; diff --git a/src/duckdb/extension/icu/include/icu-datefunc.hpp b/src/duckdb/extension/icu/include/icu-datefunc.hpp index 71553e505..1a593ae55 100644 --- a/src/duckdb/extension/icu/include/icu-datefunc.hpp +++ b/src/duckdb/extension/icu/include/icu-datefunc.hpp @@ -8,10 +8,9 @@ #pragma once -#include "duckdb.hpp" - #include "duckdb/common/enums/date_part_specifier.hpp" -#include "duckdb/planner/expression/bound_function_expression.hpp" +#include "duckdb/function/cast/default_casts.hpp" +#include "duckdb/function/function.hpp" #include "tz_calendar.hpp" namespace duckdb { @@ -54,10 +53,16 @@ struct ICUDateFunc { static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result); //! Gets the timestamp from the calendar, throwing if it is not in range. static timestamp_t GetTime(icu::Calendar *calendar, uint64_t micros = 0); + //! 
Gets the timestamp from the calendar, throwing if it is not in range. + static bool TryGetTimeNS(icu::Calendar *calendar, uint64_t nanos, timestamp_ns_t &result); + //! Gets the timestamp from the calendar, throwing if it is not in range. + static timestamp_ns_t GetTimeNS(icu::Calendar *calendar, uint64_t micros = 0); //! Gets the timestamp from the calendar, assuming it is in range. static timestamp_t GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros = 0); //! Sets the calendar to the timestamp, returning the unused µs part static uint64_t SetTime(icu::Calendar *calendar, timestamp_t date); + //! Sets the calendar to the timestamp, returning the unused ns part + static uint64_t SetTimeNS(icu::Calendar *calendar, timestamp_ns_t date); //! Extracts the field from the calendar static int32_t ExtractField(icu::Calendar *calendar, UCalendarDateFields field); //! Subtracts the field of the given date from the calendar diff --git a/src/duckdb/extension/icu/include/icu-helpers.hpp b/src/duckdb/extension/icu/include/icu-helpers.hpp index 9103c7a03..46fc5b7f7 100644 --- a/src/duckdb/extension/icu/include/icu-helpers.hpp +++ b/src/duckdb/extension/icu/include/icu-helpers.hpp @@ -8,7 +8,7 @@ #pragma once -#include "duckdb.hpp" +#include "unicode/calendar.h" #include "unicode/timezone.h" #include "duckdb/common/types/timestamp.hpp" @@ -20,7 +20,8 @@ struct ICUHelpers { //! 
Gets a time zone - throws an error if the timezone is not found static unique_ptr GetTimeZone(string &tz_str, string *error_message = nullptr); - static TimestampComponents GetComponents(timestamp_tz_t ts, icu::Calendar *calendar); + static TimestampComponents GetComponents(timestamp_t ts, icu::Calendar *calendar); + static TimestampComponents GetComponents(timestamp_ns_t ts, icu::Calendar *calendar); static timestamp_t ToTimestamp(TimestampComponents data); }; diff --git a/src/duckdb/extension/icu/third_party/icu/common/charstr.h b/src/duckdb/extension/icu/third_party/icu/common/charstr.h index ea54ede73..a4a70f077 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/charstr.h +++ b/src/duckdb/extension/icu/third_party/icu/common/charstr.h @@ -21,12 +21,6 @@ U_NAMESPACE_BEGIN -// Windows needs us to DLL-export the MaybeStackArray template specialization, -// but MacOS X cannot handle it. Same as in digitlst.h. -#if !U_PLATFORM_IS_DARWIN_BASED -template class U_COMMON_API MaybeStackArray; -#endif - /** * ICU-internal char * string class. * This class does not assume or enforce any particular character encoding. 
@@ -38,34 +32,34 @@ template class U_COMMON_API MaybeStackArray; * For example: * cs.data()[5]='a'; // no need for setCharAt(5, 'a') */ -class U_COMMON_API CharString : public UMemory { +class U_COMMON_API_CLASS CharString : public UMemory { public: - CharString() : len(0) { buffer[0]=0; } - CharString(StringPiece s, UErrorCode &errorCode) : len(0) { + U_COMMON_API CharString() : len(0) { buffer[0]=0; } + U_COMMON_API CharString(StringPiece s, UErrorCode &errorCode) : len(0) { buffer[0]=0; append(s, errorCode); } - CharString(const CharString &s, UErrorCode &errorCode) : len(0) { + U_COMMON_API CharString(const CharString &s, UErrorCode &errorCode) : len(0) { buffer[0]=0; append(s, errorCode); } - CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) { + U_COMMON_API CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) { buffer[0]=0; append(s, sLength, errorCode); } - ~CharString() {} + U_COMMON_API ~CharString() {} /** * Move constructor; might leave src in an undefined state. * This string will have the same contents and state that the source string had. */ - CharString(CharString &&src) noexcept; + U_COMMON_API CharString(CharString &&src) noexcept; /** * Move assignment operator; might leave src in an undefined state. * This string will have the same contents and state that the source string had. * The behavior is undefined if *this and src are the same object. */ - CharString &operator=(CharString &&src) noexcept; + U_COMMON_API CharString &operator=(CharString &&src) noexcept; /** * Replaces this string's contents with the other string's contents. @@ -73,21 +67,21 @@ class U_COMMON_API CharString : public UMemory { * the assignment operator, to make copies explicit and to * use a UErrorCode where memory allocations might be needed. 
*/ - CharString ©From(const CharString &other, UErrorCode &errorCode); - CharString ©From(StringPiece s, UErrorCode &errorCode); + U_COMMON_API CharString ©From(const CharString &other, UErrorCode &errorCode); + U_COMMON_API CharString ©From(StringPiece s, UErrorCode &errorCode); - UBool isEmpty() const { return len==0; } - int32_t length() const { return len; } - char operator[](int32_t index) const { return buffer[index]; } - StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); } + U_COMMON_API UBool isEmpty() const { return len==0; } + U_COMMON_API int32_t length() const { return len; } + U_COMMON_API char operator[](int32_t index) const { return buffer[index]; } + U_COMMON_API StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); } - const char *data() const { return buffer.getAlias(); } - char *data() { return buffer.getAlias(); } + U_COMMON_API const char *data() const { return buffer.getAlias(); } + U_COMMON_API char *data() { return buffer.getAlias(); } /** * Allocates length()+1 chars and copies the NUL-terminated data(). * The caller must uprv_free() the result. */ - char *cloneData(UErrorCode &errorCode) const; + U_COMMON_API char *cloneData(UErrorCode &errorCode) const; /** * Copies the contents of the string into dest. * Checks if there is enough space in dest, extracts the entire string if possible, @@ -103,40 +97,40 @@ class U_COMMON_API CharString : public UMemory { * @param errorCode ICU error code. 
* @return length() */ - int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const; + U_COMMON_API int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const; - bool operator==(const CharString& other) const { + U_COMMON_API bool operator==(const CharString& other) const { return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0); } - bool operator!=(const CharString& other) const { + U_COMMON_API bool operator!=(const CharString& other) const { return !operator==(other); } - bool operator==(StringPiece other) const { + U_COMMON_API bool operator==(StringPiece other) const { return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0); } - bool operator!=(StringPiece other) const { + U_COMMON_API bool operator!=(StringPiece other) const { return !operator==(other); } /** @return last index of c, or -1 if c is not in this string */ - int32_t lastIndexOf(char c) const; + U_COMMON_API int32_t lastIndexOf(char c) const; - bool contains(StringPiece s) const; + U_COMMON_API bool contains(StringPiece s) const; - CharString &clear() { len=0; buffer[0]=0; return *this; } - CharString &truncate(int32_t newLength); + U_COMMON_API CharString &clear() { len=0; buffer[0]=0; return *this; } + U_COMMON_API CharString &truncate(int32_t newLength); - CharString &append(char c, UErrorCode &errorCode); - CharString &append(StringPiece s, UErrorCode &errorCode) { + U_COMMON_API CharString &append(char c, UErrorCode &errorCode); + U_COMMON_API CharString &append(StringPiece s, UErrorCode &errorCode) { return append(s.data(), s.length(), errorCode); } - CharString &append(const CharString &s, UErrorCode &errorCode) { + U_COMMON_API CharString &append(const CharString &s, UErrorCode &errorCode) { return append(s.data(), s.length(), errorCode); } - CharString &append(const char *s, int32_t sLength, UErrorCode &status); + U_COMMON_API CharString &append(const char *s, int32_t sLength, UErrorCode 
&status); - CharString &appendNumber(int64_t number, UErrorCode &status); + U_COMMON_API CharString &appendNumber(int64_t number, UErrorCode &status); /** * Returns a writable buffer for appending and writes the buffer's capacity to @@ -158,26 +152,28 @@ class U_COMMON_API CharString : public UMemory { * @param errorCode in/out error code * @return a buffer with resultCapacity>=min_capacity */ - char *getAppendBuffer(int32_t minCapacity, - int32_t desiredCapacityHint, - int32_t &resultCapacity, - UErrorCode &errorCode); + U_COMMON_API char *getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + int32_t &resultCapacity, + UErrorCode &errorCode); - CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode); - CharString &appendInvariantChars(const char16_t* uchars, int32_t ucharsLen, UErrorCode& errorCode); + U_COMMON_API CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode); + U_COMMON_API CharString &appendInvariantChars(const char16_t* uchars, + int32_t ucharsLen, + UErrorCode& errorCode); /** * Appends a filename/path part, e.g., a directory name. * First appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if necessary. * Does nothing if s is empty. */ - CharString &appendPathPart(StringPiece s, UErrorCode &errorCode); + U_COMMON_API CharString &appendPathPart(StringPiece s, UErrorCode &errorCode); /** * Appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if this string is not empty * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR. 
*/ - CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode); + U_COMMON_API CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode); private: MaybeStackArray buffer; diff --git a/src/duckdb/extension/icu/third_party/icu/common/cmemory.h b/src/duckdb/extension/icu/third_party/icu/common/cmemory.h index 3705c2dfd..e31c54aac 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/cmemory.h +++ b/src/duckdb/extension/icu/third_party/icu/common/cmemory.h @@ -334,9 +334,7 @@ class MaybeStackArray { // No heap allocation. Use only on the stack. static void* U_EXPORT2 operator new(size_t) noexcept = delete; static void* U_EXPORT2 operator new[](size_t) noexcept = delete; -#if U_HAVE_PLACEMENT_NEW static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete; -#endif /** * Default constructor initializes with internal T[stackCapacity] buffer. @@ -570,9 +568,7 @@ class MaybeStackHeaderAndArray { // No heap allocation. Use only on the stack. static void* U_EXPORT2 operator new(size_t) noexcept = delete; static void* U_EXPORT2 operator new[](size_t) noexcept = delete; -#if U_HAVE_PLACEMENT_NEW static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete; -#endif /** * Default constructor initializes with internal H+T[stackCapacity] buffer. 
diff --git a/src/duckdb/extension/icu/third_party/icu/common/cstr.h b/src/duckdb/extension/icu/third_party/icu/common/cstr.h index be21d910b..84a07543e 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/cstr.h +++ b/src/duckdb/extension/icu/third_party/icu/common/cstr.h @@ -43,11 +43,11 @@ U_NAMESPACE_BEGIN -class U_COMMON_API CStr : public UMemory { +class U_COMMON_API_CLASS CStr : public UMemory { public: - CStr(const UnicodeString &in); - ~CStr(); - const char * operator ()() const; + U_COMMON_API CStr(const UnicodeString &in); + U_COMMON_API ~CStr(); + U_COMMON_API const char * operator ()() const; private: CharString s; diff --git a/src/duckdb/extension/icu/third_party/icu/common/fixedstring.cpp b/src/duckdb/extension/icu/third_party/icu/common/fixedstring.cpp new file mode 100644 index 000000000..1c603083e --- /dev/null +++ b/src/duckdb/extension/icu/third_party/icu/common/fixedstring.cpp @@ -0,0 +1,29 @@ +// © 2025 and later: Unicode, Inc. and others. +// License & terms of use: https://www.unicode.org/copyright.html + +#include "fixedstring.h" + +#include "unicode/unistr.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN + +U_EXPORT void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + + if (src.isEmpty()) { + dst.clear(); + return; + } + + int32_t length = src.length(); + if (!dst.reserve(length + 1)) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + src.extract(0, length, dst.getAlias(), length + 1, US_INV); +} + +U_NAMESPACE_END diff --git a/src/duckdb/extension/icu/third_party/icu/common/fixedstring.h b/src/duckdb/extension/icu/third_party/icu/common/fixedstring.h new file mode 100644 index 000000000..b576d17ad --- /dev/null +++ b/src/duckdb/extension/icu/third_party/icu/common/fixedstring.h @@ -0,0 +1,104 @@ +// © 2025 and later: Unicode, Inc. and others. 
+// License & terms of use: https://www.unicode.org/copyright.html + +#ifndef FIXEDSTRING_H +#define FIXEDSTRING_H + +#include +#include + +#include "unicode/uobject.h" +#include "unicode/utypes.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +class UnicodeString; + +/** + * ICU-internal fixed-length char* string class. + * This is a complement to CharString to store fixed-length strings efficiently + * (not allocating any unnecessary storage for future additions to the string). + * + * A terminating NUL is always stored, but the length of the string isn't. + * An empty string is stored as nullptr, allocating no storage at all. + * + * This class wants to be convenient but is also deliberately minimalist. + * Please do not add methods if they only add minor convenience. + */ +class FixedString : public UMemory { +public: + FixedString() = default; + ~FixedString() { operator delete[](ptr); } + + FixedString(const FixedString& other) : FixedString(other.data()) {} + + FixedString(std::string_view init) { + size_t size = init.size(); + if (size > 0 && reserve(size + 1)) { + uprv_memcpy(ptr, init.data(), size); + ptr[size] = '\0'; + } + } + + FixedString& operator=(const FixedString& other) { + *this = other.data(); + return *this; + } + + FixedString& operator=(std::string_view init) { + if (init.empty()) { + operator delete[](ptr); + ptr = nullptr; + } else { + size_t size = init.size(); + if (reserve(size + 1)) { + uprv_memcpy(ptr, init.data(), size); + ptr[size] = '\0'; + } + } + return *this; + } + + FixedString(FixedString&& other) noexcept : ptr(std::exchange(other.ptr, nullptr)) {} + + FixedString& operator=(FixedString&& other) noexcept { + operator delete[](ptr); + ptr = other.ptr; + other.ptr = nullptr; + return *this; + } + + void clear() { + operator delete[](ptr); + ptr = nullptr; + } + + const char* data() const { + return isEmpty() ? 
"" : ptr; + } + + char* getAlias() { + return ptr; + } + + bool isEmpty() const { + return ptr == nullptr; + } + + /** Allocate storage for a new string, without initializing it. */ + bool reserve(size_t size) { + operator delete[](ptr); + ptr = static_cast(operator new[](size)); + return ptr != nullptr; + } + +private: + char* ptr = nullptr; +}; + +U_COMMON_API void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status); + +U_NAMESPACE_END + +#endif diff --git a/src/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp b/src/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp index 71969c847..3ba735530 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp +++ b/src/duckdb/extension/icu/third_party/icu/common/localebuilder.cpp @@ -8,6 +8,7 @@ #include "bytesinkutil.h" // StringByteSink #include "charstr.h" #include "cstring.h" +#include "fixedstring.h" #include "ulocimp.h" #include "unicode/localebuilder.h" #include "unicode/locid.h" @@ -131,14 +132,13 @@ LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant) variant_ = nullptr; return *this; } - CharString* new_variant = new CharString(variant, status_); - if (U_FAILURE(status_)) { return *this; } - if (new_variant == nullptr) { + FixedString* new_variant = new FixedString(variant); + if (new_variant == nullptr || new_variant->isEmpty()) { status_ = U_MEMORY_ALLOCATION_ERROR; return *this; } - transform(new_variant->data(), new_variant->length()); - if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) { + transform(new_variant->getAlias(), variant.length()); + if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) { delete new_variant; status_ = U_ILLEGAL_ARGUMENT_ERROR; return *this; diff --git a/src/duckdb/extension/icu/third_party/icu/common/localefallback_data.h b/src/duckdb/extension/icu/third_party/icu/common/localefallback_data.h index 3b8ad8a3f..338bfa359 100644 --- 
a/src/duckdb/extension/icu/third_party/icu/common/localefallback_data.h +++ b/src/duckdb/extension/icu/third_party/icu/common/localefallback_data.h @@ -11,1149 +11,1171 @@ //====================================================================== // Default script table const char scriptCodeChars[] = - "Aghb\0Ahom\0Arab\0Armi\0Armn\0Avst\0Bali\0Bamu\0Bass\0Batk\0Beng\0" - "Bopo\0Brah\0Cakm\0Cans\0Cari\0Cham\0Cher\0Chrs\0Copt\0Cprt\0Cyrl\0" - "Deva\0Egyp\0Elym\0Ethi\0Geor\0Gong\0Gonm\0Goth\0Gran\0Grek\0Gujr\0" - "Guru\0Hang\0Hani\0Hans\0Hant\0Hebr\0Hluw\0Hmnp\0Ital\0Java\0Jpan\0" - "Kali\0Kana\0Khar\0Khmr\0Kits\0Knda\0Kore\0Lana\0Laoo\0Latf\0Latg\0" - "Lepc\0Lina\0Linb\0Lisu\0Lyci\0Lydi\0Mand\0Mani\0Marc\0Medf\0Merc\0" - "Mlym\0Modi\0Mong\0Mroo\0Mtei\0Mymr\0Narb\0Newa\0Nkoo\0Nshu\0Ogam\0" - "Olck\0Orkh\0Orya\0Osge\0Ougr\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rjng\0" - "Rohg\0Runr\0Samr\0Sarb\0Saur\0Sgnw\0Sinh\0Sogd\0Sora\0Soyo\0Sunu\0" - "Syrc\0Tagb\0Takr\0Tale\0Talu\0Taml\0Tang\0Tavt\0Telu\0Tfng\0Thaa\0" - "Thai\0Tibt\0Tnsa\0Toto\0Ugar\0Vaii\0Wcho\0Xpeo\0Xsux\0Yiii\0"; + "Aghb\0Ahom\0Arab\0Armi\0Armn\0Avst\0Bali\0Bamu\0Batk\0Beng\0Bopo\0" + "Brah\0Cakm\0Cans\0Cari\0Cham\0Cher\0Chrs\0Copt\0Cprt\0Cyrl\0Deva\0" + "Egyp\0Elym\0Ethi\0Geor\0Gong\0Gonm\0Goth\0Gran\0Grek\0Gujr\0Guru\0" + "Hang\0Hani\0Hans\0Hant\0Hebr\0Hluw\0Hmnp\0Ital\0Java\0Jpan\0Kali\0" + "Kana\0Khar\0Khmr\0Kits\0Knda\0Kore\0Lana\0Laoo\0Latf\0Latg\0Lepc\0" + "Lina\0Linb\0Lisu\0Lyci\0Lydi\0Mand\0Mani\0Marc\0Medf\0Merc\0Mlym\0" + "Modi\0Mong\0Mroo\0Mtei\0Mymr\0Narb\0Newa\0Nkoo\0Ogam\0Olck\0Orkh\0" + "Orya\0Osge\0Ougr\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rjng\0Rohg\0Runr\0" + "Samr\0Sarb\0Saur\0Sgnw\0Sidt\0Sinh\0Sogd\0Sora\0Soyo\0Syrc\0Tagb\0" + "Takr\0Tale\0Talu\0Taml\0Tang\0Tavt\0Telu\0Tfng\0Thaa\0Thai\0Tibt\0" + "Tnsa\0Toto\0Ugar\0Vaii\0Wcho\0Xpeo\0Xsux\0Yiii\0"; const char dsLocaleIDChars[] = - "aaf\0aao\0aat\0ab\0abh\0abl\0abv\0acm\0acq\0acw\0acx\0adf\0adx\0" - 
"ady\0ae\0aeb\0aec\0aee\0aeq\0afb\0agi\0agj\0agx\0ahg\0aho\0ahr\0" - "aib\0aii\0aij\0ain\0aio\0aiq\0akk\0akv\0alk\0all\0alr\0alt\0alw\0" - "am\0ams\0amw\0ani\0anp\0anq\0anr\0anu\0aot\0apc\0apd\0aph\0aqc\0" - "ar\0arc\0arq\0ars\0ary\0arz\0as\0ase\0ask\0asr\0atn\0atv\0auj\0" - "auz\0av\0avd\0avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0" + "aaf\0aao\0aat\0ab\0abh\0abl\0abq\0abv\0acm\0acq\0acw\0acx\0adf\0" + "adx\0ady\0ae\0aeb\0aec\0aee\0aeq\0afb\0agi\0agj\0agx\0ahg\0aho\0" + "ahr\0aib\0aii\0aij\0ain\0aio\0aiq\0akk\0akv\0alk\0all\0alr\0alt\0" + "alw\0am\0ams\0amw\0ani\0anp\0anq\0anr\0anu\0aot\0apc\0apd\0aph\0" + "aqc\0ar\0arc\0arq\0ars\0ary\0arz\0as\0ase\0ask\0asr\0atn\0atv\0" + "auj\0auz\0av\0avd\0avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0" "az_IR\0az_RU\0azb\0ba\0bal\0bap\0bax\0bbl\0bcq\0bdv\0bdz\0be\0" "bee\0bej\0bfb\0bfq\0bft\0bfu\0bfw\0bfy\0bfz\0bg\0bgc\0bgd\0bgn\0" "bgp\0bgq\0bgw\0bgx\0bha\0bhb\0bhd\0bhe\0bhh\0bhi\0bhj\0bhm\0bhn\0" - "bho\0bht\0bhu\0biy\0bjf\0bjj\0bjm\0bkk\0blk\0blt\0bmj\0bn\0bns\0" - "bo\0bph\0bpx\0bpy\0bqi\0bra\0brb\0brd\0brh\0brk\0bro\0brv\0brw\0" - "brx\0bsh\0bsk\0bsq\0bst\0btd\0btm\0btv\0bua\0bwe\0bxm\0bxu\0byh\0" - "byn\0byw\0bzi\0cbn\0ccp\0cde\0cdh\0cdi\0cdj\0cdm\0cdo\0cdz\0ce\0" - "cgk\0chg\0chm\0chr\0chx\0cih\0cja\0cji\0cjm\0cjy\0ckb\0ckt\0clh\0" - "clw\0cmg\0cna\0cnp\0cog\0cop\0cpg\0cr\0crh\0crj\0crk\0crl\0crm\0" - "csh\0csp\0csw\0ctd\0ctg\0ctn\0ctt\0cty\0cu\0cuu\0cv\0czh\0czk\0" - "daq\0dar\0dcc\0ddo\0def\0deh\0der\0dgl\0dhi\0dhn\0dho\0dhw\0dka\0" - "dlg\0dmf\0dmk\0dml\0dng\0dnu\0dnv\0doi\0dox\0dre\0drq\0drs\0dry\0" - "dso\0dty\0dub\0duh\0dus\0dv\0dwk\0dwz\0dz\0dzl\0ecr\0ecy\0egy\0" - "eky\0el\0emg\0emu\0enf\0enh\0era\0esg\0esh\0ett\0eve\0evn\0fa\0" - "fay\0faz\0fia\0fmu\0fub\0gan\0gaq\0gas\0gau\0gbj\0gbk\0gbl\0gbm\0" - "gbz\0gdb\0gdo\0gdx\0gez\0ggg\0gha\0ghe\0gho\0ghr\0ght\0gig\0gin\0" - "gjk\0gju\0gld\0glh\0glk\0gml\0gmv\0gmy\0goe\0gof\0goj\0gok\0gon\0" - "got\0gra\0grc\0grt\0gru\0gu\0gvr\0gwc\0gwf\0gwt\0gyo\0gzi\0ha_CM\0" - 
"ha_SD\0hac\0hak\0hak_TW\0har\0haz\0hbo\0hdy\0he\0hi\0hif\0hii\0" - "hit\0hkh\0hlb\0hlu\0hmd\0hmj\0hmq\0hnd\0hne\0hnj\0hno\0hoc\0hoh\0" - "hoj\0how\0hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0hut\0huy\0huz\0hy\0" - "hyw\0ii\0imy\0inh\0int\0ior\0iru\0isk\0itk\0itl\0iu\0iw\0ja\0" - "jad\0jat\0jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0jge\0ji\0jje\0jkm\0" - "jml\0jna\0jnd\0jnl\0jns\0jog\0jpa\0jpr\0jrb\0jul\0jun\0juy\0jya\0" - "jye\0ka\0kaa\0kap\0kaw\0kbd\0kbg\0kbu\0kby\0kca\0kcy\0kdq\0kdt\0" - "ket\0kev\0kex\0key\0kfa\0kfb\0kfc\0kfd\0kfe\0kfg\0kfh\0kfi\0kfk\0" - "kfm\0kfp\0kfq\0kfr\0kfs\0kfu\0kfx\0kfy\0kgj\0kgy\0khb\0khf\0khg\0" - "khn\0kho\0kht\0khv\0khw\0kif\0kim\0kip\0kjg\0kjh\0kjl\0kjo\0kjp\0" - "kjt\0kjz\0kk\0kk_AF\0kk_CN\0kk_IR\0kk_MN\0kkf\0kkh\0kkt\0kle\0" - "klj\0klr\0km\0kmj\0kmz\0kn\0knn\0ko\0koi\0kok\0kpt\0kpy\0kqd\0" - "kqy\0kra\0krc\0krk\0krr\0kru\0krv\0ks\0ksu\0ksw\0ksz\0ktb\0kte\0" - "ktl\0ktp\0ku_LB\0kuf\0kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0kxf\0" + "bho\0bht\0bhu\0bix\0biy\0bjf\0bjj\0bjm\0bkk\0blk\0blt\0bmj\0bn\0" + "bns\0bo\0bph\0bpx\0bpy\0bqi\0bra\0brb\0brd\0brh\0brk\0bro\0brv\0" + "brw\0brx\0bsh\0bsk\0bst\0btd\0btm\0btv\0bua\0bwe\0bxm\0bxu\0byh\0" + "byn\0byw\0bzi\0cbn\0ccp\0cde\0cdh\0cdi\0cdj\0cdm\0cdn\0cdo\0cdz\0" + "ce\0cgk\0chg\0chm\0chr\0chx\0cih\0cja\0cji\0cjm\0cjy\0ckb\0ckt\0" + "clh\0clw\0cmg\0cna\0cnp\0cog\0cop\0cpg\0cr\0crh\0crj\0crk\0crl\0" + "crm\0csh\0csp\0csw\0ctd\0ctg\0ctn\0ctt\0cty\0cu\0cuu\0cv\0czh\0" + "czk\0daq\0dar\0dcc\0ddo\0def\0deh\0der\0dgl\0dhi\0dhn\0dho\0dhw\0" + "dka\0dlg\0dmf\0dmk\0dml\0dng\0dnu\0dnv\0doi\0dox\0dre\0drq\0drs\0" + "dry\0dso\0dty\0dub\0duh\0dus\0dv\0dwk\0dwz\0dz\0dzl\0ecr\0ecy\0" + "egy\0eky\0el\0emg\0emu\0enf\0enh\0era\0esg\0esh\0ett\0eve\0evn\0" + "fa\0fay\0faz\0fia\0fmu\0fub\0gan\0gaq\0gas\0gau\0gbj\0gbk\0gbl\0" + "gbm\0gbz\0gdb\0gdo\0gdx\0gez\0ggg\0gha\0ghe\0gho\0ghr\0ght\0gig\0" + "gin\0gjk\0gju\0gld\0glh\0glk\0gml\0gmv\0gmy\0goe\0gof\0goj\0gok\0" + "gon\0got\0gra\0grc\0grr\0grt\0gru\0gu\0gvr\0gwc\0gwf\0gwt\0gyo\0" + 
"gzi\0ha_CM\0ha_SD\0hac\0hak\0hak_TW\0har\0haz\0hbo\0hdy\0he\0" + "hi\0hif\0hii\0hit\0hkh\0hlb\0hlu\0hmd\0hmj\0hmq\0hnd\0hne\0hnj\0" + "hnm\0hno\0hoc\0hoh\0hoj\0how\0hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0" + "hut\0huy\0huz\0hy\0hyw\0ii\0imy\0inh\0int\0ior\0iru\0isk\0itk\0" + "itl\0iu\0iw\0ja\0jad\0jat\0jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0" + "jge\0ji\0jje\0jkm\0jml\0jna\0jnd\0jnl\0jns\0jog\0jpa\0jpr\0jrb\0" + "jul\0jun\0juy\0jya\0jye\0ka\0kaa\0kap\0kaw\0kbd\0kbg\0kbu\0kby\0" + "kca\0kcy\0kdq\0kdt\0ket\0kev\0kex\0key\0kfa\0kfb\0kfc\0kfd\0kfe\0" + "kfg\0kfh\0kfi\0kfk\0kfm\0kfp\0kfq\0kfr\0kfs\0kfu\0kfx\0kfy\0kgj\0" + "kgy\0khb\0khf\0khg\0khn\0kho\0kht\0khv\0khw\0kif\0kim\0kip\0kjg\0" + "kjh\0kjl\0kjo\0kjp\0kjt\0kjz\0kk\0kk_AF\0kk_CN\0kk_IR\0kk_MN\0" + "kkf\0kkh\0kkt\0kle\0klj\0klr\0km\0kmj\0kmz\0kn\0knn\0ko\0koi\0" + "kok\0kpt\0kpy\0kqd\0kqy\0kra\0krc\0krk\0krr\0kru\0krv\0ks\0ksu\0" + "ksw\0ksz\0ktb\0kte\0ktl\0ktp\0ku_AM\0ku_AZ\0ku_GE\0ku_IQ\0ku_IR\0" + "ku_LB\0ku_TM\0kuf\0kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0kwx\0kxf\0" "kxk\0kxm\0kxp\0ky\0ky_CN\0kyu\0kyv\0kyw\0lab\0lad\0lae\0lah\0" "lbe\0lbf\0lbj\0lbm\0lbo\0lbr\0lcp\0lep\0lez\0lhm\0lhs\0lif\0lis\0" "lkh\0lki\0lmh\0lmn\0lo\0loy\0lpo\0lrc\0lrk\0lrl\0lsa\0lsd\0lss\0" - "ltc\0luk\0luu\0luv\0luz\0lwl\0lwm\0lya\0lzh\0lzz_GE\0mag\0mai\0" - "mby\0mde\0mdf\0mdx\0mdy\0mfa\0mfi\0mga\0mgp\0mhj\0mid\0mjl\0mjq\0" - "mjr\0mjt\0mju\0mjv\0mjz\0mk\0mkb\0mke\0mki\0mkm\0ml\0mlf\0mn\0" - "mn_CN\0mnc\0mni\0mnj\0mns\0mnw\0mpz\0mr\0mra\0mrd\0mrj\0mro\0" - "mrr\0ms_CC\0mtm\0mtr\0mud\0muk\0mut\0muv\0muz\0mve\0mvf\0mvy\0" - "mvz\0mwr\0mwt\0mww\0my\0mym\0myv\0myz\0mzn\0nan\0nan_TW\0nao\0" - "ncd\0ncq\0ndf\0ne\0neg\0neh\0nei\0new\0ngt\0nio\0nit\0niv\0nli\0" - "nlm\0nlx\0nmm\0nnp\0nod\0noe\0nog\0noi\0non\0nos\0npb\0nqo\0nrn\0" - "nsd\0nsf\0nsk\0nst\0nsv\0nty\0ntz\0nwc\0nwx\0nyl\0nyq\0nyw\0oaa\0" - "oac\0oar\0oav\0obm\0obr\0odk\0oht\0oj\0ojs\0okm\0oko\0okz\0ola\0" - "ole\0omk\0omp\0omr\0omx\0oon\0or\0ort\0oru\0orv\0os\0osa\0osc\0" - 
"osi\0ota\0otb\0otk\0oty\0oui\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0" - "pce\0pcf\0pcg\0pch\0pci\0pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0" - "phk\0phl\0phn\0pho\0phr\0pht\0phu\0phv\0phw\0pi\0pka\0pkr\0plk\0" - "pll\0pmh\0pnt\0pnt_RU\0pra\0prc\0prd\0prt\0prx\0ps\0psh\0psi\0" - "pst\0psu\0pum\0pwo\0pwr\0pww\0pyx\0qxq\0raa\0rab\0raf\0rah\0raj\0" - "rav\0rbb\0rdb\0rei\0rhg\0rji\0rjs\0rka\0rki\0rkt\0rmi\0rmt\0rmz\0" - "rsk\0rtw\0ru\0rue\0rut\0rwr\0ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0" - "sbu\0sck\0scl\0scp\0sct\0scu\0scx\0sd\0sd_IN\0sdb\0sdf\0sdg\0" - "sdh\0sdr\0sds\0sel\0sfm\0sgh\0sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0" - "shm\0shn\0shu\0shv\0si\0sia\0sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0" - "skj\0skr\0smh\0smp\0smu\0smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0" - "sqq\0sqt\0sr\0srb\0srh\0srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0" - "sva\0swb\0swi\0swv\0sxu\0syc\0syl\0syn\0syr\0syw\0ta\0tab\0taj\0" - "tbk\0tcn\0tco\0tcx\0tcy\0tda\0tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0" - "tg_PK\0tge\0tgf\0th\0the\0thf\0thi\0thl\0thm\0thq\0thr\0ths\0" - "ti\0tig\0tij\0tin\0tjl\0tjo\0tkb\0tks\0tkt\0tmr\0tnv\0tov\0tpu\0" - "tra\0trg\0trm\0trw\0tsd\0tsj\0tt\0tth\0tto\0tts\0ttz\0tvn\0twm\0" - "txg\0txo\0tyr\0tyv\0ude\0udg\0udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0" - "ugh\0ugo\0uk\0uki\0ulc\0unr\0unr_NP\0unx\0ur\0urk\0ush\0uum\0" - "uz_AF\0uz_CN\0uzs\0vaa\0vaf\0vah\0vai\0vas\0vav\0vay\0vgr\0vjk\0" - "vmd\0vmh\0wal\0wbk\0wbq\0wbr\0wle\0wlo\0wme\0wne\0wni\0wsg\0wsv\0" - "wtm\0wuu\0xag\0xal\0xan\0xas\0xco\0xcr\0xdq\0xhe\0xhm\0xis\0xka\0" - "xkc\0xkf\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0xmn\0xmr\0xna\0xnr\0xpg\0" - "xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xtq\0xub\0xuj\0xve\0xvi\0xwo\0" - "xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0ygp\0yhd\0yi\0yig\0yih\0" - "yiv\0ykg\0ykh\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0ysn\0ysp\0ysr\0ysy\0" - "yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0zba\0zch\0zdj\0zeh\0" - "zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0zh_GB\0zh_GF\0zh_HK\0" - "zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0zh_TH\0zh_TW\0zh_US\0" - 
"zh_VN\0zhd\0zhx\0zko\0zkt\0zkz\0zlj\0zln\0zlq\0zqe\0zrg\0zrp\0" - "zum\0zwa\0zyg\0zyn\0zzj\0"; + "ltc\0luh\0luk\0luu\0luv\0luz\0lwl\0lwm\0lya\0lzh\0lzz_GE\0mag\0" + "mai\0mby\0mde\0mdf\0mdx\0mdy\0mey\0mfa\0mfi\0mga\0mgp\0mhj\0mid\0" + "mjl\0mjq\0mjr\0mjt\0mju\0mjv\0mjz\0mk\0mkb\0mke\0mki\0mkm\0ml\0" + "mlf\0mn\0mn_CN\0mnc\0mni\0mnj\0mns\0mnw\0mpz\0mr\0mra\0mrd\0mrj\0" + "mro\0mrr\0ms_CC\0mtm\0mtr\0mud\0muk\0mut\0muv\0muz\0mve\0mvf\0" + "mvy\0mvz\0mwr\0mwt\0mww\0my\0mym\0myv\0myz\0mzb\0mzn\0nan\0nan_MO\0" + "nan_TW\0nao\0ncd\0ncq\0ndf\0ne\0neg\0neh\0nei\0new\0ngt\0nio\0" + "nit\0niv\0nli\0nlm\0nlx\0nmm\0nnp\0nod\0noe\0nog\0noi\0non\0nos\0" + "npb\0nqo\0nrn\0nsd\0nsf\0nsk\0nst\0nsv\0nty\0ntz\0nwc\0nwx\0nyl\0" + "nyq\0nyw\0oaa\0oac\0oar\0oav\0obm\0obr\0odk\0oht\0oj\0ojs\0okm\0" + "oko\0okz\0ola\0ole\0omk\0omp\0omr\0omx\0oon\0or\0ort\0oru\0orv\0" + "os\0osa\0osc\0osi\0ota\0otb\0otk\0oty\0oui\0oyb\0pa\0pa_PK\0pal\0" + "paq\0pbt\0pcb\0pce\0pcf\0pcg\0pch\0pci\0pcj\0peg\0peo\0pgd\0pgg\0" + "pgl\0pgn\0phd\0phk\0phl\0phn\0pho\0phr\0pht\0phu\0phv\0phw\0pi_IN\0" + "pi_LK\0pi_MM\0pi_TH\0pka\0pkr\0plk\0pll\0pmh\0pnt\0pnt_RU\0prc\0" + "prd\0prt\0prx\0ps\0psh\0psi\0pst\0psu\0pum\0pwo\0pwr\0pww\0pyx\0" + "qxq\0raa\0rab\0raf\0rah\0raj\0rav\0rbb\0rdb\0rei\0rhg\0rji\0rjs\0" + "rka\0rki\0rkt\0rmi\0rmt\0rmz\0rsk\0rtw\0ru\0rue\0rut\0rwr\0ryu\0" + "sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scp\0sct\0scu\0scx\0" + "sd\0sd_IN\0sdb\0sdf\0sdg\0sdh\0sdr\0sds\0sel\0sfm\0sgh\0sgj\0" + "sgr\0sgt\0sgw\0sgy\0shd\0shi\0shm\0shn\0shu\0shv\0si\0sia\0sip\0" + "siy\0siz\0sjc\0sjd\0sjp\0sjt\0skb\0skj\0skr\0smh\0smp\0smu\0smy\0" + "soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0sr\0srb\0srh\0srx\0" + "srz\0ssh\0sss\0sts\0stu\0stu_CN\0stv\0sty\0suz\0sva\0swb\0swi\0" + "swv\0sxu\0syc\0syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0" + "tcx\0tcy\0tda\0tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0" + "th\0the\0thf\0thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0" + 
"tjl\0tjo\0tkb\0tks\0tkt\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0trw\0" + "tsd\0tsj\0tt\0tth\0tto\0tts\0ttz\0tvn\0twm\0txg\0txo\0tyr\0tyv\0" + "ude\0udg\0udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0" + "ulc\0unr\0unr_NP\0unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0" + "vaa\0vaf\0vah\0vai\0vas\0vav\0vay\0vgr\0vjk\0vmd\0vmh\0wal\0wbk\0" + "wbq\0wbr\0wle\0wlo\0wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xag\0xal\0" + "xan\0xas\0xco\0xcr\0xdq\0xhe\0xhm\0xis\0xka\0xkc\0xkf\0xkj\0xkp\0" + "xlc\0xld\0xly\0xmf\0xmn\0xmr\0xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0" + "xrn\0xsa\0xsd\0xsr\0xtq\0xub\0xuj\0xve\0xvi\0xwo\0xzh\0yai\0ybh\0" + "ybi\0ydg\0yea\0yej\0yeu\0ygp\0yhd\0yi\0yig\0yih\0yiv\0ykg\0ykh\0" + "yna\0ynk\0yoi\0yoy\0yrk\0ysd\0ysn\0ysp\0ysr\0ysy\0yud\0yue\0yue_CN\0" + "yug\0yux\0ywq\0ywu\0zau\0zba\0zch\0zdj\0zeh\0zen\0zgb\0zgh\0zgm\0" + "zgn\0zh\0zh_AU\0zh_BN\0zh_GB\0zh_GF\0zh_HK\0zh_ID\0zh_MO\0zh_PA\0" + "zh_PF\0zh_PH\0zh_SR\0zh_TH\0zh_TW\0zh_US\0zh_VN\0zhd\0zko\0zkt\0" + "zkz\0zlj\0zln\0zlq\0zqe\0zrg\0zrp\0zum\0zwa\0zyg\0zyn\0zzj\0"; const int32_t defaultScriptTable[] = { - 0, 330, // aaf -> Mlym + 0, 325, // aaf -> Mlym 4, 10, // aao -> Arab - 8, 155, // aat -> Grek - 12, 105, // ab -> Cyrl + 8, 150, // aat -> Grek + 12, 100, // ab -> Cyrl 15, 10, // abh -> Arab - 19, 435, // abl -> Rjng - 23, 10, // abv -> Arab - 27, 10, // acm -> Arab - 31, 10, // acq -> Arab - 35, 10, // acw -> Arab - 39, 10, // acx -> Arab - 43, 10, // adf -> Arab - 47, 555, // adx -> Tibt - 51, 105, // ady -> Cyrl - 55, 25, // ae -> Avst - 58, 10, // aeb -> Arab - 62, 10, // aec -> Arab - 66, 10, // aee -> Arab - 70, 10, // aeq -> Arab - 74, 10, // afb -> Arab - 78, 110, // agi -> Deva - 82, 125, // agj -> Ethi - 86, 105, // agx -> Cyrl - 90, 125, // ahg -> Ethi - 94, 5, // aho -> Ahom - 98, 110, // ahr -> Deva - 102, 10, // aib -> Arab - 106, 495, // aii -> Syrc - 110, 190, // aij -> Hebr - 114, 225, // ain -> Kana - 118, 355, // aio -> Mymr - 122, 10, // aiq -> Arab - 126, 590, // akk -> Xsux - 130, 
105, // akv -> Cyrl - 134, 260, // alk -> Laoo - 138, 330, // all -> Mlym - 142, 105, // alr -> Cyrl - 146, 105, // alt -> Cyrl - 150, 125, // alw -> Ethi - 154, 125, // am -> Ethi - 157, 215, // ams -> Jpan - 161, 495, // amw -> Syrc - 165, 105, // ani -> Cyrl - 169, 110, // anp -> Deva - 173, 110, // anq -> Deva - 177, 110, // anr -> Deva - 181, 125, // anu -> Ethi - 185, 50, // aot -> Beng - 189, 10, // apc -> Arab - 193, 10, // apd -> Arab - 197, 110, // aph -> Deva - 201, 105, // aqc -> Cyrl - 205, 10, // ar -> Arab - 208, 15, // arc -> Armi - 212, 10, // arq -> Arab - 216, 10, // ars -> Arab - 220, 10, // ary -> Arab - 224, 10, // arz -> Arab - 228, 50, // as -> Beng - 231, 465, // ase -> Sgnw - 235, 10, // ask -> Arab - 239, 110, // asr -> Deva - 243, 10, // atn -> Arab - 247, 105, // atv -> Cyrl - 251, 10, // auj -> Arab - 255, 10, // auz -> Arab - 259, 105, // av -> Cyrl - 262, 10, // avd -> Arab - 266, 10, // avl -> Arab - 270, 110, // awa -> Deva - 274, 125, // awn -> Ethi - 278, 20, // axm -> Armn - 282, 10, // ayh -> Arab - 286, 10, // ayl -> Arab - 290, 10, // ayn -> Arab - 294, 10, // ayp -> Arab - 298, 10, // az_IQ -> Arab - 304, 10, // az_IR -> Arab - 310, 105, // az_RU -> Cyrl - 316, 10, // azb -> Arab - 320, 105, // ba -> Cyrl - 323, 10, // bal -> Arab - 327, 110, // bap -> Deva - 331, 35, // bax -> Bamu - 335, 130, // bbl -> Geor - 339, 125, // bcq -> Ethi - 343, 395, // bdv -> Orya - 347, 10, // bdz -> Arab - 351, 105, // be -> Cyrl - 354, 110, // bee -> Deva - 358, 10, // bej -> Arab - 362, 110, // bfb -> Deva - 366, 520, // bfq -> Taml - 370, 10, // bft -> Arab - 374, 555, // bfu -> Tibt - 378, 395, // bfw -> Orya - 382, 110, // bfy -> Deva - 386, 110, // bfz -> Deva - 390, 105, // bg -> Cyrl - 393, 110, // bgc -> Deva - 397, 110, // bgd -> Deva - 401, 10, // bgn -> Arab - 405, 10, // bgp -> Arab - 409, 110, // bgq -> Deva - 413, 110, // bgw -> Deva - 417, 155, // bgx -> Grek - 421, 110, // bha -> Deva - 425, 110, // bhb -> Deva - 429, 110, 
// bhd -> Deva - 433, 10, // bhe -> Arab - 437, 105, // bhh -> Cyrl - 441, 110, // bhi -> Deva - 445, 110, // bhj -> Deva - 449, 10, // bhm -> Arab - 453, 495, // bhn -> Syrc - 457, 110, // bho -> Deva - 461, 110, // bht -> Deva - 465, 110, // bhu -> Deva - 469, 110, // biy -> Deva - 473, 495, // bjf -> Syrc - 477, 110, // bjj -> Deva - 481, 10, // bjm -> Arab - 485, 555, // bkk -> Tibt - 489, 355, // blk -> Mymr - 493, 530, // blt -> Tavt - 497, 110, // bmj -> Deva - 501, 50, // bn -> Beng - 504, 110, // bns -> Deva - 508, 555, // bo -> Tibt - 511, 105, // bph -> Cyrl - 515, 110, // bpx -> Deva - 519, 50, // bpy -> Beng - 523, 10, // bqi -> Arab - 527, 110, // bra -> Deva - 531, 235, // brb -> Khmr - 535, 110, // brd -> Deva - 539, 10, // brh -> Arab - 543, 10, // brk -> Arab - 547, 555, // bro -> Tibt - 551, 260, // brv -> Laoo - 555, 245, // brw -> Knda - 559, 110, // brx -> Deva - 563, 10, // bsh -> Arab - 567, 10, // bsk -> Arab - 571, 40, // bsq -> Bass - 575, 125, // bst -> Ethi - 579, 45, // btd -> Batk - 583, 45, // btm -> Batk - 587, 110, // btv -> Deva - 591, 105, // bua -> Cyrl - 595, 355, // bwe -> Mymr - 599, 105, // bxm -> Cyrl - 603, 340, // bxu -> Mong - 607, 110, // byh -> Deva - 611, 125, // byn -> Ethi - 615, 110, // byw -> Deva - 619, 550, // bzi -> Thai - 623, 550, // cbn -> Thai - 627, 65, // ccp -> Cakm - 631, 535, // cde -> Telu - 635, 110, // cdh -> Deva - 639, 160, // cdi -> Gujr - 643, 110, // cdj -> Deva - 647, 110, // cdm -> Deva - 651, 180, // cdo -> Hans - 655, 50, // cdz -> Beng - 659, 105, // ce -> Cyrl - 662, 555, // cgk -> Tibt - 666, 10, // chg -> Arab - 670, 105, // chm -> Cyrl - 674, 85, // chr -> Cher - 678, 110, // chx -> Deva - 682, 110, // cih -> Deva - 686, 10, // cja -> Arab - 690, 105, // cji -> Cyrl - 694, 80, // cjm -> Cham - 698, 180, // cjy -> Hans - 702, 10, // ckb -> Arab - 706, 105, // ckt -> Cyrl - 710, 10, // clh -> Arab - 714, 105, // clw -> Cyrl - 718, 485, // cmg -> Soyo - 722, 555, // cna -> Tibt - 726, 
180, // cnp -> Hans - 730, 550, // cog -> Thai - 734, 95, // cop -> Copt - 738, 155, // cpg -> Grek - 742, 70, // cr -> Cans - 745, 105, // crh -> Cyrl - 749, 70, // crj -> Cans - 753, 70, // crk -> Cans - 757, 70, // crl -> Cans - 761, 70, // crm -> Cans - 765, 355, // csh -> Mymr - 769, 180, // csp -> Hans - 773, 70, // csw -> Cans - 777, 410, // ctd -> Pauc - 781, 50, // ctg -> Beng - 785, 110, // ctn -> Deva - 789, 520, // ctt -> Taml - 793, 520, // cty -> Taml - 797, 105, // cu -> Cyrl - 800, 255, // cuu -> Lana - 804, 105, // cv -> Cyrl - 807, 180, // czh -> Hans - 811, 190, // czk -> Hebr - 815, 110, // daq -> Deva - 819, 105, // dar -> Cyrl - 823, 10, // dcc -> Arab - 827, 105, // ddo -> Cyrl - 831, 10, // def -> Arab - 835, 10, // deh -> Arab - 839, 50, // der -> Beng - 843, 10, // dgl -> Arab - 847, 110, // dhi -> Deva - 851, 160, // dhn -> Gujr - 855, 110, // dho -> Deva - 859, 110, // dhw -> Deva - 863, 555, // dka -> Tibt - 867, 105, // dlg -> Cyrl - 871, 320, // dmf -> Medf - 875, 10, // dmk -> Arab - 879, 10, // dml -> Arab - 883, 105, // dng -> Cyrl - 887, 355, // dnu -> Mymr - 891, 355, // dnv -> Mymr - 895, 110, // doi -> Deva - 899, 125, // dox -> Ethi - 903, 555, // dre -> Tibt - 907, 110, // drq -> Deva - 911, 125, // drs -> Ethi - 915, 110, // dry -> Deva - 919, 395, // dso -> Orya - 923, 110, // dty -> Deva - 927, 160, // dub -> Gujr - 931, 110, // duh -> Deva - 935, 110, // dus -> Deva - 939, 545, // dv -> Thaa - 942, 395, // dwk -> Orya - 946, 110, // dwz -> Deva - 950, 555, // dz -> Tibt - 953, 555, // dzl -> Tibt - 957, 155, // ecr -> Grek - 961, 100, // ecy -> Cprt - 965, 115, // egy -> Egyp - 969, 220, // eky -> Kali - 973, 155, // el -> Grek - 976, 110, // emg -> Deva - 980, 110, // emu -> Deva - 984, 105, // enf -> Cyrl - 988, 105, // enh -> Cyrl - 992, 520, // era -> Taml - 996, 140, // esg -> Gonm - 1000, 10, // esh -> Arab - 1004, 205, // ett -> Ital - 1008, 105, // eve -> Cyrl - 1012, 105, // evn -> Cyrl - 1016, 10, // fa -> Arab 
- 1019, 10, // fay -> Arab - 1023, 10, // faz -> Arab - 1027, 10, // fia -> Arab - 1031, 110, // fmu -> Deva - 1035, 10, // fub -> Arab - 1039, 180, // gan -> Hans - 1043, 395, // gaq -> Orya - 1047, 160, // gas -> Gujr - 1051, 535, // gau -> Telu - 1055, 395, // gbj -> Orya - 1059, 110, // gbk -> Deva - 1063, 160, // gbl -> Gujr - 1067, 110, // gbm -> Deva - 1071, 10, // gbz -> Arab - 1075, 395, // gdb -> Orya - 1079, 105, // gdo -> Cyrl - 1083, 110, // gdx -> Deva - 1087, 125, // gez -> Ethi - 1091, 10, // ggg -> Arab - 1095, 10, // gha -> Arab - 1099, 110, // ghe -> Deva - 1103, 540, // gho -> Tfng - 1107, 10, // ghr -> Arab - 1111, 555, // ght -> Tibt - 1115, 10, // gig -> Arab - 1119, 105, // gin -> Cyrl - 1123, 10, // gjk -> Arab - 1127, 10, // gju -> Arab - 1131, 105, // gld -> Cyrl - 1135, 10, // glh -> Arab - 1139, 10, // glk -> Arab - 1143, 265, // gml -> Latf - 1147, 125, // gmv -> Ethi - 1151, 285, // gmy -> Linb - 1155, 555, // goe -> Tibt - 1159, 125, // gof -> Ethi - 1163, 110, // goj -> Deva - 1167, 110, // gok -> Deva - 1171, 110, // gon -> Deva - 1175, 145, // got -> Goth - 1179, 110, // gra -> Deva - 1183, 155, // grc -> Grek - 1187, 50, // grt -> Beng - 1191, 125, // gru -> Ethi - 1195, 160, // gu -> Gujr - 1198, 110, // gvr -> Deva - 1202, 10, // gwc -> Arab - 1206, 10, // gwf -> Arab - 1210, 10, // gwt -> Arab - 1214, 110, // gyo -> Deva - 1218, 10, // gzi -> Arab - 1222, 10, // ha_CM -> Arab - 1228, 10, // ha_SD -> Arab - 1234, 10, // hac -> Arab - 1238, 180, // hak -> Hans - 1242, 185, // hak_TW -> Hant - 1249, 125, // har -> Ethi - 1253, 10, // haz -> Arab - 1257, 190, // hbo -> Hebr - 1261, 125, // hdy -> Ethi - 1265, 190, // he -> Hebr - 1268, 110, // hi -> Deva - 1271, 110, // hif -> Deva - 1275, 505, // hii -> Takr - 1279, 590, // hit -> Xsux - 1283, 10, // hkh -> Arab - 1287, 110, // hlb -> Deva - 1291, 195, // hlu -> Hluw - 1295, 425, // hmd -> Plrd - 1299, 55, // hmj -> Bopo - 1303, 55, // hmq -> Bopo - 1307, 10, // hnd -> Arab - 
1311, 110, // hne -> Deva - 1315, 200, // hnj -> Hmnp - 1319, 10, // hno -> Arab - 1323, 110, // hoc -> Deva - 1327, 10, // hoh -> Arab - 1331, 110, // hoj -> Deva - 1335, 175, // how -> Hani - 1339, 110, // hoy -> Deva - 1343, 355, // hpo -> Mymr - 1347, 495, // hrt -> Syrc - 1351, 10, // hrz -> Arab - 1355, 180, // hsn -> Hans - 1359, 10, // hss -> Arab - 1363, 590, // htx -> Xsux - 1367, 110, // hut -> Deva - 1371, 190, // huy -> Hebr - 1375, 105, // huz -> Cyrl - 1379, 20, // hy -> Armn - 1382, 20, // hyw -> Armn - 1386, 595, // ii -> Yiii - 1389, 295, // imy -> Lyci - 1393, 105, // inh -> Cyrl - 1397, 355, // int -> Mymr - 1401, 125, // ior -> Ethi - 1405, 520, // iru -> Taml - 1409, 10, // isk -> Arab - 1413, 190, // itk -> Hebr - 1417, 105, // itl -> Cyrl - 1421, 70, // iu -> Cans - 1424, 190, // iw -> Hebr - 1427, 215, // ja -> Jpan - 1430, 10, // jad -> Arab - 1434, 10, // jat -> Arab - 1438, 190, // jbe -> Hebr - 1442, 10, // jbn -> Arab - 1446, 105, // jct -> Cyrl - 1450, 555, // jda -> Tibt - 1454, 10, // jdg -> Arab - 1458, 105, // jdt -> Cyrl - 1462, 110, // jee -> Deva - 1466, 130, // jge -> Geor - 1470, 190, // ji -> Hebr - 1473, 170, // jje -> Hang - 1477, 355, // jkm -> Mymr - 1481, 110, // jml -> Deva - 1485, 505, // jna -> Takr - 1489, 10, // jnd -> Arab - 1493, 110, // jnl -> Deva - 1497, 110, // jns -> Deva - 1501, 10, // jog -> Arab - 1505, 190, // jpa -> Hebr - 1509, 190, // jpr -> Hebr - 1513, 190, // jrb -> Hebr - 1517, 110, // jul -> Deva - 1521, 395, // jun -> Orya - 1525, 395, // juy -> Orya - 1529, 555, // jya -> Tibt - 1533, 190, // jye -> Hebr - 1537, 130, // ka -> Geor - 1540, 105, // kaa -> Cyrl - 1544, 105, // kap -> Cyrl - 1548, 30, // kaw -> Bali - 1552, 105, // kbd -> Cyrl - 1556, 555, // kbg -> Tibt - 1560, 10, // kbu -> Arab - 1564, 10, // kby -> Arab - 1568, 105, // kca -> Cyrl - 1572, 10, // kcy -> Arab - 1576, 50, // kdq -> Beng - 1580, 550, // kdt -> Thai - 1584, 105, // ket -> Cyrl - 1588, 330, // kev -> Mlym - 1592, 
110, // kex -> Deva - 1596, 535, // key -> Telu - 1600, 245, // kfa -> Knda - 1604, 110, // kfb -> Deva - 1608, 535, // kfc -> Telu - 1612, 245, // kfd -> Knda - 1616, 520, // kfe -> Taml - 1620, 245, // kfg -> Knda - 1624, 330, // kfh -> Mlym - 1628, 520, // kfi -> Taml - 1632, 110, // kfk -> Deva - 1636, 10, // kfm -> Arab - 1640, 110, // kfp -> Deva - 1644, 110, // kfq -> Deva - 1648, 110, // kfr -> Deva - 1652, 110, // kfs -> Deva - 1656, 110, // kfu -> Deva - 1660, 110, // kfx -> Deva - 1664, 110, // kfy -> Deva - 1668, 110, // kgj -> Deva - 1672, 110, // kgy -> Deva - 1676, 515, // khb -> Talu - 1680, 550, // khf -> Thai - 1684, 555, // khg -> Tibt - 1688, 110, // khn -> Deva - 1692, 60, // kho -> Brah - 1696, 355, // kht -> Mymr - 1700, 105, // khv -> Cyrl - 1704, 10, // khw -> Arab - 1708, 110, // kif -> Deva - 1712, 105, // kim -> Cyrl - 1716, 110, // kip -> Deva - 1720, 260, // kjg -> Laoo - 1724, 105, // kjh -> Cyrl - 1728, 110, // kjl -> Deva - 1732, 110, // kjo -> Deva - 1736, 355, // kjp -> Mymr - 1740, 550, // kjt -> Thai - 1744, 555, // kjz -> Tibt - 1748, 105, // kk -> Cyrl - 1751, 10, // kk_AF -> Arab - 1757, 10, // kk_CN -> Arab - 1763, 10, // kk_IR -> Arab - 1769, 10, // kk_MN -> Arab - 1775, 555, // kkf -> Tibt - 1779, 255, // kkh -> Lana - 1783, 110, // kkt -> Deva - 1787, 110, // kle -> Deva - 1791, 10, // klj -> Arab - 1795, 110, // klr -> Deva - 1799, 235, // km -> Khmr - 1802, 110, // kmj -> Deva - 1806, 10, // kmz -> Arab - 1810, 245, // kn -> Knda - 1813, 110, // knn -> Deva - 1817, 250, // ko -> Kore - 1820, 105, // koi -> Cyrl - 1824, 110, // kok -> Deva - 1828, 105, // kpt -> Cyrl - 1832, 105, // kpy -> Cyrl - 1836, 495, // kqd -> Syrc - 1840, 125, // kqy -> Ethi - 1844, 110, // kra -> Deva - 1848, 105, // krc -> Cyrl - 1852, 105, // krk -> Cyrl - 1856, 235, // krr -> Khmr - 1860, 110, // kru -> Deva - 1864, 235, // krv -> Khmr - 1868, 10, // ks -> Arab - 1871, 355, // ksu -> Mymr - 1875, 355, // ksw -> Mymr - 1879, 110, // ksz -> 
Deva - 1883, 125, // ktb -> Ethi - 1887, 110, // kte -> Deva - 1891, 10, // ktl -> Arab - 1895, 425, // ktp -> Plrd - 1899, 10, // ku_LB -> Arab - 1905, 260, // kuf -> Laoo - 1909, 105, // kum -> Cyrl - 1913, 105, // kv -> Cyrl - 1916, 105, // kva -> Cyrl - 1920, 355, // kvq -> Mymr - 1924, 355, // kvt -> Mymr - 1928, 10, // kvx -> Arab - 1932, 220, // kvy -> Kali - 1936, 355, // kxf -> Mymr - 1940, 355, // kxk -> Mymr - 1944, 550, // kxm -> Thai - 1948, 10, // kxp -> Arab - 1952, 105, // ky -> Cyrl - 1955, 10, // ky_CN -> Arab - 1961, 220, // kyu -> Kali - 1965, 110, // kyv -> Deva - 1969, 110, // kyw -> Deva - 1973, 280, // lab -> Lina - 1977, 190, // lad -> Hebr - 1981, 110, // lae -> Deva - 1985, 10, // lah -> Arab - 1989, 105, // lbe -> Cyrl - 1993, 110, // lbf -> Deva - 1997, 555, // lbj -> Tibt - 2001, 110, // lbm -> Deva - 2005, 260, // lbo -> Laoo - 2009, 110, // lbr -> Deva - 2013, 550, // lcp -> Thai - 2017, 275, // lep -> Lepc - 2021, 105, // lez -> Cyrl - 2025, 110, // lhm -> Deva - 2029, 495, // lhs -> Syrc - 2033, 110, // lif -> Deva - 2037, 290, // lis -> Lisu - 2041, 555, // lkh -> Tibt - 2045, 10, // lki -> Arab - 2049, 110, // lmh -> Deva - 2053, 535, // lmn -> Telu - 2057, 260, // lo -> Laoo - 2060, 110, // loy -> Deva - 2064, 425, // lpo -> Plrd - 2068, 10, // lrc -> Arab - 2072, 10, // lrk -> Arab - 2076, 10, // lrl -> Arab - 2080, 10, // lsa -> Arab - 2084, 190, // lsd -> Hebr - 2088, 10, // lss -> Arab - 2092, 185, // ltc -> Hant - 2096, 555, // luk -> Tibt - 2100, 110, // luu -> Deva - 2104, 10, // luv -> Arab - 2108, 10, // luz -> Arab - 2112, 550, // lwl -> Thai - 2116, 550, // lwm -> Thai - 2120, 555, // lya -> Tibt - 2124, 180, // lzh -> Hans - 2128, 130, // lzz_GE -> Geor - 2135, 110, // mag -> Deva - 2139, 110, // mai -> Deva - 2143, 10, // mby -> Arab - 2147, 10, // mde -> Arab - 2151, 105, // mdf -> Cyrl - 2155, 125, // mdx -> Ethi - 2159, 125, // mdy -> Ethi - 2163, 10, // mfa -> Arab - 2167, 10, // mfi -> Arab - 2171, 270, // mga 
-> Latg - 2175, 110, // mgp -> Deva - 2179, 10, // mhj -> Arab - 2183, 305, // mid -> Mand - 2187, 110, // mjl -> Deva - 2191, 330, // mjq -> Mlym - 2195, 330, // mjr -> Mlym - 2199, 110, // mjt -> Deva - 2203, 535, // mju -> Telu - 2207, 330, // mjv -> Mlym - 2211, 110, // mjz -> Deva - 2215, 105, // mk -> Cyrl - 2218, 110, // mkb -> Deva - 2222, 110, // mke -> Deva - 2226, 10, // mki -> Arab - 2230, 550, // mkm -> Thai - 2234, 330, // ml -> Mlym - 2237, 550, // mlf -> Thai - 2241, 105, // mn -> Cyrl - 2244, 340, // mn_CN -> Mong - 2250, 340, // mnc -> Mong - 2254, 50, // mni -> Beng - 2258, 10, // mnj -> Arab - 2262, 105, // mns -> Cyrl - 2266, 355, // mnw -> Mymr - 2270, 550, // mpz -> Thai - 2274, 110, // mr -> Deva - 2277, 550, // mra -> Thai - 2281, 110, // mrd -> Deva - 2285, 105, // mrj -> Cyrl - 2289, 345, // mro -> Mroo - 2293, 110, // mrr -> Deva - 2297, 10, // ms_CC -> Arab - 2303, 105, // mtm -> Cyrl - 2307, 110, // mtr -> Deva - 2311, 105, // mud -> Cyrl - 2315, 555, // muk -> Tibt - 2319, 110, // mut -> Deva - 2323, 520, // muv -> Taml - 2327, 125, // muz -> Ethi - 2331, 10, // mve -> Arab - 2335, 340, // mvf -> Mong - 2339, 10, // mvy -> Arab - 2343, 125, // mvz -> Ethi - 2347, 110, // mwr -> Deva - 2351, 355, // mwt -> Mymr - 2355, 200, // mww -> Hmnp - 2359, 355, // my -> Mymr - 2362, 125, // mym -> Ethi - 2366, 105, // myv -> Cyrl - 2370, 305, // myz -> Mand - 2374, 10, // mzn -> Arab - 2378, 180, // nan -> Hans - 2382, 185, // nan_TW -> Hant - 2389, 110, // nao -> Deva - 2393, 110, // ncd -> Deva - 2397, 260, // ncq -> Laoo - 2401, 105, // ndf -> Cyrl - 2405, 110, // ne -> Deva - 2408, 105, // neg -> Cyrl - 2412, 555, // neh -> Tibt - 2416, 590, // nei -> Xsux - 2420, 110, // new -> Deva - 2424, 260, // ngt -> Laoo - 2428, 105, // nio -> Cyrl - 2432, 535, // nit -> Telu - 2436, 105, // niv -> Cyrl - 2440, 10, // nli -> Arab - 2444, 10, // nlm -> Arab - 2448, 110, // nlx -> Deva - 2452, 110, // nmm -> Deva - 2456, 580, // nnp -> Wcho - 2460, 255, 
// nod -> Lana - 2464, 110, // noe -> Deva - 2468, 105, // nog -> Cyrl - 2472, 110, // noi -> Deva - 2476, 445, // non -> Runr - 2480, 595, // nos -> Yiii - 2484, 555, // npb -> Tibt - 2488, 370, // nqo -> Nkoo - 2492, 445, // nrn -> Runr - 2496, 595, // nsd -> Yiii - 2500, 595, // nsf -> Yiii - 2504, 70, // nsk -> Cans - 2508, 560, // nst -> Tnsa - 2512, 595, // nsv -> Yiii - 2516, 595, // nty -> Yiii - 2520, 10, // ntz -> Arab - 2524, 365, // nwc -> Newa - 2528, 110, // nwx -> Deva - 2532, 550, // nyl -> Thai - 2536, 10, // nyq -> Arab - 2540, 550, // nyw -> Thai - 2544, 105, // oaa -> Cyrl - 2548, 105, // oac -> Cyrl - 2552, 495, // oar -> Syrc - 2556, 130, // oav -> Geor - 2560, 420, // obm -> Phnx - 2564, 355, // obr -> Mymr - 2568, 10, // odk -> Arab - 2572, 590, // oht -> Xsux - 2576, 70, // oj -> Cans - 2579, 70, // ojs -> Cans - 2583, 170, // okm -> Hang - 2587, 175, // oko -> Hani - 2591, 235, // okz -> Khmr - 2595, 110, // ola -> Deva - 2599, 555, // ole -> Tibt - 2603, 105, // omk -> Cyrl - 2607, 350, // omp -> Mtei - 2611, 335, // omr -> Modi - 2615, 355, // omx -> Mymr - 2619, 110, // oon -> Deva - 2623, 395, // or -> Orya - 2626, 535, // ort -> Telu - 2630, 10, // oru -> Arab - 2634, 105, // orv -> Cyrl - 2638, 105, // os -> Cyrl - 2641, 400, // osa -> Osge - 2645, 205, // osc -> Ital - 2649, 210, // osi -> Java - 2653, 10, // ota -> Arab - 2657, 555, // otb -> Tibt - 2661, 390, // otk -> Orkh - 2665, 150, // oty -> Gran - 2669, 405, // oui -> Ougr - 2673, 165, // pa -> Guru - 2676, 10, // pa_PK -> Arab - 2682, 415, // pal -> Phli - 2686, 105, // paq -> Cyrl - 2690, 10, // pbt -> Arab - 2694, 235, // pcb -> Khmr - 2698, 355, // pce -> Mymr - 2702, 330, // pcf -> Mlym - 2706, 330, // pcg -> Mlym - 2710, 110, // pch -> Deva - 2714, 110, // pci -> Deva - 2718, 535, // pcj -> Telu - 2722, 395, // peg -> Orya - 2726, 585, // peo -> Xpeo - 2730, 230, // pgd -> Khar - 2734, 110, // pgg -> Deva - 2738, 380, // pgl -> Ogam - 2742, 205, // pgn -> Ital - 2746, 
110, // phd -> Deva - 2750, 355, // phk -> Mymr - 2754, 10, // phl -> Arab - 2758, 420, // phn -> Phnx - 2762, 260, // pho -> Laoo - 2766, 10, // phr -> Arab - 2770, 550, // pht -> Thai - 2774, 550, // phu -> Thai - 2778, 10, // phv -> Arab - 2782, 110, // phw -> Deva - 2786, 470, // pi -> Sinh - 2789, 60, // pka -> Brah - 2793, 330, // pkr -> Mlym - 2797, 10, // plk -> Arab - 2801, 355, // pll -> Mymr - 2805, 60, // pmh -> Brah - 2809, 155, // pnt -> Grek - 2813, 105, // pnt_RU -> Cyrl - 2820, 230, // pra -> Khar - 2824, 10, // prc -> Arab - 2828, 10, // prd -> Arab - 2832, 550, // prt -> Thai - 2836, 10, // prx -> Arab - 2840, 10, // ps -> Arab - 2843, 10, // psh -> Arab - 2847, 10, // psi -> Arab - 2851, 10, // pst -> Arab - 2855, 60, // psu -> Brah - 2859, 110, // pum -> Deva - 2863, 355, // pwo -> Mymr - 2867, 110, // pwr -> Deva - 2871, 550, // pww -> Thai - 2875, 355, // pyx -> Mymr - 2879, 10, // qxq -> Arab - 2883, 110, // raa -> Deva - 2887, 110, // rab -> Deva - 2891, 110, // raf -> Deva - 2895, 50, // rah -> Beng - 2899, 110, // raj -> Deva - 2903, 110, // rav -> Deva - 2907, 355, // rbb -> Mymr - 2911, 10, // rdb -> Arab - 2915, 395, // rei -> Orya - 2919, 440, // rhg -> Rohg - 2923, 110, // rji -> Deva - 2927, 110, // rjs -> Deva - 2931, 235, // rka -> Khmr - 2935, 355, // rki -> Mymr - 2939, 50, // rkt -> Beng - 2943, 20, // rmi -> Armn - 2947, 10, // rmt -> Arab - 2951, 355, // rmz -> Mymr - 2955, 105, // rsk -> Cyrl - 2959, 110, // rtw -> Deva - 2963, 105, // ru -> Cyrl - 2966, 105, // rue -> Cyrl - 2970, 105, // rut -> Cyrl - 2974, 110, // rwr -> Deva - 2978, 225, // ryu -> Kana - 2982, 110, // sa -> Deva - 2985, 105, // sah -> Cyrl - 2989, 450, // sam -> Samr - 2993, 385, // sat -> Olck - 2997, 460, // saz -> Saur - 3001, 10, // sbn -> Arab - 3005, 555, // sbu -> Tibt - 3009, 110, // sck -> Deva - 3013, 10, // scl -> Arab - 3017, 110, // scp -> Deva - 3021, 260, // sct -> Laoo - 3025, 505, // scu -> Takr - 3029, 155, // scx -> Grek - 3033, 10, // 
sd -> Arab - 3036, 110, // sd_IN -> Deva - 3042, 10, // sdb -> Arab - 3046, 10, // sdf -> Arab - 3050, 10, // sdg -> Arab - 3054, 10, // sdh -> Arab - 3058, 50, // sdr -> Beng - 3062, 10, // sds -> Arab - 3066, 105, // sel -> Cyrl - 3070, 425, // sfm -> Plrd - 3074, 105, // sgh -> Cyrl - 3078, 110, // sgj -> Deva - 3082, 10, // sgr -> Arab - 3086, 555, // sgt -> Tibt - 3090, 125, // sgw -> Ethi - 3094, 10, // sgy -> Arab - 3098, 10, // shd -> Arab - 3102, 540, // shi -> Tfng - 3106, 10, // shm -> Arab - 3110, 355, // shn -> Mymr - 3114, 10, // shu -> Arab - 3118, 10, // shv -> Arab - 3122, 470, // si -> Sinh - 3125, 105, // sia -> Cyrl - 3129, 555, // sip -> Tibt - 3133, 10, // siy -> Arab - 3137, 10, // siz -> Arab - 3141, 105, // sjd -> Cyrl - 3145, 110, // sjp -> Deva - 3149, 105, // sjt -> Cyrl - 3153, 550, // skb -> Thai - 3157, 110, // skj -> Deva - 3161, 10, // skr -> Arab - 3165, 595, // smh -> Yiii - 3169, 450, // smp -> Samr - 3173, 235, // smu -> Khmr - 3177, 10, // smy -> Arab - 3181, 530, // soa -> Tavt - 3185, 475, // sog -> Sogd - 3189, 110, // soi -> Deva - 3193, 550, // sou -> Thai - 3197, 555, // spt -> Tibt - 3201, 395, // spv -> Orya - 3205, 10, // sqo -> Arab - 3209, 260, // sqq -> Laoo - 3213, 10, // sqt -> Arab - 3217, 105, // sr -> Cyrl - 3220, 480, // srb -> Sora - 3224, 10, // srh -> Arab - 3228, 110, // srx -> Deva - 3232, 10, // srz -> Arab - 3236, 10, // ssh -> Arab - 3240, 260, // sss -> Laoo - 3244, 10, // sts -> Arab - 3248, 125, // stv -> Ethi - 3252, 105, // sty -> Cyrl - 3256, 490, // suz -> Sunu - 3260, 130, // sva -> Geor - 3264, 10, // swb -> Arab - 3268, 175, // swi -> Hani - 3272, 110, // swv -> Deva - 3276, 445, // sxu -> Runr - 3280, 495, // syc -> Syrc - 3284, 50, // syl -> Beng - 3288, 495, // syn -> Syrc - 3292, 495, // syr -> Syrc - 3296, 110, // syw -> Deva - 3300, 520, // ta -> Taml - 3303, 105, // tab -> Cyrl - 3307, 110, // taj -> Deva - 3311, 500, // tbk -> Tagb - 3315, 555, // tcn -> Tibt - 3319, 355, // tco -> 
Mymr - 3323, 520, // tcx -> Taml - 3327, 245, // tcy -> Knda - 3331, 540, // tda -> Tfng - 3335, 110, // tdb -> Deva - 3339, 510, // tdd -> Tale - 3343, 110, // tdg -> Deva - 3347, 110, // tdh -> Deva - 3351, 535, // te -> Telu - 3354, 210, // tes -> Java - 3358, 105, // tg -> Cyrl - 3361, 10, // tg_PK -> Arab - 3367, 110, // tge -> Deva - 3371, 555, // tgf -> Tibt - 3375, 550, // th -> Thai - 3378, 110, // the -> Deva - 3382, 110, // thf -> Deva - 3386, 510, // thi -> Tale - 3390, 110, // thl -> Deva - 3394, 550, // thm -> Thai - 3398, 110, // thq -> Deva - 3402, 110, // thr -> Deva - 3406, 110, // ths -> Deva - 3410, 125, // ti -> Ethi - 3413, 125, // tig -> Ethi - 3417, 110, // tij -> Deva - 3421, 105, // tin -> Cyrl - 3425, 355, // tjl -> Mymr - 3429, 10, // tjo -> Arab - 3433, 110, // tkb -> Deva - 3437, 10, // tks -> Arab - 3441, 110, // tkt -> Deva - 3445, 495, // tmr -> Syrc - 3449, 65, // tnv -> Cakm - 3453, 10, // tov -> Arab - 3457, 235, // tpu -> Khmr - 3461, 10, // tra -> Arab - 3465, 190, // trg -> Hebr - 3469, 10, // trm -> Arab - 3473, 10, // trw -> Arab - 3477, 155, // tsd -> Grek - 3481, 555, // tsj -> Tibt - 3485, 105, // tt -> Cyrl - 3488, 260, // tth -> Laoo - 3492, 260, // tto -> Laoo - 3496, 550, // tts -> Thai - 3500, 110, // ttz -> Deva - 3504, 355, // tvn -> Mymr - 3508, 110, // twm -> Deva - 3512, 525, // txg -> Tang - 3516, 565, // txo -> Toto - 3520, 530, // tyr -> Tavt - 3524, 105, // tyv -> Cyrl - 3528, 105, // ude -> Cyrl - 3532, 330, // udg -> Mlym - 3536, 105, // udi -> Cyrl - 3540, 105, // udm -> Cyrl - 3544, 10, // ug -> Arab - 3547, 105, // ug_KZ -> Cyrl - 3553, 105, // ug_MN -> Cyrl - 3559, 570, // uga -> Ugar - 3563, 105, // ugh -> Cyrl - 3567, 550, // ugo -> Thai - 3571, 105, // uk -> Cyrl - 3574, 395, // uki -> Orya - 3578, 105, // ulc -> Cyrl - 3582, 50, // unr -> Beng - 3586, 110, // unr_NP -> Deva - 3593, 50, // unx -> Beng - 3597, 10, // ur -> Arab - 3600, 550, // urk -> Thai - 3604, 10, // ush -> Arab - 3608, 155, // 
uum -> Grek - 3612, 10, // uz_AF -> Arab - 3618, 105, // uz_CN -> Cyrl - 3624, 10, // uzs -> Arab - 3628, 520, // vaa -> Taml - 3632, 10, // vaf -> Arab - 3636, 110, // vah -> Deva - 3640, 575, // vai -> Vaii - 3644, 110, // vas -> Deva - 3648, 110, // vav -> Deva - 3652, 110, // vay -> Deva - 3656, 10, // vgr -> Arab - 3660, 110, // vjk -> Deva - 3664, 245, // vmd -> Knda - 3668, 10, // vmh -> Arab - 3672, 125, // wal -> Ethi - 3676, 10, // wbk -> Arab - 3680, 535, // wbq -> Telu - 3684, 110, // wbr -> Deva - 3688, 125, // wle -> Ethi - 3692, 10, // wlo -> Arab - 3696, 110, // wme -> Deva - 3700, 10, // wne -> Arab - 3704, 10, // wni -> Arab - 3708, 135, // wsg -> Gong - 3712, 10, // wsv -> Arab - 3716, 110, // wtm -> Deva - 3720, 180, // wuu -> Hans - 3724, 0, // xag -> Aghb - 3728, 105, // xal -> Cyrl - 3732, 125, // xan -> Ethi - 3736, 105, // xas -> Cyrl - 3740, 90, // xco -> Chrs - 3744, 75, // xcr -> Cari - 3748, 105, // xdq -> Cyrl - 3752, 10, // xhe -> Arab - 3756, 235, // xhm -> Khmr - 3760, 395, // xis -> Orya - 3764, 10, // xka -> Arab - 3768, 10, // xkc -> Arab - 3772, 555, // xkf -> Tibt - 3776, 10, // xkj -> Arab - 3780, 10, // xkp -> Arab - 3784, 295, // xlc -> Lyci - 3788, 300, // xld -> Lydi - 3792, 120, // xly -> Elym - 3796, 130, // xmf -> Geor - 3800, 310, // xmn -> Mani - 3804, 325, // xmr -> Merc - 3808, 360, // xna -> Narb - 3812, 110, // xnr -> Deva - 3816, 155, // xpg -> Grek - 3820, 380, // xpi -> Ogam - 3824, 105, // xpm -> Cyrl - 3828, 430, // xpr -> Prti - 3832, 105, // xrm -> Cyrl - 3836, 105, // xrn -> Cyrl - 3840, 455, // xsa -> Sarb - 3844, 110, // xsr -> Deva - 3848, 60, // xtq -> Brah - 3852, 520, // xub -> Taml - 3856, 520, // xuj -> Taml - 3860, 205, // xve -> Ital - 3864, 10, // xvi -> Arab - 3868, 105, // xwo -> Cyrl - 3872, 315, // xzh -> Marc - 3876, 105, // yai -> Cyrl - 3880, 110, // ybh -> Deva - 3884, 110, // ybi -> Deva - 3888, 10, // ydg -> Arab - 3892, 330, // yea -> Mlym - 3896, 155, // yej -> Grek - 3900, 535, // 
yeu -> Telu - 3904, 425, // ygp -> Plrd - 3908, 190, // yhd -> Hebr - 3912, 190, // yi -> Hebr - 3915, 595, // yig -> Yiii - 3919, 190, // yih -> Hebr - 3923, 595, // yiv -> Yiii - 3927, 105, // ykg -> Cyrl - 3931, 105, // ykh -> Cyrl - 3935, 425, // yna -> Plrd - 3939, 105, // ynk -> Cyrl - 3943, 215, // yoi -> Jpan - 3947, 550, // yoy -> Thai - 3951, 105, // yrk -> Cyrl - 3955, 595, // ysd -> Yiii - 3959, 595, // ysn -> Yiii - 3963, 595, // ysp -> Yiii - 3967, 105, // ysr -> Cyrl - 3971, 425, // ysy -> Plrd - 3975, 190, // yud -> Hebr - 3979, 185, // yue -> Hant - 3983, 180, // yue_CN -> Hans - 3990, 105, // yug -> Cyrl - 3994, 105, // yux -> Cyrl - 3998, 425, // ywq -> Plrd - 4002, 425, // ywu -> Plrd - 4006, 555, // zau -> Tibt - 4010, 10, // zba -> Arab - 4014, 175, // zch -> Hani - 4018, 10, // zdj -> Arab - 4022, 175, // zeh -> Hani - 4026, 540, // zen -> Tfng - 4030, 175, // zgb -> Hani - 4034, 540, // zgh -> Tfng - 4038, 175, // zgm -> Hani - 4042, 175, // zgn -> Hani - 4046, 180, // zh -> Hans - 4049, 185, // zh_AU -> Hant - 4055, 185, // zh_BN -> Hant - 4061, 185, // zh_GB -> Hant - 4067, 185, // zh_GF -> Hant - 4073, 185, // zh_HK -> Hant - 4079, 185, // zh_ID -> Hant - 4085, 185, // zh_MO -> Hant - 4091, 185, // zh_PA -> Hant - 4097, 185, // zh_PF -> Hant - 4103, 185, // zh_PH -> Hant - 4109, 185, // zh_SR -> Hant - 4115, 185, // zh_TH -> Hant - 4121, 185, // zh_TW -> Hant - 4127, 185, // zh_US -> Hant - 4133, 185, // zh_VN -> Hant - 4139, 175, // zhd -> Hani - 4143, 375, // zhx -> Nshu - 4147, 105, // zko -> Cyrl - 4151, 240, // zkt -> Kits - 4155, 105, // zkz -> Cyrl - 4159, 175, // zlj -> Hani - 4163, 175, // zln -> Hani - 4167, 175, // zlq -> Hani - 4171, 175, // zqe -> Hani - 4175, 395, // zrg -> Orya - 4179, 190, // zrp -> Hebr - 4183, 10, // zum -> Arab - 4187, 125, // zwa -> Ethi - 4191, 175, // zyg -> Hani - 4195, 175, // zyn -> Hani - 4199, 175, // zzj -> Hani + 19, 425, // abl -> Rjng + 23, 100, // abq -> Cyrl + 27, 10, // abv -> Arab + 31, 
10, // acm -> Arab + 35, 10, // acq -> Arab + 39, 10, // acw -> Arab + 43, 10, // acx -> Arab + 47, 10, // adf -> Arab + 51, 545, // adx -> Tibt + 55, 100, // ady -> Cyrl + 59, 25, // ae -> Avst + 62, 10, // aeb -> Arab + 66, 10, // aec -> Arab + 70, 10, // aee -> Arab + 74, 10, // aeq -> Arab + 78, 10, // afb -> Arab + 82, 105, // agi -> Deva + 86, 120, // agj -> Ethi + 90, 100, // agx -> Cyrl + 94, 120, // ahg -> Ethi + 98, 5, // aho -> Ahom + 102, 105, // ahr -> Deva + 106, 10, // aib -> Arab + 110, 485, // aii -> Syrc + 114, 185, // aij -> Hebr + 118, 220, // ain -> Kana + 122, 350, // aio -> Mymr + 126, 10, // aiq -> Arab + 130, 580, // akk -> Xsux + 134, 100, // akv -> Cyrl + 138, 255, // alk -> Laoo + 142, 325, // all -> Mlym + 146, 100, // alr -> Cyrl + 150, 100, // alt -> Cyrl + 154, 120, // alw -> Ethi + 158, 120, // am -> Ethi + 161, 210, // ams -> Jpan + 165, 485, // amw -> Syrc + 169, 100, // ani -> Cyrl + 173, 105, // anp -> Deva + 177, 105, // anq -> Deva + 181, 105, // anr -> Deva + 185, 120, // anu -> Ethi + 189, 45, // aot -> Beng + 193, 10, // apc -> Arab + 197, 10, // apd -> Arab + 201, 105, // aph -> Deva + 205, 100, // aqc -> Cyrl + 209, 10, // ar -> Arab + 212, 15, // arc -> Armi + 216, 10, // arq -> Arab + 220, 10, // ars -> Arab + 224, 10, // ary -> Arab + 228, 10, // arz -> Arab + 232, 45, // as -> Beng + 235, 455, // ase -> Sgnw + 239, 10, // ask -> Arab + 243, 105, // asr -> Deva + 247, 10, // atn -> Arab + 251, 100, // atv -> Cyrl + 255, 10, // auj -> Arab + 259, 10, // auz -> Arab + 263, 100, // av -> Cyrl + 266, 10, // avd -> Arab + 270, 10, // avl -> Arab + 274, 105, // awa -> Deva + 278, 120, // awn -> Ethi + 282, 20, // axm -> Armn + 286, 10, // ayh -> Arab + 290, 10, // ayl -> Arab + 294, 10, // ayn -> Arab + 298, 10, // ayp -> Arab + 302, 10, // az_IQ -> Arab + 308, 10, // az_IR -> Arab + 314, 100, // az_RU -> Cyrl + 320, 10, // azb -> Arab + 324, 100, // ba -> Cyrl + 327, 10, // bal -> Arab + 331, 105, // bap -> Deva + 335, 35, 
// bax -> Bamu + 339, 125, // bbl -> Geor + 343, 120, // bcq -> Ethi + 347, 385, // bdv -> Orya + 351, 10, // bdz -> Arab + 355, 100, // be -> Cyrl + 358, 105, // bee -> Deva + 362, 10, // bej -> Arab + 366, 105, // bfb -> Deva + 370, 510, // bfq -> Taml + 374, 10, // bft -> Arab + 378, 545, // bfu -> Tibt + 382, 385, // bfw -> Orya + 386, 105, // bfy -> Deva + 390, 105, // bfz -> Deva + 394, 100, // bg -> Cyrl + 397, 105, // bgc -> Deva + 401, 105, // bgd -> Deva + 405, 10, // bgn -> Arab + 409, 10, // bgp -> Arab + 413, 105, // bgq -> Deva + 417, 105, // bgw -> Deva + 421, 150, // bgx -> Grek + 425, 105, // bha -> Deva + 429, 105, // bhb -> Deva + 433, 105, // bhd -> Deva + 437, 10, // bhe -> Arab + 441, 100, // bhh -> Cyrl + 445, 105, // bhi -> Deva + 449, 105, // bhj -> Deva + 453, 10, // bhm -> Arab + 457, 485, // bhn -> Syrc + 461, 105, // bho -> Deva + 465, 105, // bht -> Deva + 469, 105, // bhu -> Deva + 473, 105, // bix -> Deva + 477, 105, // biy -> Deva + 481, 485, // bjf -> Syrc + 485, 105, // bjj -> Deva + 489, 10, // bjm -> Arab + 493, 545, // bkk -> Tibt + 497, 350, // blk -> Mymr + 501, 520, // blt -> Tavt + 505, 105, // bmj -> Deva + 509, 45, // bn -> Beng + 512, 105, // bns -> Deva + 516, 545, // bo -> Tibt + 519, 100, // bph -> Cyrl + 523, 105, // bpx -> Deva + 527, 45, // bpy -> Beng + 531, 10, // bqi -> Arab + 535, 105, // bra -> Deva + 539, 230, // brb -> Khmr + 543, 105, // brd -> Deva + 547, 10, // brh -> Arab + 551, 10, // brk -> Arab + 555, 545, // bro -> Tibt + 559, 255, // brv -> Laoo + 563, 240, // brw -> Knda + 567, 105, // brx -> Deva + 571, 10, // bsh -> Arab + 575, 10, // bsk -> Arab + 579, 120, // bst -> Ethi + 583, 40, // btd -> Batk + 587, 40, // btm -> Batk + 591, 105, // btv -> Deva + 595, 100, // bua -> Cyrl + 599, 350, // bwe -> Mymr + 603, 100, // bxm -> Cyrl + 607, 335, // bxu -> Mong + 611, 105, // byh -> Deva + 615, 120, // byn -> Ethi + 619, 105, // byw -> Deva + 623, 540, // bzi -> Thai + 627, 540, // cbn -> Thai + 631, 
60, // ccp -> Cakm + 635, 525, // cde -> Telu + 639, 105, // cdh -> Deva + 643, 155, // cdi -> Gujr + 647, 105, // cdj -> Deva + 651, 105, // cdm -> Deva + 655, 105, // cdn -> Deva + 659, 175, // cdo -> Hans + 663, 45, // cdz -> Beng + 667, 100, // ce -> Cyrl + 670, 545, // cgk -> Tibt + 674, 10, // chg -> Arab + 678, 100, // chm -> Cyrl + 682, 80, // chr -> Cher + 686, 105, // chx -> Deva + 690, 105, // cih -> Deva + 694, 10, // cja -> Arab + 698, 100, // cji -> Cyrl + 702, 75, // cjm -> Cham + 706, 175, // cjy -> Hans + 710, 10, // ckb -> Arab + 714, 100, // ckt -> Cyrl + 718, 10, // clh -> Arab + 722, 100, // clw -> Cyrl + 726, 480, // cmg -> Soyo + 730, 545, // cna -> Tibt + 734, 175, // cnp -> Hans + 738, 540, // cog -> Thai + 742, 90, // cop -> Copt + 746, 150, // cpg -> Grek + 750, 65, // cr -> Cans + 753, 100, // crh -> Cyrl + 757, 65, // crj -> Cans + 761, 65, // crk -> Cans + 765, 65, // crl -> Cans + 769, 65, // crm -> Cans + 773, 350, // csh -> Mymr + 777, 175, // csp -> Hans + 781, 65, // csw -> Cans + 785, 400, // ctd -> Pauc + 789, 45, // ctg -> Beng + 793, 105, // ctn -> Deva + 797, 510, // ctt -> Taml + 801, 510, // cty -> Taml + 805, 100, // cu -> Cyrl + 808, 250, // cuu -> Lana + 812, 100, // cv -> Cyrl + 815, 175, // czh -> Hans + 819, 185, // czk -> Hebr + 823, 105, // daq -> Deva + 827, 100, // dar -> Cyrl + 831, 10, // dcc -> Arab + 835, 100, // ddo -> Cyrl + 839, 10, // def -> Arab + 843, 10, // deh -> Arab + 847, 45, // der -> Beng + 851, 10, // dgl -> Arab + 855, 105, // dhi -> Deva + 859, 155, // dhn -> Gujr + 863, 155, // dho -> Gujr + 867, 105, // dhw -> Deva + 871, 545, // dka -> Tibt + 875, 100, // dlg -> Cyrl + 879, 315, // dmf -> Medf + 883, 10, // dmk -> Arab + 887, 10, // dml -> Arab + 891, 100, // dng -> Cyrl + 895, 350, // dnu -> Mymr + 899, 350, // dnv -> Mymr + 903, 105, // doi -> Deva + 907, 120, // dox -> Ethi + 911, 545, // dre -> Tibt + 915, 105, // drq -> Deva + 919, 120, // drs -> Ethi + 923, 105, // dry -> Deva + 927, 
385, // dso -> Orya + 931, 105, // dty -> Deva + 935, 155, // dub -> Gujr + 939, 105, // duh -> Deva + 943, 105, // dus -> Deva + 947, 535, // dv -> Thaa + 950, 385, // dwk -> Orya + 954, 105, // dwz -> Deva + 958, 545, // dz -> Tibt + 961, 545, // dzl -> Tibt + 965, 150, // ecr -> Grek + 969, 95, // ecy -> Cprt + 973, 110, // egy -> Egyp + 977, 215, // eky -> Kali + 981, 150, // el -> Grek + 984, 105, // emg -> Deva + 988, 105, // emu -> Deva + 992, 100, // enf -> Cyrl + 996, 100, // enh -> Cyrl + 1000, 510, // era -> Taml + 1004, 135, // esg -> Gonm + 1008, 10, // esh -> Arab + 1012, 200, // ett -> Ital + 1016, 100, // eve -> Cyrl + 1020, 100, // evn -> Cyrl + 1024, 10, // fa -> Arab + 1027, 10, // fay -> Arab + 1031, 10, // faz -> Arab + 1035, 10, // fia -> Arab + 1039, 105, // fmu -> Deva + 1043, 10, // fub -> Arab + 1047, 175, // gan -> Hans + 1051, 385, // gaq -> Orya + 1055, 155, // gas -> Gujr + 1059, 525, // gau -> Telu + 1063, 385, // gbj -> Orya + 1067, 105, // gbk -> Deva + 1071, 155, // gbl -> Gujr + 1075, 105, // gbm -> Deva + 1079, 10, // gbz -> Arab + 1083, 385, // gdb -> Orya + 1087, 100, // gdo -> Cyrl + 1091, 105, // gdx -> Deva + 1095, 120, // gez -> Ethi + 1099, 10, // ggg -> Arab + 1103, 10, // gha -> Arab + 1107, 105, // ghe -> Deva + 1111, 530, // gho -> Tfng + 1115, 10, // ghr -> Arab + 1119, 545, // ght -> Tibt + 1123, 10, // gig -> Arab + 1127, 100, // gin -> Cyrl + 1131, 10, // gjk -> Arab + 1135, 10, // gju -> Arab + 1139, 100, // gld -> Cyrl + 1143, 10, // glh -> Arab + 1147, 10, // glk -> Arab + 1151, 260, // gml -> Latf + 1155, 120, // gmv -> Ethi + 1159, 280, // gmy -> Linb + 1163, 545, // goe -> Tibt + 1167, 120, // gof -> Ethi + 1171, 105, // goj -> Deva + 1175, 105, // gok -> Deva + 1179, 105, // gon -> Deva + 1183, 140, // got -> Goth + 1187, 105, // gra -> Deva + 1191, 150, // grc -> Grek + 1195, 10, // grr -> Arab + 1199, 45, // grt -> Beng + 1203, 120, // gru -> Ethi + 1207, 155, // gu -> Gujr + 1210, 105, // gvr -> Deva + 
1214, 10, // gwc -> Arab + 1218, 10, // gwf -> Arab + 1222, 10, // gwt -> Arab + 1226, 105, // gyo -> Deva + 1230, 10, // gzi -> Arab + 1234, 10, // ha_CM -> Arab + 1240, 10, // ha_SD -> Arab + 1246, 10, // hac -> Arab + 1250, 175, // hak -> Hans + 1254, 180, // hak_TW -> Hant + 1261, 120, // har -> Ethi + 1265, 10, // haz -> Arab + 1269, 185, // hbo -> Hebr + 1273, 120, // hdy -> Ethi + 1277, 185, // he -> Hebr + 1280, 105, // hi -> Deva + 1283, 105, // hif -> Deva + 1287, 495, // hii -> Takr + 1291, 580, // hit -> Xsux + 1295, 10, // hkh -> Arab + 1299, 105, // hlb -> Deva + 1303, 190, // hlu -> Hluw + 1307, 415, // hmd -> Plrd + 1311, 50, // hmj -> Bopo + 1315, 50, // hmq -> Bopo + 1319, 10, // hnd -> Arab + 1323, 105, // hne -> Deva + 1327, 195, // hnj -> Hmnp + 1331, 175, // hnm -> Hans + 1335, 10, // hno -> Arab + 1339, 105, // hoc -> Deva + 1343, 10, // hoh -> Arab + 1347, 105, // hoj -> Deva + 1351, 170, // how -> Hani + 1355, 105, // hoy -> Deva + 1359, 350, // hpo -> Mymr + 1363, 485, // hrt -> Syrc + 1367, 10, // hrz -> Arab + 1371, 175, // hsn -> Hans + 1375, 10, // hss -> Arab + 1379, 580, // htx -> Xsux + 1383, 105, // hut -> Deva + 1387, 185, // huy -> Hebr + 1391, 100, // huz -> Cyrl + 1395, 20, // hy -> Armn + 1398, 20, // hyw -> Armn + 1402, 585, // ii -> Yiii + 1405, 290, // imy -> Lyci + 1409, 100, // inh -> Cyrl + 1413, 350, // int -> Mymr + 1417, 120, // ior -> Ethi + 1421, 510, // iru -> Taml + 1425, 10, // isk -> Arab + 1429, 185, // itk -> Hebr + 1433, 100, // itl -> Cyrl + 1437, 65, // iu -> Cans + 1440, 185, // iw -> Hebr + 1443, 210, // ja -> Jpan + 1446, 10, // jad -> Arab + 1450, 10, // jat -> Arab + 1454, 185, // jbe -> Hebr + 1458, 10, // jbn -> Arab + 1462, 100, // jct -> Cyrl + 1466, 545, // jda -> Tibt + 1470, 10, // jdg -> Arab + 1474, 100, // jdt -> Cyrl + 1478, 105, // jee -> Deva + 1482, 125, // jge -> Geor + 1486, 185, // ji -> Hebr + 1489, 165, // jje -> Hang + 1493, 350, // jkm -> Mymr + 1497, 105, // jml -> Deva + 1501, 
495, // jna -> Takr + 1505, 10, // jnd -> Arab + 1509, 105, // jnl -> Deva + 1513, 105, // jns -> Deva + 1517, 10, // jog -> Arab + 1521, 185, // jpa -> Hebr + 1525, 185, // jpr -> Hebr + 1529, 185, // jrb -> Hebr + 1533, 105, // jul -> Deva + 1537, 385, // jun -> Orya + 1541, 385, // juy -> Orya + 1545, 545, // jya -> Tibt + 1549, 185, // jye -> Hebr + 1553, 125, // ka -> Geor + 1556, 100, // kaa -> Cyrl + 1560, 100, // kap -> Cyrl + 1564, 30, // kaw -> Bali + 1568, 100, // kbd -> Cyrl + 1572, 545, // kbg -> Tibt + 1576, 10, // kbu -> Arab + 1580, 10, // kby -> Arab + 1584, 100, // kca -> Cyrl + 1588, 10, // kcy -> Arab + 1592, 45, // kdq -> Beng + 1596, 540, // kdt -> Thai + 1600, 100, // ket -> Cyrl + 1604, 325, // kev -> Mlym + 1608, 105, // kex -> Deva + 1612, 525, // key -> Telu + 1616, 240, // kfa -> Knda + 1620, 105, // kfb -> Deva + 1624, 525, // kfc -> Telu + 1628, 240, // kfd -> Knda + 1632, 510, // kfe -> Taml + 1636, 240, // kfg -> Knda + 1640, 325, // kfh -> Mlym + 1644, 510, // kfi -> Taml + 1648, 105, // kfk -> Deva + 1652, 10, // kfm -> Arab + 1656, 105, // kfp -> Deva + 1660, 105, // kfq -> Deva + 1664, 105, // kfr -> Deva + 1668, 105, // kfs -> Deva + 1672, 105, // kfu -> Deva + 1676, 105, // kfx -> Deva + 1680, 105, // kfy -> Deva + 1684, 105, // kgj -> Deva + 1688, 545, // kgy -> Tibt + 1692, 505, // khb -> Talu + 1696, 540, // khf -> Thai + 1700, 545, // khg -> Tibt + 1704, 105, // khn -> Deva + 1708, 55, // kho -> Brah + 1712, 350, // kht -> Mymr + 1716, 100, // khv -> Cyrl + 1720, 10, // khw -> Arab + 1724, 105, // kif -> Deva + 1728, 100, // kim -> Cyrl + 1732, 105, // kip -> Deva + 1736, 255, // kjg -> Laoo + 1740, 100, // kjh -> Cyrl + 1744, 105, // kjl -> Deva + 1748, 105, // kjo -> Deva + 1752, 350, // kjp -> Mymr + 1756, 540, // kjt -> Thai + 1760, 545, // kjz -> Tibt + 1764, 100, // kk -> Cyrl + 1767, 10, // kk_AF -> Arab + 1773, 10, // kk_CN -> Arab + 1779, 10, // kk_IR -> Arab + 1785, 10, // kk_MN -> Arab + 1791, 545, // kkf -> Tibt 
+ 1795, 250, // kkh -> Lana + 1799, 105, // kkt -> Deva + 1803, 105, // kle -> Deva + 1807, 10, // klj -> Arab + 1811, 105, // klr -> Deva + 1815, 230, // km -> Khmr + 1818, 105, // kmj -> Deva + 1822, 10, // kmz -> Arab + 1826, 240, // kn -> Knda + 1829, 105, // knn -> Deva + 1833, 245, // ko -> Kore + 1836, 100, // koi -> Cyrl + 1840, 105, // kok -> Deva + 1844, 100, // kpt -> Cyrl + 1848, 100, // kpy -> Cyrl + 1852, 485, // kqd -> Syrc + 1856, 120, // kqy -> Ethi + 1860, 105, // kra -> Deva + 1864, 100, // krc -> Cyrl + 1868, 100, // krk -> Cyrl + 1872, 230, // krr -> Khmr + 1876, 105, // kru -> Deva + 1880, 230, // krv -> Khmr + 1884, 10, // ks -> Arab + 1887, 350, // ksu -> Mymr + 1891, 350, // ksw -> Mymr + 1895, 105, // ksz -> Deva + 1899, 120, // ktb -> Ethi + 1903, 105, // kte -> Deva + 1907, 10, // ktl -> Arab + 1911, 415, // ktp -> Plrd + 1915, 100, // ku_AM -> Cyrl + 1921, 100, // ku_AZ -> Cyrl + 1927, 100, // ku_GE -> Cyrl + 1933, 10, // ku_IQ -> Arab + 1939, 10, // ku_IR -> Arab + 1945, 10, // ku_LB -> Arab + 1951, 100, // ku_TM -> Cyrl + 1957, 255, // kuf -> Laoo + 1961, 100, // kum -> Cyrl + 1965, 100, // kv -> Cyrl + 1968, 100, // kva -> Cyrl + 1972, 350, // kvq -> Mymr + 1976, 350, // kvt -> Mymr + 1980, 10, // kvx -> Arab + 1984, 215, // kvy -> Kali + 1988, 105, // kwx -> Deva + 1992, 350, // kxf -> Mymr + 1996, 350, // kxk -> Mymr + 2000, 540, // kxm -> Thai + 2004, 10, // kxp -> Arab + 2008, 100, // ky -> Cyrl + 2011, 10, // ky_CN -> Arab + 2017, 215, // kyu -> Kali + 2021, 105, // kyv -> Deva + 2025, 105, // kyw -> Deva + 2029, 275, // lab -> Lina + 2033, 185, // lad -> Hebr + 2037, 105, // lae -> Deva + 2041, 10, // lah -> Arab + 2045, 100, // lbe -> Cyrl + 2049, 105, // lbf -> Deva + 2053, 545, // lbj -> Tibt + 2057, 105, // lbm -> Deva + 2061, 255, // lbo -> Laoo + 2065, 105, // lbr -> Deva + 2069, 540, // lcp -> Thai + 2073, 270, // lep -> Lepc + 2077, 100, // lez -> Cyrl + 2081, 105, // lhm -> Deva + 2085, 485, // lhs -> Syrc + 2089, 105, 
// lif -> Deva + 2093, 285, // lis -> Lisu + 2097, 545, // lkh -> Tibt + 2101, 10, // lki -> Arab + 2105, 105, // lmh -> Deva + 2109, 525, // lmn -> Telu + 2113, 255, // lo -> Laoo + 2116, 105, // loy -> Deva + 2120, 415, // lpo -> Plrd + 2124, 10, // lrc -> Arab + 2128, 10, // lrk -> Arab + 2132, 10, // lrl -> Arab + 2136, 10, // lsa -> Arab + 2140, 185, // lsd -> Hebr + 2144, 10, // lss -> Arab + 2148, 180, // ltc -> Hant + 2152, 175, // luh -> Hans + 2156, 545, // luk -> Tibt + 2160, 105, // luu -> Deva + 2164, 10, // luv -> Arab + 2168, 10, // luz -> Arab + 2172, 540, // lwl -> Thai + 2176, 540, // lwm -> Thai + 2180, 545, // lya -> Tibt + 2184, 180, // lzh -> Hant + 2188, 125, // lzz_GE -> Geor + 2195, 105, // mag -> Deva + 2199, 105, // mai -> Deva + 2203, 10, // mby -> Arab + 2207, 10, // mde -> Arab + 2211, 100, // mdf -> Cyrl + 2215, 120, // mdx -> Ethi + 2219, 120, // mdy -> Ethi + 2223, 10, // mey -> Arab + 2227, 10, // mfa -> Arab + 2231, 10, // mfi -> Arab + 2235, 265, // mga -> Latg + 2239, 105, // mgp -> Deva + 2243, 10, // mhj -> Arab + 2247, 300, // mid -> Mand + 2251, 105, // mjl -> Deva + 2255, 325, // mjq -> Mlym + 2259, 325, // mjr -> Mlym + 2263, 105, // mjt -> Deva + 2267, 525, // mju -> Telu + 2271, 325, // mjv -> Mlym + 2275, 105, // mjz -> Deva + 2279, 100, // mk -> Cyrl + 2282, 105, // mkb -> Deva + 2286, 105, // mke -> Deva + 2290, 10, // mki -> Arab + 2294, 540, // mkm -> Thai + 2298, 325, // ml -> Mlym + 2301, 540, // mlf -> Thai + 2305, 100, // mn -> Cyrl + 2308, 335, // mn_CN -> Mong + 2314, 335, // mnc -> Mong + 2318, 45, // mni -> Beng + 2322, 10, // mnj -> Arab + 2326, 100, // mns -> Cyrl + 2330, 350, // mnw -> Mymr + 2334, 540, // mpz -> Thai + 2338, 105, // mr -> Deva + 2341, 540, // mra -> Thai + 2345, 105, // mrd -> Deva + 2349, 100, // mrj -> Cyrl + 2353, 340, // mro -> Mroo + 2357, 105, // mrr -> Deva + 2361, 10, // ms_CC -> Arab + 2367, 100, // mtm -> Cyrl + 2371, 105, // mtr -> Deva + 2375, 100, // mud -> Cyrl + 2379, 545, 
// muk -> Tibt + 2383, 105, // mut -> Deva + 2387, 510, // muv -> Taml + 2391, 120, // muz -> Ethi + 2395, 10, // mve -> Arab + 2399, 335, // mvf -> Mong + 2403, 10, // mvy -> Arab + 2407, 120, // mvz -> Ethi + 2411, 105, // mwr -> Deva + 2415, 350, // mwt -> Mymr + 2419, 195, // mww -> Hmnp + 2423, 350, // my -> Mymr + 2426, 120, // mym -> Ethi + 2430, 100, // myv -> Cyrl + 2434, 300, // myz -> Mand + 2438, 10, // mzb -> Arab + 2442, 10, // mzn -> Arab + 2446, 175, // nan -> Hans + 2450, 180, // nan_MO -> Hant + 2457, 180, // nan_TW -> Hant + 2464, 105, // nao -> Deva + 2468, 105, // ncd -> Deva + 2472, 255, // ncq -> Laoo + 2476, 100, // ndf -> Cyrl + 2480, 105, // ne -> Deva + 2483, 100, // neg -> Cyrl + 2487, 545, // neh -> Tibt + 2491, 580, // nei -> Xsux + 2495, 105, // new -> Deva + 2499, 255, // ngt -> Laoo + 2503, 100, // nio -> Cyrl + 2507, 525, // nit -> Telu + 2511, 100, // niv -> Cyrl + 2515, 10, // nli -> Arab + 2519, 10, // nlm -> Arab + 2523, 105, // nlx -> Deva + 2527, 105, // nmm -> Deva + 2531, 570, // nnp -> Wcho + 2535, 250, // nod -> Lana + 2539, 105, // noe -> Deva + 2543, 100, // nog -> Cyrl + 2547, 105, // noi -> Deva + 2551, 435, // non -> Runr + 2555, 585, // nos -> Yiii + 2559, 545, // npb -> Tibt + 2563, 365, // nqo -> Nkoo + 2567, 435, // nrn -> Runr + 2571, 585, // nsd -> Yiii + 2575, 585, // nsf -> Yiii + 2579, 65, // nsk -> Cans + 2583, 550, // nst -> Tnsa + 2587, 585, // nsv -> Yiii + 2591, 585, // nty -> Yiii + 2595, 10, // ntz -> Arab + 2599, 360, // nwc -> Newa + 2603, 105, // nwx -> Deva + 2607, 540, // nyl -> Thai + 2611, 10, // nyq -> Arab + 2615, 540, // nyw -> Thai + 2619, 100, // oaa -> Cyrl + 2623, 100, // oac -> Cyrl + 2627, 485, // oar -> Syrc + 2631, 125, // oav -> Geor + 2635, 410, // obm -> Phnx + 2639, 350, // obr -> Mymr + 2643, 10, // odk -> Arab + 2647, 580, // oht -> Xsux + 2651, 65, // oj -> Cans + 2654, 65, // ojs -> Cans + 2658, 165, // okm -> Hang + 2662, 170, // oko -> Hani + 2666, 230, // okz -> Khmr + 
2670, 105, // ola -> Deva + 2674, 545, // ole -> Tibt + 2678, 100, // omk -> Cyrl + 2682, 345, // omp -> Mtei + 2686, 330, // omr -> Modi + 2690, 350, // omx -> Mymr + 2694, 105, // oon -> Deva + 2698, 385, // or -> Orya + 2701, 525, // ort -> Telu + 2705, 10, // oru -> Arab + 2709, 100, // orv -> Cyrl + 2713, 100, // os -> Cyrl + 2716, 390, // osa -> Osge + 2720, 200, // osc -> Ital + 2724, 205, // osi -> Java + 2728, 10, // ota -> Arab + 2732, 545, // otb -> Tibt + 2736, 380, // otk -> Orkh + 2740, 145, // oty -> Gran + 2744, 395, // oui -> Ougr + 2748, 255, // oyb -> Laoo + 2752, 160, // pa -> Guru + 2755, 10, // pa_PK -> Arab + 2761, 405, // pal -> Phli + 2765, 100, // paq -> Cyrl + 2769, 10, // pbt -> Arab + 2773, 230, // pcb -> Khmr + 2777, 350, // pce -> Mymr + 2781, 325, // pcf -> Mlym + 2785, 325, // pcg -> Mlym + 2789, 105, // pch -> Deva + 2793, 105, // pci -> Deva + 2797, 525, // pcj -> Telu + 2801, 385, // peg -> Orya + 2805, 575, // peo -> Xpeo + 2809, 225, // pgd -> Khar + 2813, 105, // pgg -> Deva + 2817, 370, // pgl -> Ogam + 2821, 200, // pgn -> Ital + 2825, 105, // phd -> Deva + 2829, 350, // phk -> Mymr + 2833, 10, // phl -> Arab + 2837, 410, // phn -> Phnx + 2841, 255, // pho -> Laoo + 2845, 10, // phr -> Arab + 2849, 540, // pht -> Thai + 2853, 540, // phu -> Thai + 2857, 10, // phv -> Arab + 2861, 105, // phw -> Deva + 2865, 105, // pi_IN -> Deva + 2871, 465, // pi_LK -> Sinh + 2877, 350, // pi_MM -> Mymr + 2883, 540, // pi_TH -> Thai + 2889, 55, // pka -> Brah + 2893, 325, // pkr -> Mlym + 2897, 10, // plk -> Arab + 2901, 350, // pll -> Mymr + 2905, 55, // pmh -> Brah + 2909, 150, // pnt -> Grek + 2913, 100, // pnt_RU -> Cyrl + 2920, 10, // prc -> Arab + 2924, 10, // prd -> Arab + 2928, 540, // prt -> Thai + 2932, 10, // prx -> Arab + 2936, 10, // ps -> Arab + 2939, 10, // psh -> Arab + 2943, 10, // psi -> Arab + 2947, 10, // pst -> Arab + 2951, 55, // psu -> Brah + 2955, 105, // pum -> Deva + 2959, 350, // pwo -> Mymr + 2963, 105, // pwr -> 
Deva + 2967, 540, // pww -> Thai + 2971, 350, // pyx -> Mymr + 2975, 10, // qxq -> Arab + 2979, 105, // raa -> Deva + 2983, 105, // rab -> Deva + 2987, 105, // raf -> Deva + 2991, 45, // rah -> Beng + 2995, 105, // raj -> Deva + 2999, 105, // rav -> Deva + 3003, 350, // rbb -> Mymr + 3007, 10, // rdb -> Arab + 3011, 385, // rei -> Orya + 3015, 430, // rhg -> Rohg + 3019, 105, // rji -> Deva + 3023, 105, // rjs -> Deva + 3027, 230, // rka -> Khmr + 3031, 350, // rki -> Mymr + 3035, 45, // rkt -> Beng + 3039, 20, // rmi -> Armn + 3043, 10, // rmt -> Arab + 3047, 350, // rmz -> Mymr + 3051, 100, // rsk -> Cyrl + 3055, 105, // rtw -> Deva + 3059, 100, // ru -> Cyrl + 3062, 100, // rue -> Cyrl + 3066, 100, // rut -> Cyrl + 3070, 105, // rwr -> Deva + 3074, 220, // ryu -> Kana + 3078, 105, // sa -> Deva + 3081, 100, // sah -> Cyrl + 3085, 440, // sam -> Samr + 3089, 375, // sat -> Olck + 3093, 450, // saz -> Saur + 3097, 10, // sbn -> Arab + 3101, 545, // sbu -> Tibt + 3105, 105, // sck -> Deva + 3109, 10, // scl -> Arab + 3113, 105, // scp -> Deva + 3117, 255, // sct -> Laoo + 3121, 495, // scu -> Takr + 3125, 150, // scx -> Grek + 3129, 10, // sd -> Arab + 3132, 105, // sd_IN -> Deva + 3138, 10, // sdb -> Arab + 3142, 10, // sdf -> Arab + 3146, 10, // sdg -> Arab + 3150, 10, // sdh -> Arab + 3154, 45, // sdr -> Beng + 3158, 10, // sds -> Arab + 3162, 100, // sel -> Cyrl + 3166, 415, // sfm -> Plrd + 3170, 100, // sgh -> Cyrl + 3174, 105, // sgj -> Deva + 3178, 10, // sgr -> Arab + 3182, 545, // sgt -> Tibt + 3186, 120, // sgw -> Ethi + 3190, 10, // sgy -> Arab + 3194, 10, // shd -> Arab + 3198, 530, // shi -> Tfng + 3202, 10, // shm -> Arab + 3206, 350, // shn -> Mymr + 3210, 10, // shu -> Arab + 3214, 10, // shv -> Arab + 3218, 465, // si -> Sinh + 3221, 100, // sia -> Cyrl + 3225, 545, // sip -> Tibt + 3229, 10, // siy -> Arab + 3233, 10, // siz -> Arab + 3237, 175, // sjc -> Hans + 3241, 100, // sjd -> Cyrl + 3245, 105, // sjp -> Deva + 3249, 100, // sjt -> Cyrl + 
3253, 540, // skb -> Thai + 3257, 105, // skj -> Deva + 3261, 10, // skr -> Arab + 3265, 585, // smh -> Yiii + 3269, 440, // smp -> Samr + 3273, 230, // smu -> Khmr + 3277, 10, // smy -> Arab + 3281, 520, // soa -> Tavt + 3285, 470, // sog -> Sogd + 3289, 105, // soi -> Deva + 3293, 540, // sou -> Thai + 3297, 545, // spt -> Tibt + 3301, 385, // spv -> Orya + 3305, 10, // sqo -> Arab + 3309, 255, // sqq -> Laoo + 3313, 10, // sqt -> Arab + 3317, 100, // sr -> Cyrl + 3320, 475, // srb -> Sora + 3324, 10, // srh -> Arab + 3328, 105, // srx -> Deva + 3332, 10, // srz -> Arab + 3336, 10, // ssh -> Arab + 3340, 255, // sss -> Laoo + 3344, 10, // sts -> Arab + 3348, 250, // stu -> Lana + 3352, 500, // stu_CN -> Tale + 3359, 120, // stv -> Ethi + 3363, 100, // sty -> Cyrl + 3367, 105, // suz -> Deva + 3371, 125, // sva -> Geor + 3375, 10, // swb -> Arab + 3379, 170, // swi -> Hani + 3383, 105, // swv -> Deva + 3387, 435, // sxu -> Runr + 3391, 485, // syc -> Syrc + 3395, 45, // syl -> Beng + 3399, 485, // syn -> Syrc + 3403, 485, // syr -> Syrc + 3407, 105, // syw -> Deva + 3411, 510, // ta -> Taml + 3414, 100, // tab -> Cyrl + 3418, 105, // taj -> Deva + 3422, 490, // tbk -> Tagb + 3426, 105, // tcn -> Deva + 3430, 350, // tco -> Mymr + 3434, 510, // tcx -> Taml + 3438, 240, // tcy -> Knda + 3442, 530, // tda -> Tfng + 3446, 105, // tdb -> Deva + 3450, 500, // tdd -> Tale + 3454, 105, // tdg -> Deva + 3458, 105, // tdh -> Deva + 3462, 525, // te -> Telu + 3465, 205, // tes -> Java + 3469, 100, // tg -> Cyrl + 3472, 10, // tg_PK -> Arab + 3478, 105, // tge -> Deva + 3482, 545, // tgf -> Tibt + 3486, 540, // th -> Thai + 3489, 105, // the -> Deva + 3493, 105, // thf -> Deva + 3497, 500, // thi -> Tale + 3501, 105, // thl -> Deva + 3505, 540, // thm -> Thai + 3509, 105, // thq -> Deva + 3513, 105, // thr -> Deva + 3517, 105, // ths -> Deva + 3521, 120, // ti -> Ethi + 3524, 120, // tig -> Ethi + 3528, 105, // tij -> Deva + 3532, 100, // tin -> Cyrl + 3536, 350, // tjl -> 
Mymr + 3540, 10, // tjo -> Arab + 3544, 105, // tkb -> Deva + 3548, 10, // tks -> Arab + 3552, 105, // tkt -> Deva + 3556, 485, // tmr -> Syrc + 3560, 60, // tnv -> Cakm + 3564, 10, // tov -> Arab + 3568, 230, // tpu -> Khmr + 3572, 10, // tra -> Arab + 3576, 185, // trg -> Hebr + 3580, 10, // trm -> Arab + 3584, 10, // trw -> Arab + 3588, 150, // tsd -> Grek + 3592, 545, // tsj -> Tibt + 3596, 100, // tt -> Cyrl + 3599, 255, // tth -> Laoo + 3603, 255, // tto -> Laoo + 3607, 540, // tts -> Thai + 3611, 105, // ttz -> Deva + 3615, 350, // tvn -> Mymr + 3619, 545, // twm -> Tibt + 3623, 515, // txg -> Tang + 3627, 555, // txo -> Toto + 3631, 520, // tyr -> Tavt + 3635, 100, // tyv -> Cyrl + 3639, 100, // ude -> Cyrl + 3643, 325, // udg -> Mlym + 3647, 100, // udi -> Cyrl + 3651, 100, // udm -> Cyrl + 3655, 10, // ug -> Arab + 3658, 100, // ug_KZ -> Cyrl + 3664, 100, // ug_MN -> Cyrl + 3670, 560, // uga -> Ugar + 3674, 100, // ugh -> Cyrl + 3678, 540, // ugo -> Thai + 3682, 100, // uk -> Cyrl + 3685, 385, // uki -> Orya + 3689, 100, // ulc -> Cyrl + 3693, 45, // unr -> Beng + 3697, 105, // unr_NP -> Deva + 3704, 45, // unx -> Beng + 3708, 10, // ur -> Arab + 3711, 540, // urk -> Thai + 3715, 10, // ush -> Arab + 3719, 150, // uum -> Grek + 3723, 10, // uz_AF -> Arab + 3729, 100, // uz_CN -> Cyrl + 3735, 10, // uzs -> Arab + 3739, 510, // vaa -> Taml + 3743, 10, // vaf -> Arab + 3747, 105, // vah -> Deva + 3751, 565, // vai -> Vaii + 3755, 105, // vas -> Deva + 3759, 105, // vav -> Deva + 3763, 105, // vay -> Deva + 3767, 10, // vgr -> Arab + 3771, 105, // vjk -> Deva + 3775, 240, // vmd -> Knda + 3779, 10, // vmh -> Arab + 3783, 120, // wal -> Ethi + 3787, 10, // wbk -> Arab + 3791, 525, // wbq -> Telu + 3795, 105, // wbr -> Deva + 3799, 120, // wle -> Ethi + 3803, 10, // wlo -> Arab + 3807, 105, // wme -> Deva + 3811, 10, // wne -> Arab + 3815, 10, // wni -> Arab + 3819, 130, // wsg -> Gong + 3823, 10, // wsv -> Arab + 3827, 105, // wtm -> Deva + 3831, 175, // wuu 
-> Hans + 3835, 0, // xag -> Aghb + 3839, 100, // xal -> Cyrl + 3843, 120, // xan -> Ethi + 3847, 100, // xas -> Cyrl + 3851, 85, // xco -> Chrs + 3855, 70, // xcr -> Cari + 3859, 100, // xdq -> Cyrl + 3863, 10, // xhe -> Arab + 3867, 230, // xhm -> Khmr + 3871, 385, // xis -> Orya + 3875, 10, // xka -> Arab + 3879, 10, // xkc -> Arab + 3883, 545, // xkf -> Tibt + 3887, 10, // xkj -> Arab + 3891, 10, // xkp -> Arab + 3895, 290, // xlc -> Lyci + 3899, 295, // xld -> Lydi + 3903, 115, // xly -> Elym + 3907, 125, // xmf -> Geor + 3911, 305, // xmn -> Mani + 3915, 320, // xmr -> Merc + 3919, 355, // xna -> Narb + 3923, 105, // xnr -> Deva + 3927, 150, // xpg -> Grek + 3931, 370, // xpi -> Ogam + 3935, 100, // xpm -> Cyrl + 3939, 420, // xpr -> Prti + 3943, 100, // xrm -> Cyrl + 3947, 100, // xrn -> Cyrl + 3951, 445, // xsa -> Sarb + 3955, 460, // xsd -> Sidt + 3959, 105, // xsr -> Deva + 3963, 55, // xtq -> Brah + 3967, 510, // xub -> Taml + 3971, 510, // xuj -> Taml + 3975, 200, // xve -> Ital + 3979, 10, // xvi -> Arab + 3983, 100, // xwo -> Cyrl + 3987, 310, // xzh -> Marc + 3991, 100, // yai -> Cyrl + 3995, 105, // ybh -> Deva + 3999, 105, // ybi -> Deva + 4003, 10, // ydg -> Arab + 4007, 325, // yea -> Mlym + 4011, 150, // yej -> Grek + 4015, 525, // yeu -> Telu + 4019, 415, // ygp -> Plrd + 4023, 185, // yhd -> Hebr + 4027, 185, // yi -> Hebr + 4030, 585, // yig -> Yiii + 4034, 185, // yih -> Hebr + 4038, 585, // yiv -> Yiii + 4042, 100, // ykg -> Cyrl + 4046, 100, // ykh -> Cyrl + 4050, 415, // yna -> Plrd + 4054, 100, // ynk -> Cyrl + 4058, 210, // yoi -> Jpan + 4062, 540, // yoy -> Thai + 4066, 100, // yrk -> Cyrl + 4070, 585, // ysd -> Yiii + 4074, 585, // ysn -> Yiii + 4078, 585, // ysp -> Yiii + 4082, 100, // ysr -> Cyrl + 4086, 415, // ysy -> Plrd + 4090, 185, // yud -> Hebr + 4094, 180, // yue -> Hant + 4098, 175, // yue_CN -> Hans + 4105, 100, // yug -> Cyrl + 4109, 100, // yux -> Cyrl + 4113, 415, // ywq -> Plrd + 4117, 415, // ywu -> Plrd + 4121, 545, 
// zau -> Tibt + 4125, 10, // zba -> Arab + 4129, 170, // zch -> Hani + 4133, 10, // zdj -> Arab + 4137, 170, // zeh -> Hani + 4141, 530, // zen -> Tfng + 4145, 170, // zgb -> Hani + 4149, 530, // zgh -> Tfng + 4153, 170, // zgm -> Hani + 4157, 170, // zgn -> Hani + 4161, 175, // zh -> Hans + 4164, 180, // zh_AU -> Hant + 4170, 180, // zh_BN -> Hant + 4176, 180, // zh_GB -> Hant + 4182, 180, // zh_GF -> Hant + 4188, 180, // zh_HK -> Hant + 4194, 180, // zh_ID -> Hant + 4200, 180, // zh_MO -> Hant + 4206, 180, // zh_PA -> Hant + 4212, 180, // zh_PF -> Hant + 4218, 180, // zh_PH -> Hant + 4224, 180, // zh_SR -> Hant + 4230, 180, // zh_TH -> Hant + 4236, 180, // zh_TW -> Hant + 4242, 180, // zh_US -> Hant + 4248, 180, // zh_VN -> Hant + 4254, 170, // zhd -> Hani + 4258, 100, // zko -> Cyrl + 4262, 235, // zkt -> Kits + 4266, 100, // zkz -> Cyrl + 4270, 170, // zlj -> Hani + 4274, 170, // zln -> Hani + 4278, 170, // zlq -> Hani + 4282, 170, // zqe -> Hani + 4286, 385, // zrg -> Orya + 4290, 185, // zrp -> Hebr + 4294, 10, // zum -> Arab + 4298, 120, // zwa -> Ethi + 4302, 170, // zyg -> Hani + 4306, 170, // zyn -> Hani + 4310, 170, // zzj -> Hani }; //====================================================================== @@ -1163,38 +1185,39 @@ const char parentLocaleChars[] = "cu_Glag\0dje_Arab\0dyo_Arab\0en_001\0en_150\0en_AG\0en_AI\0en_AT\0" "en_AU\0en_BB\0en_BE\0en_BM\0en_BS\0en_BW\0en_BZ\0en_CC\0en_CH\0" "en_CK\0en_CM\0en_CX\0en_CY\0en_CZ\0en_DE\0en_DG\0en_DK\0en_DM\0" - "en_Dsrt\0en_ER\0en_ES\0en_FI\0en_FJ\0en_FK\0en_FM\0en_FR\0en_GB\0" - "en_GD\0en_GG\0en_GH\0en_GI\0en_GM\0en_GS\0en_GY\0en_HK\0en_HU\0" - "en_ID\0en_IE\0en_IL\0en_IM\0en_IN\0en_IO\0en_IT\0en_JE\0en_JM\0" - "en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0en_LS\0en_MG\0en_MO\0" - "en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0en_NA\0en_NF\0en_NG\0" - "en_NL\0en_NO\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0en_PL\0en_PN\0" - "en_PT\0en_PW\0en_RO\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0" - 
"en_SH\0en_SI\0en_SK\0en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0" - "en_TK\0en_TO\0en_TT\0en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0" - "en_WS\0en_ZA\0en_ZM\0en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0" - "es_CL\0es_CO\0es_CR\0es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_JP\0" - "es_MX\0es_NI\0es_PA\0es_PE\0es_PR\0es_PY\0es_SV\0es_US\0es_UY\0" - "es_VE\0ff_Adlm\0ff_Arab\0fr_HT\0ha_Arab\0hi_Latn\0ht\0iu_Latn\0" - "kaa_Latn\0kk_Arab\0kok_Latn\0ks_Deva\0ku_Arab\0kxv_Deva\0kxv_Orya\0" - "kxv_Telu\0ky_Arab\0ky_Latn\0ml_Arab\0mn_Mong\0mni_Mtei\0ms_Arab\0" - "nb\0nn\0no\0no_NO\0pa_Arab\0pt_AO\0pt_CH\0pt_CV\0pt_FR\0pt_GQ\0" - "pt_GW\0pt_LU\0pt_MO\0pt_MZ\0pt_PT\0pt_ST\0pt_TL\0root\0sat_Deva\0" - "sd_Deva\0sd_Khoj\0sd_Sind\0shi_Latn\0so_Arab\0sr_Latn\0sw_Arab\0" - "tg_Arab\0ug_Cyrl\0uz_Arab\0uz_Cyrl\0vai_Latn\0wo_Arab\0yo_Arab\0" - "yue_Hans\0zh_Hant\0zh_Hant_HK\0zh_Hant_MO\0"; + "en_Dsrt\0en_EE\0en_ER\0en_ES\0en_FI\0en_FJ\0en_FK\0en_FM\0en_FR\0" + "en_GB\0en_GD\0en_GE\0en_GG\0en_GH\0en_GI\0en_GM\0en_GS\0en_GY\0" + "en_HK\0en_HU\0en_ID\0en_IE\0en_IL\0en_IM\0en_IN\0en_IO\0en_IT\0" + "en_JE\0en_JM\0en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0en_LS\0" + "en_LT\0en_LV\0en_MG\0en_MO\0en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0" + "en_MY\0en_NA\0en_NF\0en_NG\0en_NL\0en_NO\0en_NR\0en_NU\0en_NZ\0" + "en_PG\0en_PK\0en_PL\0en_PN\0en_PT\0en_PW\0en_RO\0en_RW\0en_SB\0" + "en_SC\0en_SD\0en_SE\0en_SG\0en_SH\0en_SI\0en_SK\0en_SL\0en_SS\0" + "en_SX\0en_SZ\0en_Shaw\0en_TC\0en_TK\0en_TO\0en_TT\0en_TV\0en_TZ\0" + "en_UA\0en_UG\0en_VC\0en_VG\0en_VU\0en_WS\0en_ZA\0en_ZM\0en_ZW\0" + "es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0es_CL\0es_CO\0es_CR\0es_CU\0" + "es_DO\0es_EC\0es_GT\0es_HN\0es_JP\0es_MX\0es_NI\0es_PA\0es_PE\0" + "es_PR\0es_PY\0es_SV\0es_US\0es_UY\0es_VE\0ff_Adlm\0ff_Arab\0fr_HT\0" + "ha_Arab\0hi_Latn\0ht\0iu_Latn\0kaa_Latn\0kk_Arab\0kok_Latn\0ks_Deva\0" + "ku_Arab\0kxv_Deva\0kxv_Orya\0kxv_Telu\0ky_Arab\0ky_Latn\0ml_Arab\0" + "mn_Mong\0mni_Mtei\0ms_Arab\0nb\0nn\0no\0no_NO\0pa_Arab\0pt_AO\0" + 
"pt_CH\0pt_CV\0pt_FR\0pt_GQ\0pt_GW\0pt_LU\0pt_MO\0pt_MZ\0pt_PT\0" + "pt_ST\0pt_TL\0root\0sat_Deva\0sd_Deva\0sd_Khoj\0sd_Sind\0shi_Latn\0" + "so_Arab\0sr_Latn\0suz_Sunu\0sw_Arab\0tg_Arab\0ug_Cyrl\0uz_Arab\0" + "uz_Cyrl\0vai_Latn\0wo_Arab\0yo_Arab\0yue_Hans\0zh_Hant\0zh_Hant_HK\0" + "zh_Hant_MO\0"; const int32_t parentLocaleTable[] = { - 0, 1146, // az_Arab -> root - 8, 1146, // az_Cyrl -> root - 16, 1146, // bal_Latn -> root - 25, 1146, // blt_Latn -> root - 34, 1146, // bm_Nkoo -> root - 42, 1146, // bs_Cyrl -> root - 50, 1146, // byn_Latn -> root - 59, 1146, // cu_Glag -> root - 67, 1146, // dje_Arab -> root - 76, 1146, // dyo_Arab -> root + 0, 1176, // az_Arab -> root + 8, 1176, // az_Cyrl -> root + 16, 1176, // bal_Latn -> root + 25, 1176, // blt_Latn -> root + 34, 1176, // bm_Nkoo -> root + 42, 1176, // bs_Cyrl -> root + 50, 1176, // byn_Latn -> root + 59, 1176, // cu_Glag -> root + 67, 1176, // dje_Arab -> root + 76, 1176, // dyo_Arab -> root 92, 85, // en_150 -> en_001 99, 85, // en_AG -> en_001 105, 85, // en_AI -> en_001 @@ -1217,167 +1240,173 @@ const int32_t parentLocaleTable[] = { 207, 85, // en_DG -> en_001 213, 92, // en_DK -> en_150 219, 85, // en_DM -> en_001 - 225, 1146, // en_Dsrt -> root - 233, 85, // en_ER -> en_001 - 239, 92, // en_ES -> en_150 - 245, 92, // en_FI -> en_150 - 251, 85, // en_FJ -> en_001 - 257, 85, // en_FK -> en_001 - 263, 85, // en_FM -> en_001 - 269, 92, // en_FR -> en_150 - 275, 85, // en_GB -> en_001 - 281, 85, // en_GD -> en_001 - 287, 85, // en_GG -> en_001 - 293, 85, // en_GH -> en_001 - 299, 85, // en_GI -> en_001 - 305, 85, // en_GM -> en_001 - 311, 85, // en_GS -> en_001 - 317, 85, // en_GY -> en_001 - 323, 85, // en_HK -> en_001 - 329, 92, // en_HU -> en_150 - 335, 85, // en_ID -> en_001 - 341, 85, // en_IE -> en_001 - 347, 85, // en_IL -> en_001 - 353, 85, // en_IM -> en_001 - 359, 85, // en_IN -> en_001 - 365, 85, // en_IO -> en_001 - 371, 92, // en_IT -> en_150 - 377, 85, // en_JE -> en_001 - 383, 85, // en_JM -> 
en_001 - 389, 85, // en_KE -> en_001 - 395, 85, // en_KI -> en_001 - 401, 85, // en_KN -> en_001 - 407, 85, // en_KY -> en_001 - 413, 85, // en_LC -> en_001 - 419, 85, // en_LR -> en_001 - 425, 85, // en_LS -> en_001 - 431, 85, // en_MG -> en_001 - 437, 85, // en_MO -> en_001 - 443, 85, // en_MS -> en_001 - 449, 85, // en_MT -> en_001 - 455, 85, // en_MU -> en_001 - 461, 85, // en_MV -> en_001 - 467, 85, // en_MW -> en_001 - 473, 85, // en_MY -> en_001 - 479, 85, // en_NA -> en_001 - 485, 85, // en_NF -> en_001 - 491, 85, // en_NG -> en_001 - 497, 92, // en_NL -> en_150 - 503, 92, // en_NO -> en_150 - 509, 85, // en_NR -> en_001 - 515, 85, // en_NU -> en_001 - 521, 85, // en_NZ -> en_001 - 527, 85, // en_PG -> en_001 - 533, 85, // en_PK -> en_001 - 539, 92, // en_PL -> en_150 - 545, 85, // en_PN -> en_001 - 551, 92, // en_PT -> en_150 - 557, 85, // en_PW -> en_001 - 563, 92, // en_RO -> en_150 - 569, 85, // en_RW -> en_001 - 575, 85, // en_SB -> en_001 - 581, 85, // en_SC -> en_001 - 587, 85, // en_SD -> en_001 - 593, 92, // en_SE -> en_150 - 599, 85, // en_SG -> en_001 - 605, 85, // en_SH -> en_001 - 611, 92, // en_SI -> en_150 - 617, 92, // en_SK -> en_150 - 623, 85, // en_SL -> en_001 - 629, 85, // en_SS -> en_001 - 635, 85, // en_SX -> en_001 - 641, 85, // en_SZ -> en_001 - 647, 1146, // en_Shaw -> root - 655, 85, // en_TC -> en_001 - 661, 85, // en_TK -> en_001 - 667, 85, // en_TO -> en_001 - 673, 85, // en_TT -> en_001 - 679, 85, // en_TV -> en_001 - 685, 85, // en_TZ -> en_001 - 691, 85, // en_UG -> en_001 - 697, 85, // en_VC -> en_001 - 703, 85, // en_VG -> en_001 - 709, 85, // en_VU -> en_001 - 715, 85, // en_WS -> en_001 - 721, 85, // en_ZA -> en_001 - 727, 85, // en_ZM -> en_001 - 733, 85, // en_ZW -> en_001 - 746, 739, // es_AR -> es_419 - 752, 739, // es_BO -> es_419 - 758, 739, // es_BR -> es_419 - 764, 739, // es_BZ -> es_419 - 770, 739, // es_CL -> es_419 - 776, 739, // es_CO -> es_419 - 782, 739, // es_CR -> es_419 - 788, 739, // es_CU -> es_419 - 
794, 739, // es_DO -> es_419 - 800, 739, // es_EC -> es_419 - 806, 739, // es_GT -> es_419 - 812, 739, // es_HN -> es_419 - 818, 739, // es_JP -> es_419 - 824, 739, // es_MX -> es_419 - 830, 739, // es_NI -> es_419 - 836, 739, // es_PA -> es_419 - 842, 739, // es_PE -> es_419 - 848, 739, // es_PR -> es_419 - 854, 739, // es_PY -> es_419 - 860, 739, // es_SV -> es_419 - 866, 739, // es_US -> es_419 - 872, 739, // es_UY -> es_419 - 878, 739, // es_VE -> es_419 - 884, 1146, // ff_Adlm -> root - 892, 1146, // ff_Arab -> root - 906, 1146, // ha_Arab -> root - 914, 359, // hi_Latn -> en_IN - 922, 900, // ht -> fr_HT - 925, 1146, // iu_Latn -> root - 933, 1146, // kaa_Latn -> root - 942, 1146, // kk_Arab -> root - 950, 1146, // kok_Latn -> root - 959, 1146, // ks_Deva -> root - 967, 1146, // ku_Arab -> root - 975, 1146, // kxv_Deva -> root - 984, 1146, // kxv_Orya -> root - 993, 1146, // kxv_Telu -> root - 1002, 1146, // ky_Arab -> root - 1010, 1146, // ky_Latn -> root - 1018, 1146, // ml_Arab -> root - 1026, 1146, // mn_Mong -> root - 1034, 1146, // mni_Mtei -> root - 1043, 1146, // ms_Arab -> root - 1051, 1057, // nb -> no - 1054, 1057, // nn -> no - 1060, 1057, // no_NO -> no - 1066, 1146, // pa_Arab -> root - 1074, 1128, // pt_AO -> pt_PT - 1080, 1128, // pt_CH -> pt_PT - 1086, 1128, // pt_CV -> pt_PT - 1092, 1128, // pt_FR -> pt_PT - 1098, 1128, // pt_GQ -> pt_PT - 1104, 1128, // pt_GW -> pt_PT - 1110, 1128, // pt_LU -> pt_PT - 1116, 1128, // pt_MO -> pt_PT - 1122, 1128, // pt_MZ -> pt_PT - 1134, 1128, // pt_ST -> pt_PT - 1140, 1128, // pt_TL -> pt_PT - 1151, 1146, // sat_Deva -> root - 1160, 1146, // sd_Deva -> root - 1168, 1146, // sd_Khoj -> root - 1176, 1146, // sd_Sind -> root - 1184, 1146, // shi_Latn -> root - 1193, 1146, // so_Arab -> root - 1201, 1146, // sr_Latn -> root - 1209, 1146, // sw_Arab -> root - 1217, 1146, // tg_Arab -> root - 1225, 1146, // ug_Cyrl -> root - 1233, 1146, // uz_Arab -> root - 1241, 1146, // uz_Cyrl -> root - 1249, 1146, // vai_Latn 
-> root - 1258, 1146, // wo_Arab -> root - 1266, 1146, // yo_Arab -> root - 1274, 1146, // yue_Hans -> root - 1283, 1146, // zh_Hant -> root - 1302, 1291, // zh_Hant_MO -> zh_Hant_HK + 225, 1176, // en_Dsrt -> root + 233, 92, // en_EE -> en_150 + 239, 85, // en_ER -> en_001 + 245, 92, // en_ES -> en_150 + 251, 92, // en_FI -> en_150 + 257, 85, // en_FJ -> en_001 + 263, 85, // en_FK -> en_001 + 269, 85, // en_FM -> en_001 + 275, 92, // en_FR -> en_150 + 281, 85, // en_GB -> en_001 + 287, 85, // en_GD -> en_001 + 293, 92, // en_GE -> en_150 + 299, 85, // en_GG -> en_001 + 305, 85, // en_GH -> en_001 + 311, 85, // en_GI -> en_001 + 317, 85, // en_GM -> en_001 + 323, 85, // en_GS -> en_001 + 329, 85, // en_GY -> en_001 + 335, 85, // en_HK -> en_001 + 341, 92, // en_HU -> en_150 + 347, 85, // en_ID -> en_001 + 353, 85, // en_IE -> en_001 + 359, 85, // en_IL -> en_001 + 365, 85, // en_IM -> en_001 + 371, 85, // en_IN -> en_001 + 377, 85, // en_IO -> en_001 + 383, 92, // en_IT -> en_150 + 389, 85, // en_JE -> en_001 + 395, 85, // en_JM -> en_001 + 401, 85, // en_KE -> en_001 + 407, 85, // en_KI -> en_001 + 413, 85, // en_KN -> en_001 + 419, 85, // en_KY -> en_001 + 425, 85, // en_LC -> en_001 + 431, 85, // en_LR -> en_001 + 437, 85, // en_LS -> en_001 + 443, 92, // en_LT -> en_150 + 449, 92, // en_LV -> en_150 + 455, 85, // en_MG -> en_001 + 461, 85, // en_MO -> en_001 + 467, 85, // en_MS -> en_001 + 473, 85, // en_MT -> en_001 + 479, 85, // en_MU -> en_001 + 485, 85, // en_MV -> en_001 + 491, 85, // en_MW -> en_001 + 497, 85, // en_MY -> en_001 + 503, 85, // en_NA -> en_001 + 509, 85, // en_NF -> en_001 + 515, 85, // en_NG -> en_001 + 521, 92, // en_NL -> en_150 + 527, 92, // en_NO -> en_150 + 533, 85, // en_NR -> en_001 + 539, 85, // en_NU -> en_001 + 545, 85, // en_NZ -> en_001 + 551, 85, // en_PG -> en_001 + 557, 85, // en_PK -> en_001 + 563, 92, // en_PL -> en_150 + 569, 85, // en_PN -> en_001 + 575, 92, // en_PT -> en_150 + 581, 85, // en_PW -> en_001 + 587, 92, // 
en_RO -> en_150 + 593, 85, // en_RW -> en_001 + 599, 85, // en_SB -> en_001 + 605, 85, // en_SC -> en_001 + 611, 85, // en_SD -> en_001 + 617, 92, // en_SE -> en_150 + 623, 85, // en_SG -> en_001 + 629, 85, // en_SH -> en_001 + 635, 92, // en_SI -> en_150 + 641, 92, // en_SK -> en_150 + 647, 85, // en_SL -> en_001 + 653, 85, // en_SS -> en_001 + 659, 85, // en_SX -> en_001 + 665, 85, // en_SZ -> en_001 + 671, 1176, // en_Shaw -> root + 679, 85, // en_TC -> en_001 + 685, 85, // en_TK -> en_001 + 691, 85, // en_TO -> en_001 + 697, 85, // en_TT -> en_001 + 703, 85, // en_TV -> en_001 + 709, 85, // en_TZ -> en_001 + 715, 92, // en_UA -> en_150 + 721, 85, // en_UG -> en_001 + 727, 85, // en_VC -> en_001 + 733, 85, // en_VG -> en_001 + 739, 85, // en_VU -> en_001 + 745, 85, // en_WS -> en_001 + 751, 85, // en_ZA -> en_001 + 757, 85, // en_ZM -> en_001 + 763, 85, // en_ZW -> en_001 + 776, 769, // es_AR -> es_419 + 782, 769, // es_BO -> es_419 + 788, 769, // es_BR -> es_419 + 794, 769, // es_BZ -> es_419 + 800, 769, // es_CL -> es_419 + 806, 769, // es_CO -> es_419 + 812, 769, // es_CR -> es_419 + 818, 769, // es_CU -> es_419 + 824, 769, // es_DO -> es_419 + 830, 769, // es_EC -> es_419 + 836, 769, // es_GT -> es_419 + 842, 769, // es_HN -> es_419 + 848, 769, // es_JP -> es_419 + 854, 769, // es_MX -> es_419 + 860, 769, // es_NI -> es_419 + 866, 769, // es_PA -> es_419 + 872, 769, // es_PE -> es_419 + 878, 769, // es_PR -> es_419 + 884, 769, // es_PY -> es_419 + 890, 769, // es_SV -> es_419 + 896, 769, // es_US -> es_419 + 902, 769, // es_UY -> es_419 + 908, 769, // es_VE -> es_419 + 914, 1176, // ff_Adlm -> root + 922, 1176, // ff_Arab -> root + 936, 1176, // ha_Arab -> root + 944, 371, // hi_Latn -> en_IN + 952, 930, // ht -> fr_HT + 955, 1176, // iu_Latn -> root + 963, 1176, // kaa_Latn -> root + 972, 1176, // kk_Arab -> root + 980, 1176, // kok_Latn -> root + 989, 1176, // ks_Deva -> root + 997, 1176, // ku_Arab -> root + 1005, 1176, // kxv_Deva -> root + 1014, 1176, 
// kxv_Orya -> root + 1023, 1176, // kxv_Telu -> root + 1032, 1176, // ky_Arab -> root + 1040, 1176, // ky_Latn -> root + 1048, 1176, // ml_Arab -> root + 1056, 1176, // mn_Mong -> root + 1064, 1176, // mni_Mtei -> root + 1073, 1176, // ms_Arab -> root + 1081, 1087, // nb -> no + 1084, 1087, // nn -> no + 1090, 1087, // no_NO -> no + 1096, 1176, // pa_Arab -> root + 1104, 1158, // pt_AO -> pt_PT + 1110, 1158, // pt_CH -> pt_PT + 1116, 1158, // pt_CV -> pt_PT + 1122, 1158, // pt_FR -> pt_PT + 1128, 1158, // pt_GQ -> pt_PT + 1134, 1158, // pt_GW -> pt_PT + 1140, 1158, // pt_LU -> pt_PT + 1146, 1158, // pt_MO -> pt_PT + 1152, 1158, // pt_MZ -> pt_PT + 1164, 1158, // pt_ST -> pt_PT + 1170, 1158, // pt_TL -> pt_PT + 1181, 1176, // sat_Deva -> root + 1190, 1176, // sd_Deva -> root + 1198, 1176, // sd_Khoj -> root + 1206, 1176, // sd_Sind -> root + 1214, 1176, // shi_Latn -> root + 1223, 1176, // so_Arab -> root + 1231, 1176, // sr_Latn -> root + 1239, 1176, // suz_Sunu -> root + 1248, 1176, // sw_Arab -> root + 1256, 1176, // tg_Arab -> root + 1264, 1176, // ug_Cyrl -> root + 1272, 1176, // uz_Arab -> root + 1280, 1176, // uz_Cyrl -> root + 1288, 1176, // vai_Latn -> root + 1297, 1176, // wo_Arab -> root + 1305, 1176, // yo_Arab -> root + 1313, 1176, // yue_Hans -> root + 1322, 1176, // zh_Hant -> root + 1341, 1330, // zh_Hant_MO -> zh_Hant_HK }; diff --git a/src/duckdb/extension/icu/third_party/icu/common/locbased.cpp b/src/duckdb/extension/icu/third_party/icu/common/locbased.cpp index 6f35e7221..b661cc462 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/locbased.cpp +++ b/src/duckdb/extension/icu/third_party/icu/common/locbased.cpp @@ -11,85 +11,36 @@ ********************************************************************** */ #include "locbased.h" -#include "cstring.h" -#include "charstr.h" +#include "uresimp.h" U_NAMESPACE_BEGIN -Locale LocaleBased::getLocale(const CharString* valid, const CharString* actual, - ULocDataLocaleType type, UErrorCode& status) { 
- const char* id = getLocaleID(valid, actual, type, status); - return Locale(id != nullptr ? id : ""); -} - -const char* LocaleBased::getLocaleID(const CharString* valid, const CharString* actual, +const Locale& LocaleBased::getLocale(const Locale& valid, const Locale& actual, ULocDataLocaleType type, UErrorCode& status) { if (U_FAILURE(status)) { - return nullptr; + return Locale::getRoot(); } switch(type) { case ULOC_VALID_LOCALE: - return valid == nullptr ? "" : valid->data(); + return valid; case ULOC_ACTUAL_LOCALE: - return actual == nullptr ? "" : actual->data(); + return actual; default: status = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; + return Locale::getRoot(); } } -void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) { - setValidLocaleID(validID, status); - setActualLocaleID(actualID,status); -} -void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) { - setValidLocaleID(validID, status); - setActualLocaleID(actualID,status); -} - -void LocaleBased::setLocaleID(const char* id, CharString*& dest, UErrorCode& status) { - if (U_FAILURE(status)) { return; } - if (id == nullptr || *id == 0) { - delete dest; - dest = nullptr; - } else { - if (dest == nullptr) { - dest = new CharString(id, status); - if (dest == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - } else { - dest->copyFrom(id, status); - } - } -} +const char* LocaleBased::getLocaleID(const Locale& valid, const Locale& actual, + ULocDataLocaleType type, UErrorCode& status) { + const Locale& locale = getLocale(valid, actual, type, status); -void LocaleBased::setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status) { - if (U_FAILURE(status)) { return; } - if (id == nullptr || id->isEmpty()) { - delete dest; - dest = nullptr; - } else { - if (dest == nullptr) { - dest = new CharString(*id, status); - if (dest == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - } 
else { - dest->copyFrom(*id, status); - } + if (U_FAILURE(status)) { + return nullptr; } -} -bool LocaleBased::equalIDs(const CharString* left, const CharString* right) { - // true if both are nullptr - if (left == nullptr && right == nullptr) return true; - // false if only one is nullptr - if (left == nullptr || right == nullptr) return false; - return *left == *right; + return locale == Locale::getRoot() ? kRootLocaleName : locale.getName(); } U_NAMESPACE_END diff --git a/src/duckdb/extension/icu/third_party/icu/common/locbased.h b/src/duckdb/extension/icu/third_party/icu/common/locbased.h index 9441eb823..dbff148bc 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/locbased.h +++ b/src/duckdb/extension/icu/third_party/icu/common/locbased.h @@ -16,17 +16,8 @@ #include "unicode/locid.h" #include "unicode/uobject.h" -/** - * Macro to declare a locale LocaleBased wrapper object for the given - * object, which must have two members named `validLocale' and - * `actualLocale' of which are pointers to the internal icu::CharString. - */ -#define U_LOCALE_BASED(varname, objname) \ - LocaleBased varname((objname).validLocale, (objname).actualLocale) - U_NAMESPACE_BEGIN -class CharString; /** * A utility class that unifies the implementation of getLocale() by * various ICU services. This class is likely to be removed in the @@ -38,12 +29,6 @@ class U_COMMON_API LocaleBased : public UMemory { public: - /** - * Construct a LocaleBased wrapper around the two pointers. These - * will be aliased for the lifetime of this object. - */ - inline LocaleBased(CharString*& validAlias, CharString*& actualAlias); - /** * Return locale meta-data for the service object wrapped by this * object. 
Either the valid or the actual locale may be @@ -54,8 +39,8 @@ class U_COMMON_API LocaleBased : public UMemory { * @param status input-output error code * @return the indicated locale */ - static Locale getLocale( - const CharString* valid, const CharString* actual, + static const Locale& getLocale( + const Locale& valid, const Locale& actual, ULocDataLocaleType type, UErrorCode& status); /** @@ -69,51 +54,11 @@ class U_COMMON_API LocaleBased : public UMemory { * @return the indicated locale ID */ static const char* getLocaleID( - const CharString* valid, const CharString* actual, + const Locale& valid, const Locale& actual, ULocDataLocaleType type, UErrorCode& status); - /** - * Set the locale meta-data for the service object wrapped by this - * object. If either parameter is zero, it is ignored. - * @param valid the ID of the valid locale - * @param actual the ID of the actual locale - */ - void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status); - void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status); - - static void setLocaleID(const char* id, CharString*& dest, UErrorCode& status); - static void setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status); - - static bool equalIDs(const CharString* left, const CharString* right); - - private: - - void setValidLocaleID(const CharString* id, UErrorCode& status); - void setActualLocaleID(const CharString* id, UErrorCode& status); - void setValidLocaleID(const char* id, UErrorCode& status); - void setActualLocaleID(const char* id, UErrorCode& status); - - CharString*& valid; - CharString*& actual; }; -inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias) : - valid(validAlias), actual(actualAlias) { -} - -inline void LocaleBased::setValidLocaleID(const CharString* id, UErrorCode& status) { - setLocaleID(id, valid, status); -} -inline void LocaleBased::setActualLocaleID(const CharString* id, UErrorCode& status) { - 
setLocaleID(id, actual, status); -} -inline void LocaleBased::setValidLocaleID(const char* id, UErrorCode& status) { - setLocaleID(id, valid, status); -} -inline void LocaleBased::setActualLocaleID(const char* id, UErrorCode& status) { - setLocaleID(id, actual, status); -} - U_NAMESPACE_END #endif diff --git a/src/duckdb/extension/icu/third_party/icu/common/locdispnames.cpp b/src/duckdb/extension/icu/third_party/icu/common/locdispnames.cpp index d3521e879..3051798b7 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/locdispnames.cpp +++ b/src/duckdb/extension/icu/third_party/icu/common/locdispnames.cpp @@ -66,7 +66,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale, return result; } - length=uloc_getDisplayLanguage(fullName, displayLocale.fullName, + length=uloc_getDisplayLanguage(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); @@ -78,7 +78,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale, return result; } errorCode=U_ZERO_ERROR; - length=uloc_getDisplayLanguage(fullName, displayLocale.fullName, + length=uloc_getDisplayLanguage(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); @@ -106,7 +106,7 @@ Locale::getDisplayScript(const Locale &displayLocale, return result; } - length=uloc_getDisplayScript(fullName, displayLocale.fullName, + length=uloc_getDisplayScript(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); @@ -118,7 +118,7 @@ Locale::getDisplayScript(const Locale &displayLocale, return result; } errorCode=U_ZERO_ERROR; - length=uloc_getDisplayScript(fullName, displayLocale.fullName, + length=uloc_getDisplayScript(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? 
length : 0); @@ -146,7 +146,7 @@ Locale::getDisplayCountry(const Locale &displayLocale, return result; } - length=uloc_getDisplayCountry(fullName, displayLocale.fullName, + length=uloc_getDisplayCountry(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); @@ -158,7 +158,7 @@ Locale::getDisplayCountry(const Locale &displayLocale, return result; } errorCode=U_ZERO_ERROR; - length=uloc_getDisplayCountry(fullName, displayLocale.fullName, + length=uloc_getDisplayCountry(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); @@ -186,7 +186,7 @@ Locale::getDisplayVariant(const Locale &displayLocale, return result; } - length=uloc_getDisplayVariant(fullName, displayLocale.fullName, + length=uloc_getDisplayVariant(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); @@ -198,7 +198,7 @@ Locale::getDisplayVariant(const Locale &displayLocale, return result; } errorCode=U_ZERO_ERROR; - length=uloc_getDisplayVariant(fullName, displayLocale.fullName, + length=uloc_getDisplayVariant(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); @@ -226,7 +226,7 @@ Locale::getDisplayName(const Locale &displayLocale, return result; } - length=uloc_getDisplayName(fullName, displayLocale.fullName, + length=uloc_getDisplayName(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? 
length : 0); @@ -238,7 +238,7 @@ Locale::getDisplayName(const Locale &displayLocale, return result; } errorCode=U_ZERO_ERROR; - length=uloc_getDisplayName(fullName, displayLocale.fullName, + length=uloc_getDisplayName(getName(), displayLocale.getName(), buffer, result.getCapacity(), &errorCode); result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); diff --git a/src/duckdb/extension/icu/third_party/icu/common/locid.cpp b/src/duckdb/extension/icu/third_party/icu/common/locid.cpp index e7e86079a..34f1266f6 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +++ b/src/duckdb/extension/icu/third_party/icu/common/locid.cpp @@ -31,13 +31,16 @@ ****************************************************************************** */ +#include #include #include +#include #include #include "unicode/bytestream.h" #include "unicode/locid.h" #include "unicode/localebuilder.h" +#include "unicode/localpointer.h" #include "unicode/strenum.h" #include "unicode/stringpiece.h" #include "unicode/uloc.h" @@ -48,6 +51,7 @@ #include "charstrmap.h" #include "cmemory.h" #include "cstring.h" +#include "fixedstring.h" #include "mutex.h" #include "putilimp.h" #include "uassert.h" @@ -232,9 +236,214 @@ locale_get_default() return Locale::getDefault().getName(); } +namespace { + +template +void copyToArray(std::string_view sv, T* that) { + auto& field = that->*FIELD; + constexpr size_t capacity = std::extent_v>; + static_assert(capacity > 0); + if (!sv.empty()) { + U_ASSERT(sv.size() < capacity); + uprv_memcpy(field, sv.data(), sv.size()); + } + field[sv.size()] = '\0'; +} + +} // namespace U_NAMESPACE_BEGIN +void Locale::Nest::init(std::string_view language, + std::string_view script, + std::string_view region, + uint8_t variantBegin) { + copyToArray<&Nest::language>(language, this); + copyToArray<&Nest::script>(script, this); + copyToArray<&Nest::region>(region, this); + this->variantBegin = variantBegin; +} + +Locale::Nest::Nest(Heap&& heap, uint8_t variantBegin) { + // When 
moving from Heap to Nest the language field can be left untouched + // (as it has the same offset in both) and only the script and region fields + // need to be copied to their new locations, which is safe to do because the + // new locations come before the old locations in memory and don't overlap. + static_assert(offsetof(Nest, region) <= offsetof(Heap, script)); + static_assert(offsetof(Nest, variantBegin) <= offsetof(Heap, region)); + U_ASSERT(this == reinterpret_cast(&heap)); + copyToArray<&Nest::script>(heap.script, this); + copyToArray<&Nest::region>(heap.region, this); + this->variantBegin = variantBegin; + *this->baseName = '\0'; +} + +struct Locale::Heap::Alloc : public UMemory { + FixedString fullName; + FixedString baseName; + int32_t variantBegin; + + const char* getVariant() const { return variantBegin == 0 ? "" : getBaseName() + variantBegin; } + const char* getFullName() const { return fullName.data(); } + const char* getBaseName() const { + if (baseName.isEmpty()) { + if (const char* name = fullName.data(); *name != '@') { + return name; + } + } + return baseName.data(); + } + + Alloc(int32_t variantBegin) : fullName(), baseName(), variantBegin(variantBegin) {} + + Alloc(const Alloc& other, UErrorCode& status) + : fullName(), baseName(), variantBegin(other.variantBegin) { + if (U_SUCCESS(status)) { + if (!other.fullName.isEmpty()) { + fullName = other.fullName; + if (fullName.isEmpty()) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + if (!other.baseName.isEmpty()) { + baseName = other.baseName; + if (baseName.isEmpty()) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + } + } + } + } + + // Move should be done on the owner of the pointer to this object. 
+ Alloc(Alloc&&) noexcept = delete; + + ~Alloc() = default; +}; + +const char* Locale::Heap::getVariant() const { return ptr->getVariant(); } +const char* Locale::Heap::getFullName() const { return ptr->getFullName(); } +const char* Locale::Heap::getBaseName() const { return ptr->getBaseName(); } + +Locale::Heap::Heap(std::string_view language, + std::string_view script, + std::string_view region, + int32_t variantBegin) { + ptr = new Alloc(variantBegin); + if (ptr == nullptr) { + type = eBOGUS; + } else { + type = eHEAP; + copyToArray<&Heap::language>(language, this); + copyToArray<&Heap::script>(script, this); + copyToArray<&Heap::region>(region, this); + } +} + +Locale::Heap::~Heap() { + U_ASSERT(type == eHEAP); + delete ptr; +} + +Locale::Heap& Locale::Heap::operator=(const Heap& other) { + U_ASSERT(type == eBOGUS); + UErrorCode status = U_ZERO_ERROR; + ptr = new Alloc(*other.ptr, status); + if (ptr == nullptr || U_FAILURE(status)) { + delete ptr; + } else { + type = eHEAP; + uprv_memcpy(language, other.language, sizeof language); + uprv_memcpy(script, other.script, sizeof script); + uprv_memcpy(region, other.region, sizeof region); + } + return *this; +} + +Locale::Heap& Locale::Heap::operator=(Heap&& other) noexcept { + U_ASSERT(type == eBOGUS); + ptr = other.ptr; + type = eHEAP; + other.type = eBOGUS; + uprv_memcpy(language, other.language, sizeof language); + uprv_memcpy(script, other.script, sizeof script); + uprv_memcpy(region, other.region, sizeof region); + return *this; +} + +template +auto Locale::Payload::visit(BogusFn bogusFn, NestFn nestFn, HeapFn heapFn, Args... 
args) const { + switch (type) { + case eBOGUS: + return bogusFn(args...); + case eNEST: + return nestFn(nest, args...); + case eHEAP: + return heapFn(heap, args...); + default: + UPRV_UNREACHABLE_EXIT; + }; +} + +void Locale::Payload::copy(const Payload& other) { + other.visit([](Payload*) {}, + [](const Nest& nest, Payload* dst) { dst->nest = nest; }, + [](const Heap& heap, Payload* dst) { dst->heap = heap; }, + this); +} + +void Locale::Payload::move(Payload&& other) noexcept { + other.visit( + [](Payload*) {}, + [](const Nest& nest, Payload* dst) { dst->nest = nest; }, + [](const Heap& heap, Payload* dst) { dst->heap = std::move(const_cast(heap)); }, + this); +} + +Locale::Payload::~Payload() { + if (type == eHEAP) { heap.~Heap(); } +} + +Locale::Payload::Payload(const Payload& other) : type{eBOGUS} { copy(other); } +Locale::Payload::Payload(Payload&& other) noexcept : type{eBOGUS} { move(std::move(other)); } + +Locale::Payload& Locale::Payload::operator=(const Payload& other) { + if (this != &other) { + setToBogus(); + copy(other); + } + return *this; +} + +Locale::Payload& Locale::Payload::operator=(Payload&& other) noexcept { + if (this != &other) { + setToBogus(); + move(std::move(other)); + } + return *this; +} + +void Locale::Payload::setToBogus() { + this->~Payload(); + type = eBOGUS; +} + +template T& Locale::Payload::emplace(Args&&... args) { + if constexpr (std::is_same_v) { + this->~Payload(); + ::new (&nest) Nest(std::forward(args)...); + return nest; + } + if constexpr (std::is_same_v) { + U_ASSERT(type != eHEAP); + ::new (&heap) Heap(std::forward(args)...); + return heap; + } +} + +template <> Locale::Nest* Locale::Payload::get() { return type == eNEST ? &nest : nullptr; } +template <> Locale::Heap* Locale::Payload::get() { return type == eHEAP ? 
&heap : nullptr; } + UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale) /*Character separating the posix id fields*/ @@ -243,22 +452,10 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale) #define SEP_CHAR '_' #define NULL_CHAR '\0' -Locale::~Locale() -{ - if ((baseName != fullName) && (baseName != fullNameBuffer)) { - uprv_free(baseName); - } - baseName = nullptr; - /*if fullName is on the heap, we free it*/ - if (fullName != fullNameBuffer) - { - uprv_free(fullName); - fullName = nullptr; - } -} +Locale::~Locale() = default; Locale::Locale() - : UObject(), fullName(fullNameBuffer), baseName(nullptr) + : UObject(), payload() { init(nullptr, false); } @@ -269,9 +466,8 @@ Locale::Locale() * the default locale.) */ Locale::Locale(Locale::ELocaleType) - : UObject(), fullName(fullNameBuffer), baseName(nullptr) + : UObject(), payload() { - setToBogus(); } @@ -279,7 +475,7 @@ Locale::Locale( const char * newLanguage, const char * newCountry, const char * newVariant, const char * newKeywords) - : UObject(), fullName(fullNameBuffer), baseName(nullptr) + : UObject(), payload() { if( (newLanguage==nullptr) && (newCountry == nullptr) && (newVariant == nullptr) ) { @@ -300,7 +496,6 @@ Locale::Locale( const char * newLanguage, { lsize = static_cast(uprv_strlen(newLanguage)); if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap - setToBogus(); return; } } @@ -312,7 +507,6 @@ Locale::Locale( const char * newLanguage, { csize = static_cast(uprv_strlen(newCountry)); if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap - setToBogus(); return; } } @@ -329,7 +523,6 @@ Locale::Locale( const char * newLanguage, // remove trailing _'s vsize = static_cast(uprv_strlen(newVariant)); if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap - setToBogus(); return; } while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) ) @@ -342,7 +535,6 @@ Locale::Locale( const char * newLanguage, { ksize = static_cast(uprv_strlen(newKeywords)); if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) { - 
setToBogus(); return; } } @@ -383,7 +575,6 @@ Locale::Locale( const char * newLanguage, if (U_FAILURE(status)) { // Something went wrong with appending, etc. - setToBogus(); return; } // Parse it, because for example 'language' might really be a complete @@ -392,82 +583,11 @@ Locale::Locale( const char * newLanguage, } } -Locale::Locale(const Locale &other) - : UObject(other), fullName(fullNameBuffer), baseName(nullptr) -{ - *this = other; -} - -Locale::Locale(Locale&& other) noexcept - : UObject(other), fullName(fullNameBuffer), baseName(fullName) { - *this = std::move(other); -} - -Locale& Locale::operator=(const Locale& other) { - if (this == &other) { - return *this; - } - - setToBogus(); - - if (other.fullName == other.fullNameBuffer) { - uprv_strcpy(fullNameBuffer, other.fullNameBuffer); - } else if (other.fullName == nullptr) { - fullName = nullptr; - } else { - fullName = uprv_strdup(other.fullName); - if (fullName == nullptr) return *this; - } - - if (other.baseName == other.fullName) { - baseName = fullName; - } else if (other.baseName != nullptr) { - baseName = uprv_strdup(other.baseName); - if (baseName == nullptr) return *this; - } - - uprv_strcpy(language, other.language); - uprv_strcpy(script, other.script); - uprv_strcpy(country, other.country); - - variantBegin = other.variantBegin; - fIsBogus = other.fIsBogus; - - return *this; -} - -Locale& Locale::operator=(Locale&& other) noexcept { - if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName); - if (fullName != fullNameBuffer) uprv_free(fullName); +Locale::Locale(const Locale&) = default; +Locale::Locale(Locale&&) noexcept = default; - if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) { - uprv_strcpy(fullNameBuffer, other.fullNameBuffer); - } - if (other.fullName == other.fullNameBuffer) { - fullName = fullNameBuffer; - } else { - fullName = other.fullName; - } - - if (other.baseName == other.fullNameBuffer) { - baseName = fullNameBuffer; 
- } else if (other.baseName == other.fullName) { - baseName = fullName; - } else { - baseName = other.baseName; - } - - uprv_strcpy(language, other.language); - uprv_strcpy(script, other.script); - uprv_strcpy(country, other.country); - - variantBegin = other.variantBegin; - fIsBogus = other.fIsBogus; - - other.baseName = other.fullName = other.fullNameBuffer; - - return *this; -} +Locale& Locale::operator=(const Locale&) = default; +Locale& Locale::operator=(Locale&&) noexcept = default; Locale * Locale::clone() const { @@ -477,7 +597,7 @@ Locale::clone() const { bool Locale::operator==( const Locale& other) const { - return (uprv_strcmp(other.fullName, fullName) == 0); + return uprv_strcmp(other.getName(), getName()) == 0; } namespace { @@ -1073,7 +1193,7 @@ class AliasReplacer { } // Check the fields inside locale, if need to replace fields, - // place the the replaced locale ID in out and return true. + // place the replaced locale ID in out and return true. // Otherwise return false for no replacement or error. 
bool replace( const Locale& locale, CharString& out, UErrorCode& status); @@ -1836,16 +1956,8 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) /*This function initializes a Locale from a C locale ID*/ Locale& Locale::init(StringPiece localeID, UBool canonicalize) { - fIsBogus = false; /* Free our current storage */ - if ((baseName != fullName) && (baseName != fullNameBuffer)) { - uprv_free(baseName); - } - baseName = nullptr; - if(fullName != fullNameBuffer) { - uprv_free(fullName); - fullName = fullNameBuffer; - } + Nest& nest = payload.emplace(); // not a loop: // just an easy way to have a common error-exit @@ -1859,9 +1971,6 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize) int32_t length; UErrorCode err; - /* preset all fields to empty */ - language[0] = script[0] = country[0] = 0; - const auto parse = [canonicalize](std::string_view localeID, char* name, int32_t nameCapacity, @@ -1879,17 +1988,17 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize) }; // "canonicalize" the locale ID to ICU/Java format + char* fullName = nest.baseName; err = U_ZERO_ERROR; - length = parse(localeID, fullName, sizeof fullNameBuffer, err); + length = parse(localeID, fullName, sizeof Nest::baseName, err); - if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast(sizeof(fullNameBuffer))) { - U_ASSERT(baseName == nullptr); + FixedString fullNameBuffer; + if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast(sizeof Nest::baseName)) { /*Go to heap for the fullName if necessary*/ - char* newFullName = static_cast(uprv_malloc(sizeof(char) * (length + 1))); - if (newFullName == nullptr) { + if (!fullNameBuffer.reserve(length + 1)) { break; // error: out of memory } - fullName = newFullName; + fullName = fullNameBuffer.getAlias(); err = U_ZERO_ERROR; length = parse(localeID, fullName, length + 1, err); } @@ -1898,7 +2007,10 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize) break; } - variantBegin = length; + 
std::string_view language; + std::string_view script; + std::string_view region; + int32_t variantBegin = length; /* after uloc_getName/canonicalize() we know that only '_' are separators */ /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */ @@ -1923,8 +2035,9 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize) } else { fieldLen[fieldIdx - 1] = length - static_cast(field[fieldIdx - 1] - fullName); } + bool hasKeywords = at != nullptr && uprv_strchr(at + 1, '=') != nullptr; - if (fieldLen[0] >= static_cast(sizeof(language))) + if (fieldLen[0] >= ULOC_LANG_CAPACITY) { break; // error: the language field is too long } @@ -1932,22 +2045,19 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize) variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */ if (fieldLen[0] > 0) { /* We have a language */ - uprv_memcpy(language, fullName, fieldLen[0]); - language[fieldLen[0]] = 0; + language = {fullName, static_cast(fieldLen[0])}; } if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) && uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) && uprv_isASCIILetter(field[1][3])) { /* We have at least a script */ - uprv_memcpy(script, field[1], fieldLen[1]); - script[fieldLen[1]] = 0; + script = {field[1], static_cast(fieldLen[1])}; variantField++; } if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) { /* We have a country */ - uprv_memcpy(country, field[variantField], fieldLen[variantField]); - country[fieldLen[variantField]] = 0; + region = {field[variantField], static_cast(fieldLen[variantField])}; variantField++; } else if (fieldLen[variantField] == 0) { variantField++; /* script or country empty but variant in next field (i.e. 
en__POSIX) */ @@ -1956,16 +2066,52 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize) if (fieldLen[variantField] > 0) { /* We have a variant */ variantBegin = static_cast(field[variantField] - fullName); + } else if (hasKeywords) { + // The original computation of variantBegin leaves it equal to the length + // of fullName if there is no variant. It should instead be + // the length of the baseName. + variantBegin = static_cast(at - fullName); } - err = U_ZERO_ERROR; - initBaseName(err); - if (U_FAILURE(err)) { - break; + if (!hasKeywords && Nest::fits(length, language, script, region)) { + U_ASSERT(fullName == nest.baseName); + U_ASSERT(fullNameBuffer.isEmpty()); + nest.init(language, script, region, variantBegin); + } else { + if (fullName == nest.baseName) { + U_ASSERT(fullNameBuffer.isEmpty()); + fullNameBuffer = {fullName, static_cast(length)}; + if (fullNameBuffer.isEmpty()) { + break; // error: out of memory + } + if (!language.empty()) { + language = {fullNameBuffer.data(), language.size()}; + } + if (!script.empty()) { + script = {fullNameBuffer.data() + (script.data() - fullName), script.size()}; + } + if (!region.empty()) { + region = {fullNameBuffer.data() + (region.data() - fullName), region.size()}; + } + } + Heap& heap = payload.emplace(language, script, region, variantBegin); + if (isBogus()) { + break; // error: out of memory + } + U_ASSERT(!fullNameBuffer.isEmpty()); + heap.ptr->fullName = std::move(fullNameBuffer); + if (hasKeywords) { + if (std::string_view::size_type baseNameLength = at - fullName; baseNameLength > 0) { + heap.ptr->baseName = {heap.ptr->fullName.data(), baseNameLength}; + if (heap.ptr->baseName.isEmpty()) { + break; // error: out of memory + } + } + } } if (canonicalize) { - if (!isKnownCanonicalizedLocale(fullName, err)) { + if (!isKnownCanonicalizedLocale(getName(), err)) { CharString replaced; // Not sure it is already canonicalized if (canonicalizeLocale(*this, replaced, err)) { @@ -1989,67 +2135,16 @@ 
Locale& Locale::init(StringPiece localeID, UBool canonicalize) return *this; } -/* - * Set up the base name. - * If there are no key words, it's exactly the full name. - * If key words exist, it's the full name truncated at the '@' character. - * Need to set up both at init() and after setting a keyword. - */ -void -Locale::initBaseName(UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - U_ASSERT(baseName==nullptr || baseName==fullName); - const char *atPtr = uprv_strchr(fullName, '@'); - const char *eqPtr = uprv_strchr(fullName, '='); - if (atPtr && eqPtr && atPtr < eqPtr) { - // Key words exist. - int32_t baseNameLength = static_cast(atPtr - fullName); - char* newBaseName = static_cast(uprv_malloc(baseNameLength + 1)); - if (newBaseName == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - baseName = newBaseName; - uprv_strncpy(baseName, fullName, baseNameLength); - baseName[baseNameLength] = 0; - - // The original computation of variantBegin leaves it equal to the length - // of fullName if there is no variant. It should instead be - // the length of the baseName. 
- if (variantBegin > baseNameLength) { - variantBegin = baseNameLength; - } - } else { - baseName = fullName; - } -} - - int32_t Locale::hashCode() const { - return ustr_hashCharsN(fullName, static_cast(uprv_strlen(fullName))); + return ustr_hashCharsN(getName(), static_cast(uprv_strlen(getName()))); } void Locale::setToBogus() { /* Free our current storage */ - if((baseName != fullName) && (baseName != fullNameBuffer)) { - uprv_free(baseName); - } - baseName = nullptr; - if(fullName != fullNameBuffer) { - uprv_free(fullName); - fullName = fullNameBuffer; - } - *fullNameBuffer = 0; - *language = 0; - *script = 0; - *country = 0; - fIsBogus = true; - variantBegin = 0; + payload.setToBogus(); } const Locale& U_EXPORT2 @@ -2088,9 +2183,12 @@ Locale::addLikelySubtags(UErrorCode& status) { return; } - CharString maximizedLocaleID = ulocimp_addLikelySubtags(fullName, status); + CharString maximizedLocaleID = ulocimp_addLikelySubtags(getName(), status); if (U_FAILURE(status)) { + if (status == U_MEMORY_ALLOCATION_ERROR) { + setToBogus(); + } return; } @@ -2110,9 +2208,12 @@ Locale::minimizeSubtags(bool favorScript, UErrorCode& status) { return; } - CharString minimizedLocaleID = ulocimp_minimizeSubtags(fullName, favorScript, status); + CharString minimizedLocaleID = ulocimp_minimizeSubtags(getName(), favorScript, status); if (U_FAILURE(status)) { + if (status == U_MEMORY_ALLOCATION_ERROR) { + setToBogus(); + } return; } @@ -2131,8 +2232,11 @@ Locale::canonicalize(UErrorCode& status) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - CharString uncanonicalized(fullName, status); + CharString uncanonicalized(getName(), status); if (U_FAILURE(status)) { + if (status == U_MEMORY_ALLOCATION_ERROR) { + setToBogus(); + } return; } init(uncanonicalized.data(), /*canonicalize=*/true); @@ -2191,12 +2295,12 @@ Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const return; } - if (fIsBogus) { + if (isBogus()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - 
ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, status); + ulocimp_toLanguageTag(getName(), sink, /*strict=*/false, status); } Locale U_EXPORT2 @@ -2229,14 +2333,14 @@ Locale::createCanonical(const char* name) { const char * Locale::getISO3Language() const { - return uloc_getISO3Language(fullName); + return uloc_getISO3Language(getName()); } const char * Locale::getISO3Country() const { - return uloc_getISO3Country(fullName); + return uloc_getISO3Country(getName()); } /** @@ -2249,7 +2353,7 @@ Locale::getISO3Country() const uint32_t Locale::getLCID() const { - return uloc_getLCID(fullName); + return uloc_getLCID(getName()); } const char* const* U_EXPORT2 Locale::getISOCountries() @@ -2428,8 +2532,9 @@ Locale::getLocaleCache() class KeywordEnumeration : public StringEnumeration { protected: - CharString keywords; + FixedString keywords; private: + int32_t length; const char *current; static const char fgClassID; @@ -2438,13 +2543,17 @@ class KeywordEnumeration : public StringEnumeration { virtual UClassID getDynamicClassID() const override { return getStaticClassID(); } public: KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status) - : keywords(), current(keywords.data()) { + : keywords(), length(keywordLen), current(nullptr) { if(U_SUCCESS(status) && keywordLen != 0) { if(keys == nullptr || keywordLen < 0) { status = U_ILLEGAL_ARGUMENT_ERROR; } else { - keywords.append(keys, keywordLen, status); - current = keywords.data() + currentIndex; + keywords = {keys, static_cast(length)}; + if (keywords.isEmpty()) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + current = keywords.data() + currentIndex; + } } } } @@ -2455,7 +2564,7 @@ class KeywordEnumeration : public StringEnumeration { { UErrorCode status = U_ZERO_ERROR; return new KeywordEnumeration( - keywords.data(), keywords.length(), + keywords.data(), length, static_cast(current - keywords.data()), status); } @@ -2556,8 +2665,8 @@ 
Locale::createKeywords(UErrorCode &status) const return result; } - const char* variantStart = uprv_strchr(fullName, '@'); - const char* assignment = uprv_strchr(fullName, '='); + const char* variantStart = uprv_strchr(getName(), '@'); + const char* assignment = uprv_strchr(getName(), '='); if(variantStart) { if(assignment > variantStart) { CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status); @@ -2583,8 +2692,8 @@ Locale::createUnicodeKeywords(UErrorCode &status) const return result; } - const char* variantStart = uprv_strchr(fullName, '@'); - const char* assignment = uprv_strchr(fullName, '='); + const char* variantStart = uprv_strchr(getName(), '@'); + const char* assignment = uprv_strchr(getName(), '='); if(variantStart) { if(assignment > variantStart) { CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status); @@ -2604,7 +2713,7 @@ Locale::createUnicodeKeywords(UErrorCode &status) const int32_t Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const { - return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status); + return uloc_getKeywordValue(getName(), keywordName, buffer, bufLen, &status); } void @@ -2613,12 +2722,12 @@ Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& sta return; } - if (fIsBogus) { + if (isBogus()) { status = U_ILLEGAL_ARGUMENT_ERROR; return; } - ulocimp_getKeywordValue(fullName, keywordName, sink, status); + ulocimp_getKeywordValue(getName(), keywordName, sink, status); } void @@ -2664,51 +2773,77 @@ Locale::setKeywordValue(StringPiece keywordName, status = U_ZERO_ERROR; } - int32_t length = static_cast(uprv_strlen(fullName)); - int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY : length + 1; - - const char* start = locale_getKeywordsStart(fullName); - int32_t offset = start == nullptr ? 
length : start - fullName; - - for (;;) { - // Remove -1 from the capacity so that this function can guarantee NUL termination. - CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1); - - int32_t reslen = ulocimp_setKeywordValue( - {fullName + offset, static_cast(length - offset)}, - keywordName, - keywordValue, - sink, - status); + CharString localeID(getName(), -1, status); + ulocimp_setKeywordValue(keywordName, keywordValue, localeID, status); + if (U_FAILURE(status)) { + if (status == U_MEMORY_ALLOCATION_ERROR) { + setToBogus(); + } + return; + } - if (status == U_BUFFER_OVERFLOW_ERROR) { - capacity = reslen + offset + 1; - char* newFullName = static_cast(uprv_malloc(capacity)); - if (newFullName == nullptr) { + const char* at = locale_getKeywordsStart(localeID.toStringPiece()); + bool hasKeywords = at != nullptr && uprv_strchr(at + 1, '=') != nullptr; + + Nest* nest = payload.get(); + if (!hasKeywords) { + if (nest == nullptr) { + // There are no longer any keywords left, so it might now be + // possible to move the payload from Heap to Nest. + Heap* heap = payload.get(); + U_ASSERT(heap != nullptr); + if (Nest::fits(localeID.length(), heap->language, heap->script, heap->region)) { + int32_t variantBegin = heap->ptr->variantBegin; + U_ASSERT(variantBegin >= 0); + U_ASSERT(static_cast(variantBegin) < sizeof Nest::baseName); + nest = &payload.emplace(std::move(*heap), static_cast(variantBegin)); + localeID.extract(nest->baseName, sizeof Nest::baseName, status); + } else { + heap->ptr->baseName.clear(); + heap->ptr->fullName = localeID.toStringPiece(); + if (heap->ptr->fullName.isEmpty()) { + status = U_MEMORY_ALLOCATION_ERROR; + setToBogus(); + return; + } + } + } + } else { + Heap* heap = nullptr; + if (nest != nullptr) { + // A keyword has been added, so the payload now needs to be moved + // from Nest to Heap so that it can get a baseName. 
+ Nest copy(*nest); + heap = &payload.emplace(copy.language, + copy.script, + copy.region, + copy.variantBegin); + if (isBogus()) { status = U_MEMORY_ALLOCATION_ERROR; return; } - uprv_memcpy(newFullName, fullName, length + 1); - if (fullName != fullNameBuffer) { - if (baseName == fullName) { - baseName = newFullName; // baseName should not point to freed memory. + } else { + heap = payload.get(); + } + U_ASSERT(heap != nullptr); + heap->ptr->fullName = localeID.toStringPiece(); + if (heap->ptr->fullName.isEmpty()) { + status = U_MEMORY_ALLOCATION_ERROR; + setToBogus(); + return; + } + + if (heap->ptr->baseName.isEmpty()) { + // Has added the first keyword, meaning that the fullName is no longer also the baseName. + if (std::string_view::size_type baseNameLength = at - localeID.data(); baseNameLength > 0) { + heap->ptr->baseName = {heap->ptr->fullName.data(), baseNameLength}; + if (heap->ptr->baseName.isEmpty()) { + status = U_MEMORY_ALLOCATION_ERROR; + setToBogus(); + return; } - // if fullName is already on the heap, need to free it. - uprv_free(fullName); } - fullName = newFullName; - status = U_ZERO_ERROR; - continue; } - - if (U_FAILURE(status)) { return; } - u_terminateChars(fullName, capacity, reslen + offset, &status); - break; - } - - if (baseName == fullName) { - // May have added the first keyword, meaning that the fullName is no longer also the baseName. 
- initBaseName(status); } } @@ -2741,9 +2876,42 @@ Locale::setUnicodeKeywordValue(StringPiece keywordName, setKeywordValue(*legacy_key, value, status); } -const char * +const char* +Locale::getCountry() const { + return getField<&Nest::getRegion, &Heap::getRegion>(); +} + +const char* +Locale::getLanguage() const { + return getField<&Nest::getLanguage, &Heap::getLanguage>(); +} + +const char* +Locale::getScript() const { + return getField<&Nest::getScript, &Heap::getScript>(); +} + +const char* +Locale::getVariant() const { + return getField<&Nest::getVariant, &Heap::getVariant>(); +} + +const char* +Locale::getName() const { + return getField<&Nest::getBaseName, &Heap::getFullName>(); +} + +const char* Locale::getBaseName() const { - return baseName; + return getField<&Nest::getBaseName, &Heap::getBaseName>(); +} + +template +const char* Locale::getField() const { + return payload.visit([] { return ""; }, + [](const Nest& nest) { return (nest.*NEST)(); }, + [](const Heap& heap) { return (heap.*HEAP)(); }); } Locale::Iterator::~Iterator() = default; diff --git a/src/duckdb/extension/icu/third_party/icu/common/loclikely.cpp b/src/duckdb/extension/icu/third_party/icu/common/loclikely.cpp index f87fd8dd6..1c9447fa2 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/loclikely.cpp +++ b/src/duckdb/extension/icu/third_party/icu/common/loclikely.cpp @@ -495,7 +495,7 @@ bool RegionValidateMap::equals(const RegionValidateMap& that) const { // The code transform two letter a-z to a integer valued between -1, 26x26. // -1 indicate the region is outside the range of two letter a-z // the rest of value is between 0 and 676 (= 26x26) and used as an index -// the the bigmap in map. The map is an array of 22 int32_t. +// the bigmap in map. The map is an array of 22 int32_t. // since 32x21 < 676/32 < 32x22 we store this 676 bits bitmap into 22 int32_t. 
int32_t RegionValidateMap::value(const char* region) const { if (uprv_isASCIILetter(region[0]) && uprv_isASCIILetter(region[1]) && diff --git a/src/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp b/src/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp index 7245a7798..b37aaeec7 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp +++ b/src/duckdb/extension/icu/third_party/icu/common/loclikelysubtags.cpp @@ -715,13 +715,29 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec } else { iter.resetToState64(state); value = trieNext(iter, "", 0); - U_ASSERT(value > 0); + U_ASSERT(value != 0); + // For the case of und_Latn + if (value < 0) { + retainLanguage = !language.empty(); + retainScript = !script.empty(); + retainRegion = !region.empty(); + // Fallback to und_$region => + iter.resetToState64(trieUndState); // "und" ("*") + value = trieNext(iter, "", 0); + U_ASSERT(value == 0); + int64_t trieUndEmptyState = iter.getState64(); + value = trieNext(iter, region, 0); + // Fallback to und => + if (value < 0) { + iter.resetToState64(trieUndEmptyState); + value = trieNext(iter, "", 0); + U_ASSERT(value > 0); + } + } } } } U_ASSERT(value < lsrsLength); - const LSR &matched = lsrs[value]; - if (returnInputIfUnmatch && (!(matchLanguage || matchScript || (matchRegion && language.empty())))) { return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode); // no matching. @@ -731,18 +747,23 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec } if (!(retainLanguage || retainScript || retainRegion)) { + U_ASSERT(value >= 0); // Quickly return a copy of the lookup-result LSR // without new allocation of the subtags. 
+ const LSR &matched = lsrs[value]; return LSR(matched.language, matched.script, matched.region, matched.flags); } if (!retainLanguage) { - language = matched.language; + U_ASSERT(value >= 0); + language = lsrs[value].language; } if (!retainScript) { - script = matched.script; + U_ASSERT(value >= 0); + script = lsrs[value].script; } if (!retainRegion) { - region = matched.region; + U_ASSERT(value >= 0); + region = lsrs[value].region; } int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0); // retainOldMask flags = LSR explicit-subtag flags diff --git a/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp b/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp index 96555ce8a..cd2976da1 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +++ b/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp @@ -914,8 +914,9 @@ MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t li if(0x30<=c && c<=0x39) { if(number>=INT32_MAX/10) { badNumber=true; // overflow + } else { + number=number*10+(c-0x30); } - number=number*10+(c-0x30); } else { return UMSGPAT_ARG_NAME_NOT_NUMBER; } diff --git a/src/duckdb/extension/icu/third_party/icu/common/norm2_nfc_data.h b/src/duckdb/extension/icu/third_party/icu/common/norm2_nfc_data.h index 89d0287c5..562c2b251 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/norm2_nfc_data.h +++ b/src/duckdb/extension/icu/third_party/icu/common/norm2_nfc_data.h @@ -10,14 +10,14 @@ #ifdef INCLUDED_FROM_NORMALIZER2_CPP static const UVersionInfo norm2_nfc_data_formatVersion={5,0,0,0}; -static const UVersionInfo norm2_nfc_data_dataVersion={0x10,0,0,0}; +static const UVersionInfo norm2_nfc_data_dataVersion={0x11,0,0,0}; static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={ -0x58,0x4e84,0x8c60,0x8d60,0x8d60,0x8d60,0x8d60,0x8d60,0xc0,0x300,0xb0c,0x2a6a,0x3cf0,0xfbc4,0x12c2,0x3c26, 
+0x58,0x4eec,0x8cc8,0x8dc8,0x8dc8,0x8dc8,0x8dc8,0x8dc8,0xc0,0x300,0xb0c,0x2a6a,0x3cf0,0xfbc4,0x12c2,0x3c26, 0x3cbe,0x3cf0,0x300,0,0xfb10,0xfb9e }; -static const uint16_t norm2_nfc_data_trieIndex[1869]={ +static const uint16_t norm2_nfc_data_trieIndex[1888]={ 0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353, 0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3, 0x631,0x65f,0x688,0x6be,0x6fe,0x73b,0x75b,0x79a,0x7d9,0x816,0x835,0x872,0x75b,0x8ab,0x8d9,0x918, @@ -82,7 +82,7 @@ static const uint16_t norm2_nfc_data_trieIndex[1869]={ 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x1881,0x18c1,0x1901,0x1941,0x1981,0x19c1,0x1a01,0x1a41,0x1a64,0x1aa4,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac4,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x6cf,0x6df,0x6f7,0x716,0x72b,0x72b,0x72b,0x72f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x6e2,0x6f2,0x70a,0x729,0x73e,0x73e,0x73e,0x742,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc0c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c, @@ -91,53 +91,54 @@ static const uint16_t norm2_nfc_data_trieIndex[1869]={ 0x1b1a,0x226,0x226,0x1b2a,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xdf8,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x1b3a,0x226,0x226,0x226,0x1b42,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x1608,0x226,0x226,0x226,0x226,0x66b,0x226,0x226,0x226,0x226,0x1b50,0x54f,0x226,0x226,0x1b60,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x81d,0x226,0x226,0x1b70,0x226,0x1b80,0x1b8d,0x1b99,0x226,0x226, 
-0x226,0x226,0x414,0x226,0x1ba4,0x1bb4,0x226,0x226,0x226,0x812,0x226,0x226,0x226,0x226,0x1bc4,0x226, -0x226,0x226,0x1bcf,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bd6,0x226,0x226, -0x226,0x226,0x1be1,0x1bf0,0x928,0x1bfe,0x412,0x1c0c,0x1c1c,0x226,0x1c24,0x1c32,0x87f,0x226,0x226,0x226, -0x226,0x1c42,0x7ca,0x226,0x226,0x226,0x226,0x226,0x1c52,0x1c61,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x908,0x1c69,0x1c79,0x226,0x226,0x226,0x9ec,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x1c83,0x226,0x226,0x226,0x226,0x226,0x226,0x818,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c80,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c93,0x812,0x226,0x226,0x226,0x226, +0x1608,0x226,0x226,0x226,0x226,0x1b50,0x226,0x226,0x226,0x226,0x1b60,0x54f,0x226,0x226,0x1b70,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x81d,0x226,0x226,0x1b80,0x226,0x1b90,0x1b9d,0x1ba9,0x226,0x226, +0x226,0x226,0x414,0x226,0x1bb4,0x1bc4,0x226,0x226,0x226,0x812,0x226,0x226,0x226,0x226,0x1bd4,0x226, +0x226,0x226,0x1bdf,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1be6,0x226,0x226, +0x226,0x226,0x1bf1,0x1c00,0x928,0x1c0e,0x412,0x1c1c,0x1c2c,0x226,0x1c34,0x1c42,0x87f,0x226,0x226,0x226, +0x226,0x1c52,0x7ca,0x226,0x226,0x226,0x226,0x226,0x1c62,0x1c71,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x908,0x1c79,0x1c89,0x226,0x226,0x226,0x9ec,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x1c93,0x226,0x226,0x226,0x226,0x226,0x226,0x818,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c90,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ca3,0x812,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x87f,0x226,0x226,0x226,0x81f,0x81c,0x226,0x226,0x226,0x226,0x81a, 
0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x9ec,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc06,0x226,0x226,0x226,0x226,0x81c,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0xc09,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x1ca2,0x1cb1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x1cc1,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cce, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x1cb2,0x1cc1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x1cd1,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cde, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ce0,0x226,0x226,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x1cef,0x1cff,0x1d0d,0x1d1a,0x226,0x1d26,0x1d34,0x1d44,0x226,0x226,0x226,0x226,0xd1c,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cee, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cf0,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x1cff,0x1d0f,0x1d1d,0x1d2a,0x226,0x1d36,0x1d44,0x1d54,0x226,0x226,0x226,0x226,0xd1c,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, 
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d54,0x1d5c,0x1d6a,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d64,0x1d6c,0x1d7a,0x226,0x226,0x226,0x226,0x226, 0x4f9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7ca,0x226,0x226,0x226,0x4fc,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d75,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d85,0x226, 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x5c1,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d85,0x226,0x226,0x226, -0x226,0x226,0x226,0x1d91,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1da1, -0x1db1,0x1dc1,0x1dd1,0x1de1,0x1df1,0x1e01,0x1e11,0x1e21,0x1e31,0x1e41,0x1e51,0x1e61,0x1e71,0x1e81,0x1e91,0x1ea1, -0x1eb1,0x1ec1,0x1ed1,0x1ee1,0x1ef1,0x1f01,0x1f11,0x1f21,0x1f31,0x1f41,0x1f51,0x1f61,0x1f71,0x1f81,0x1f91,0x1fa1, -0x1fb1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, -0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x408, -0x428,0x440,0xc4,0xc4,0x460,0x46f,0x486,0x4a2,0x4bf,0x4dd,0x4fa,0x517,0x536,0x553,0x56d,0xc4, -0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x582, -0xc4,0xc4,0xc4,0xc4,0x595,0x5a9,0x5c0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d95,0x7d3,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1da5, +0x226,0x226,0x226,0x226,0x226,0x226,0x1db1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x1dc1,0x1dd1,0x1de1,0x1df1,0x1e01,0x1e11,0x1e21,0x1e31,0x1e41,0x1e51,0x1e61,0x1e71,0x1e81,0x1e91, 
+0x1ea1,0x1eb1,0x1ec1,0x1ed1,0x1ee1,0x1ef1,0x1f01,0x1f11,0x1f21,0x1f31,0x1f41,0x1f51,0x1f61,0x1f71,0x1f81,0x1f91, +0x1fa1,0x1fb1,0x1fc1,0x1fd1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x408,0x428,0x440,0xc4,0xc4,0x460,0x46f,0x486,0x4a2,0x4bf,0x4dd,0x4fa,0x517,0x536, +0x553,0x56d,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0x582,0xc4,0xc4,0xc4,0xc4,0x595,0x5a9,0x5c0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, 0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, -0xc4,0xc4,0xc4,0xc4,0xc4,0x5e0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5eb,0x608, -0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x628,0x63e,0x650,0xc4,0x66f,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5e0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0x5eb,0x608,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x628,0x63e,0x650,0x66f,0x682,0xc4,0xc4, 0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, -0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x68f,0x6af +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x6a2,0x6c2 }; -static const uint16_t norm2_nfc_data_trieData[8129]={ +static const uint16_t norm2_nfc_data_trieData[8162]={ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -371,9 +372,9 @@ static const uint16_t norm2_nfc_data_trieData[8129]={ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, 0xffb8,0xffcc,0xffcc,0xffb8,1,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8, -0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, 
+0xffcc,0xffcc,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc, +0xffd4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,0x8c4,0x1a65,0x8c8,0x1a6b,0x8cc,0x1a71,0x8d0,0x1a77,0x8d4,0x1a7d,1, 1,0x8d8,0x1a83,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -575,6 +576,7 @@ static const uint16_t norm2_nfc_data_trieData[8129]={ 1,1,1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1,1,1, 1,1,0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc, 0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +1,1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,1,0xffb8,0xffb8,0xffb8, 1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8, 1,1,0xffcc,0xffb8,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1, 0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, @@ -610,7 +612,8 @@ static const uint16_t norm2_nfc_data_trieData[8129]={ 1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc, 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1, 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1, -1,0xffd0,0xffd0,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1, +1,0xffd0,0xffd0,0xffb8,0xffcc,1,1,1,0xffcc,1,1,0xffcc,1,1,1,1, +1,1,1,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1, 1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,1,1,1, 1,0x33e5,0x33e9,0x33ed,0x33f1,0x33f7,0x2fd7,0x33fb,0x33ff,0x3403,0x3407,0x2fdb,0x340b,0x340f,0x3413,0x2fdf, 0x3419,0x341d,0x3421,0x3425,0x342b,0x342f,0x3433,0x3437,0x343d,0x3441,0x3445,0x3449,0x30c9,0x344d,0x3453,0x3457, @@ -646,13 +649,13 @@ static const uint16_t norm2_nfc_data_trieData[8129]={ 0x3b5f,0x3b63,0x3b67,0x3b6d,0x3b71,0x3b75,0x3b79,0x3b7d,0x3b83,0x3b89,0x3b8d,0x3b91,0x3b95,0x3b9b,0x3b9f,0x31d1, 0x31d1,0x3ba5,0x3ba9,0x3baf,0x3bb3,0x3bb7,0x3bbb,0x3bbf,0x3bc3,0x3bc7,0x3bcb,0x31d5,0x3bd1,0x3bd5,0x3bd9,0x3bdd, 
0x3be1,0x3be5,0x3beb,0x3bef,0x3bf5,0x3bfb,0x3c01,0x3c05,0x3c09,0x3c0d,0x3c11,0x3c15,0x3c19,0x3c1d,0x3c21,1, -1 +1,1 }; static const UCPTrie norm2_nfc_data_trie={ norm2_nfc_data_trieIndex, { norm2_nfc_data_trieData }, - 1869, 8129, + 1888, 8162, 0x2fc00, 0x30, 0, 0, 0, 0, @@ -1160,7 +1163,7 @@ static const uint16_t norm2_nfc_data_extraData[7918]={ static const uint8_t norm2_nfc_data_smallFCD[256]={ 0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xd7,0xe6,0x66,0x46,0x66,0x46,0x66,0x5b, -0x12,0,0,4,0,0,0,0x43,0x20,2,0x69,0xae,0xc2,0xc0,0xff,0xff, +0x12,0,0,4,0,0,0,0x43,0x20,2,0xe9,0xae,0xc2,0xc0,0xff,0xff, 0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0, 0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, diff --git a/src/duckdb/extension/icu/third_party/icu/common/normalizer2impl.h b/src/duckdb/extension/icu/third_party/icu/common/normalizer2impl.h index 5999190ba..94047336f 100644 --- a/src/duckdb/extension/icu/third_party/icu/common/normalizer2impl.h +++ b/src/duckdb/extension/icu/third_party/icu/common/normalizer2impl.h @@ -243,32 +243,36 @@ class U_COMMON_API ReorderingBuffer : public UMemory { * this normalizer2impl.h and in the design doc at * https://unicode-org.github.io/icu/design/normalization/custom.html */ -class U_COMMON_API Normalizer2Impl : public UObject { +class U_COMMON_API_CLASS Normalizer2Impl : public UObject { public: - Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {} - virtual ~Normalizer2Impl(); + U_COMMON_API Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {} + U_COMMON_API virtual ~Normalizer2Impl(); - void init(const int32_t *inIndexes, const UCPTrie *inTrie, - const uint16_t *inExtraData, const uint8_t *inSmallFCD); + U_COMMON_API void init(const int32_t* inIndexes, + const UCPTrie* inTrie, + const uint16_t* inExtraData, + const uint8_t* inSmallFCD); - void addLcccChars(UnicodeSet &set) const; - void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; - void 
addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; + U_COMMON_API void addLcccChars(UnicodeSet& set) const; + U_COMMON_API void addPropertyStarts(const USetAdder* sa, UErrorCode& errorCode) const; + U_COMMON_API void addCanonIterPropertyStarts(const USetAdder* sa, UErrorCode& errorCode) const; // low-level properties ------------------------------------------------ *** - UBool ensureCanonIterData(UErrorCode &errorCode) const; + U_COMMON_API UBool ensureCanonIterData(UErrorCode& errorCode) const; // The trie stores values for lead surrogate code *units*. // Surrogate code *points* are inert. - uint16_t getNorm16(UChar32 c) const { + U_COMMON_API uint16_t getNorm16(UChar32 c) const { return U_IS_LEAD(c) ? static_cast(INERT) : UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); } - uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); } + U_COMMON_API uint16_t getRawNorm16(UChar32 c) const { + return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); + } - UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const { + U_COMMON_API UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const { if(norm16=MIN_NORMAL_MAYBE_YES) { return getCCFromNormalYesOrMaybe(norm16); } @@ -290,13 +300,13 @@ class U_COMMON_API Normalizer2Impl : public UObject { } return getCCFromNoNo(norm16); } - static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) { + U_COMMON_API static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) { return static_cast(norm16 >> OFFSET_SHIFT); } - static uint8_t getCCFromYesOrMaybeYes(uint16_t norm16) { + U_COMMON_API static uint8_t getCCFromYesOrMaybeYes(uint16_t norm16) { return norm16>=MIN_NORMAL_MAYBE_YES ? 
getCCFromNormalYesOrMaybe(norm16) : 0; } - uint8_t getCCFromYesOrMaybeYesCP(UChar32 c) const { + U_COMMON_API uint8_t getCCFromYesOrMaybeYesCP(UChar32 c) const { if (c < minCompNoMaybeCP) { return 0; } return getCCFromYesOrMaybeYes(getNorm16(c)); } @@ -306,7 +316,7 @@ class U_COMMON_API Normalizer2Impl : public UObject { * @param c A Unicode code point. * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. */ - uint16_t getFCD16(UChar32 c) const { + U_COMMON_API uint16_t getFCD16(UChar32 c) const { if(c>8]; if(bits==0) { return false; } return (bits >> ((lead >> 5) & 7)) & 1; } /** Returns the FCD value from the regular normalization data. */ - uint16_t getFCD16FromNormData(UChar32 c) const; + U_COMMON_API uint16_t getFCD16FromNormData(UChar32 c) const; - uint16_t getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const; + U_COMMON_API uint16_t getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const; /** * Gets the decomposition for one code point. @@ -378,7 +388,7 @@ class U_COMMON_API Normalizer2Impl : public UObject { * @param length out-only, takes the length of the decomposition, if any * @return pointer to the decomposition, or NULL if none */ - const char16_t *getDecomposition(UChar32 c, char16_t buffer[4], int32_t &length) const; + U_COMMON_API const char16_t* getDecomposition(UChar32 c, char16_t buffer[4], int32_t& length) const; /** * Gets the raw decomposition for one code point. 
@@ -387,12 +397,14 @@ class U_COMMON_API Normalizer2Impl : public UObject { * @param length out-only, takes the length of the decomposition, if any * @return pointer to the decomposition, or NULL if none */ - const char16_t *getRawDecomposition(UChar32 c, char16_t buffer[30], int32_t &length) const; + U_COMMON_API const char16_t* getRawDecomposition(UChar32 c, + char16_t buffer[30], + int32_t& length) const; - UChar32 composePair(UChar32 a, UChar32 b) const; + U_COMMON_API UChar32 composePair(UChar32 a, UChar32 b) const; - UBool isCanonSegmentStarter(UChar32 c) const; - UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const; + U_COMMON_API UBool isCanonSegmentStarter(UChar32 c) const; + U_COMMON_API UBool getCanonStartSet(UChar32 c, UnicodeSet& set) const; enum { // Fixed norm16 values. @@ -481,71 +493,90 @@ class U_COMMON_API Normalizer2Impl : public UObject { // higher-level functionality ------------------------------------------ *** // NFD without an NFD Normalizer2 instance. - UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest, - UErrorCode &errorCode) const; + U_COMMON_API UnicodeString& decompose(const UnicodeString& src, + UnicodeString& dest, + UErrorCode& errorCode) const; /** * Decomposes [src, limit[ and writes the result to dest. * limit can be NULL if src is NUL-terminated. * destLengthEstimate is the initial dest buffer capacity and can be -1. 
*/ - void decompose(const char16_t *src, const char16_t *limit, - UnicodeString &dest, int32_t destLengthEstimate, - UErrorCode &errorCode) const; - - const char16_t *decompose(const char16_t *src, const char16_t *limit, - ReorderingBuffer *buffer, UErrorCode &errorCode) const; - void decomposeAndAppend(const char16_t *src, const char16_t *limit, - UBool doDecompose, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; + U_COMMON_API void decompose(const char16_t* src, + const char16_t* limit, + UnicodeString& dest, + int32_t destLengthEstimate, + UErrorCode& errorCode) const; + + U_COMMON_API const char16_t* decompose(const char16_t* src, + const char16_t* limit, + ReorderingBuffer* buffer, + UErrorCode& errorCode) const; + U_COMMON_API void decomposeAndAppend(const char16_t* src, + const char16_t* limit, + UBool doDecompose, + UnicodeString& safeMiddle, + ReorderingBuffer& buffer, + UErrorCode& errorCode) const; /** sink==nullptr: isNormalized()/spanQuickCheckYes() */ - const uint8_t *decomposeUTF8(uint32_t options, - const uint8_t *src, const uint8_t *limit, - ByteSink *sink, Edits *edits, UErrorCode &errorCode) const; - - UBool compose(const char16_t *src, const char16_t *limit, - UBool onlyContiguous, - UBool doCompose, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; - const char16_t *composeQuickCheck(const char16_t *src, const char16_t *limit, - UBool onlyContiguous, - UNormalizationCheckResult *pQCResult) const; - void composeAndAppend(const char16_t *src, const char16_t *limit, - UBool doCompose, - UBool onlyContiguous, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; + U_COMMON_API const uint8_t* decomposeUTF8(uint32_t options, + const uint8_t* src, + const uint8_t* limit, + ByteSink* sink, + Edits* edits, + UErrorCode& errorCode) const; + + U_COMMON_API UBool compose(const char16_t* src, + const char16_t* limit, + UBool onlyContiguous, + UBool doCompose, + 
ReorderingBuffer& buffer, + UErrorCode& errorCode) const; + U_COMMON_API const char16_t* composeQuickCheck(const char16_t* src, + const char16_t* limit, + UBool onlyContiguous, + UNormalizationCheckResult* pQCResult) const; + U_COMMON_API void composeAndAppend(const char16_t* src, + const char16_t* limit, + UBool doCompose, + UBool onlyContiguous, + UnicodeString& safeMiddle, + ReorderingBuffer& buffer, + UErrorCode& errorCode) const; /** sink==nullptr: isNormalized() */ - UBool composeUTF8(uint32_t options, UBool onlyContiguous, - const uint8_t *src, const uint8_t *limit, - ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const; - - const char16_t *makeFCD(const char16_t *src, const char16_t *limit, - ReorderingBuffer *buffer, UErrorCode &errorCode) const; - void makeFCDAndAppend(const char16_t *src, const char16_t *limit, - UBool doMakeFCD, - UnicodeString &safeMiddle, - ReorderingBuffer &buffer, - UErrorCode &errorCode) const; - - UBool hasDecompBoundaryBefore(UChar32 c) const; - UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const; - UBool hasDecompBoundaryAfter(UChar32 c) const; - UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const; - UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); } - - UBool hasCompBoundaryBefore(UChar32 c) const { + U_COMMON_API UBool composeUTF8(uint32_t options, + UBool onlyContiguous, + const uint8_t* src, + const uint8_t* limit, + ByteSink* sink, + icu::Edits* edits, + UErrorCode& errorCode) const; + + U_COMMON_API const char16_t* makeFCD(const char16_t* src, + const char16_t* limit, + ReorderingBuffer* buffer, + UErrorCode& errorCode) const; + U_COMMON_API void makeFCDAndAppend(const char16_t* src, + const char16_t* limit, + UBool doMakeFCD, + UnicodeString& safeMiddle, + ReorderingBuffer& buffer, + UErrorCode& errorCode) const; + + U_COMMON_API UBool hasDecompBoundaryBefore(UChar32 c) const; + U_COMMON_API UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const; + U_COMMON_API 
UBool hasDecompBoundaryAfter(UChar32 c) const; + U_COMMON_API UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const; + U_COMMON_API UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); } + + U_COMMON_API UBool hasCompBoundaryBefore(UChar32 c) const { return c