From ac75274283f6de8d0c99da18a0ac28f29a8797ed Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 28 Apr 2026 17:19:17 +0200 Subject: [PATCH] Add fundable project for Arrow run-end-encoding --- .../descriptions/RunEndEncodedInArrowCpp.md | 50 +++++++++++++++++++ src/components/fundable/projectsDetails.ts | 13 +++++ .../RunEndEncodedInApacheArrow/GetAQuote.tsx | 9 ++++ .../RunEndEncodedInApacheArrow/index.tsx | 9 ++++ 4 files changed, 81 insertions(+) create mode 100644 src/components/fundable/descriptions/RunEndEncodedInArrowCpp.md create mode 100644 src/pages/fundable/RunEndEncodedInApacheArrow/GetAQuote.tsx create mode 100644 src/pages/fundable/RunEndEncodedInApacheArrow/index.tsx diff --git a/src/components/fundable/descriptions/RunEndEncodedInArrowCpp.md b/src/components/fundable/descriptions/RunEndEncodedInArrowCpp.md new file mode 100644 index 00000000..75b0eded --- /dev/null +++ b/src/components/fundable/descriptions/RunEndEncodedInArrowCpp.md @@ -0,0 +1,50 @@ +#### Overview + +Apache Arrow is the universal columnar format and multi-language toolbox for fast data interchange and in-memory analytics. + +Run-End-Encoding is a composite Arrow data type that efficiently represents data with long runs of consecutive repeated values, such as time series. +While basic support is present, Run-End-Encoding is not yet supported in all Arrow C++ components. + +We propose to finish implementing support for Run-End-Encoding in all components of Arrow C++: + +* cast kernels (including implicit run-end-encoding or decoding where applicable) + +* scalar kernels: + - all unary kernels (math, etc.) + - all binary kernels (arithmetic, comparisons, etc.) + - `coalesce`, `if_else`, `case_when` + - fix incorrect output in `is_null` and `true_unless_null` + +* vector kernels: + - cumulative aggregations (`cumulative_max`, `cumulative_sum`, etc.) 
+ - `pairwise_diff` + - `filter`, `take`, `scatter` + - `fill_null_forward`, `fill_null_backward` + - `replace_with_mask` + - `indices_non_zero` + - `sort_indices`, `rank`, `rank_normal`, `rank_quantile` + - `partition_nth_indices` + - `select_k_unstable` + - `unique`, `value_counts` + - `winsorize` + - fix incorrect output in `drop_null` + +* aggregate kernels: + - `all`, `any` + - `count_distinct` + - `approximate_median` + - `sum`, `product`, `mean`, `mode`, `quantile`, `tdigest` + - `variance`, `stddev`, `skew`, `kurtosis` + - `first`, `last`, `min`, `max` + - `index` + - fix incorrect output in `count` + +* Parquet reader and writer + +* CSV reader + +* ORC reader and writer + +Funders can decide to fund the entire package, or choose the components they are interested in. + +##### Are you interested in this project? Contact us for more information on how to help us fund it, either entirely or partially. diff --git a/src/components/fundable/projectsDetails.ts b/src/components/fundable/projectsDetails.ts index 07568ddb..1f11b031 100644 --- a/src/components/fundable/projectsDetails.ts +++ b/src/components/fundable/projectsDetails.ts @@ -8,6 +8,7 @@ import MatrixOperationsInXtensorMD from "@site/src/components/fundable/descripti import BinaryViewInArrowCppMD from "@site/src/components/fundable/descriptions/BinaryViewInArrowCpp.md" import Decimal32InArrowCppMD from "@site/src/components/fundable/descriptions/Decimal32InArrowCpp.md" import Float16InArrowCppMD from "@site/src/components/fundable/descriptions/Float16InArrowCpp.md" +import RunEndEncodedInArrowCppMD from "@site/src/components/fundable/descriptions/RunEndEncodedInArrowCpp.md" import ParquetNullOptimizationsMD from "@site/src/components/fundable/descriptions/ParquetNullOptimizations.md" export const fundableProjectsDetails = { @@ -140,6 +141,18 @@ export const fundableProjectsDetails = { currentFundingPercentage: 0, repoLink: "https://github.com/apache/arrow" }, + { + category: "Apache Arrow and Parquet", 
+ title: "Complete Run-End-Encoded support in Arrow C++", + pageName: "RunEndEncodedInApacheArrow", + shortDescription: "Like dictionary encoding, run-end-encoding allows representing some kinds of data more efficiently.", + description: RunEndEncodedInArrowCppMD, + price: "TBD", + maxNbOfFunders: 4, + currentNbOfFunders: 0, + currentFundingPercentage: 0, + repoLink: "https://github.com/apache/arrow" + }, { category: "Apache Arrow and Parquet", title: "Parquet reader optimizations", diff --git a/src/pages/fundable/RunEndEncodedInApacheArrow/GetAQuote.tsx b/src/pages/fundable/RunEndEncodedInApacheArrow/GetAQuote.tsx new file mode 100644 index 00000000..a945b598 --- /dev/null +++ b/src/pages/fundable/RunEndEncodedInApacheArrow/GetAQuote.tsx @@ -0,0 +1,9 @@ +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; +import GetAQuotePage from '@site/src/components/fundable/GetAQuotePage'; + +export default function FundablePage() { + const { siteConfig } = useDocusaurusContext(); + return ( + + ); +} \ No newline at end of file diff --git a/src/pages/fundable/RunEndEncodedInApacheArrow/index.tsx b/src/pages/fundable/RunEndEncodedInApacheArrow/index.tsx new file mode 100644 index 00000000..876857af --- /dev/null +++ b/src/pages/fundable/RunEndEncodedInApacheArrow/index.tsx @@ -0,0 +1,9 @@ +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; +import LargeProjectCardPage from '@site/src/components/fundable/LargeProjectCardPage'; + +export default function FundablePage() { + const { siteConfig } = useDocusaurusContext(); + return ( + + ); +}