Skip to content

Commit c83b9d7

Browse files
authored
feat: add array_slice benchmark (#18879)
## Which issue does this PR close? - Part of #18458. ## Rationale for this change - Add a benchmark for `array_slice` ## What changes are included in this PR? Benchmarks `array_slice` with: - `ListArray` / `ListViewArray` - array / scalar input - with / without `stride` - `stride` in `[-2, -1, 1, 2]` ## Are these changes tested? ## Are there any user-facing changes?
1 parent 39d4a38 commit c83b9d7

2 files changed

Lines changed: 234 additions & 0 deletions

File tree

datafusion/functions-nested/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ name = "array_has"
7777
harness = false
7878
name = "array_reverse"
7979

80+
[[bench]]
81+
harness = false
82+
name = "array_slice"
83+
8084
[[bench]]
8185
harness = false
8286
name = "map"
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use arrow::array::{
21+
Int64Array, ListArray, ListViewArray, NullBufferBuilder, PrimitiveArray,
22+
};
23+
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
24+
use arrow::datatypes::{DataType, Field, Int64Type};
25+
use criterion::{criterion_group, criterion_main, Criterion};
26+
use datafusion_common::config::ConfigOptions;
27+
use datafusion_common::ScalarValue;
28+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
29+
use datafusion_functions_nested::extract::array_slice_udf;
30+
use rand::rngs::StdRng;
31+
use rand::seq::IndexedRandom;
32+
use rand::{Rng, SeedableRng};
33+
use std::hint::black_box;
34+
use std::sync::Arc;
35+
36+
fn create_inputs(
37+
rng: &mut StdRng,
38+
size: usize,
39+
child_array_size: usize,
40+
null_density: f32,
41+
) -> (ListArray, ListViewArray) {
42+
let mut nulls_builder = NullBufferBuilder::new(size);
43+
let mut sizes = Vec::with_capacity(size);
44+
45+
for _ in 0..size {
46+
if rng.random::<f32>() < null_density {
47+
nulls_builder.append_null();
48+
} else {
49+
nulls_builder.append_non_null();
50+
}
51+
sizes.push(rng.random_range(1..child_array_size));
52+
}
53+
let nulls = nulls_builder.finish();
54+
55+
let length = sizes.iter().sum();
56+
let values: PrimitiveArray<Int64Type> =
57+
(0..length).map(|_| Some(rng.random())).collect();
58+
let values = Arc::new(values);
59+
60+
let offsets = OffsetBuffer::from_lengths(sizes.clone());
61+
let list_array = ListArray::new(
62+
Arc::new(Field::new_list_field(DataType::Int64, true)),
63+
offsets.clone(),
64+
values.clone(),
65+
nulls.clone(),
66+
);
67+
68+
let offsets = ScalarBuffer::from(offsets.slice(0, size - 1));
69+
let sizes = ScalarBuffer::from_iter(sizes.into_iter().map(|v| v as i32));
70+
let list_view_array = ListViewArray::new(
71+
Arc::new(Field::new_list_field(DataType::Int64, true)),
72+
offsets,
73+
sizes,
74+
values,
75+
nulls,
76+
);
77+
78+
(list_array, list_view_array)
79+
}
80+
81+
/// Create `from`, `to`, and `stride` from an array of strides.
82+
fn random_from_to_stride(
83+
rng: &mut StdRng,
84+
size: i64,
85+
null_density: f32,
86+
stride_choices: &[Option<i64>],
87+
) -> (Option<i64>, Option<i64>, Option<i64>) {
88+
let from = if rng.random::<f32>() < null_density {
89+
None
90+
} else {
91+
Some(rng.random_range(1..=size))
92+
};
93+
94+
let to = if rng.random::<f32>() < null_density {
95+
None
96+
} else {
97+
match from {
98+
Some(from) => Some(rng.random_range(from..=size)),
99+
None => Some(rng.random_range(1..=size)),
100+
}
101+
};
102+
103+
let stride = stride_choices.choose(rng).cloned().unwrap_or(None);
104+
105+
if from.is_none() || to.is_none() || stride.is_none_or(|s| s > 0) {
106+
(from, to, stride)
107+
} else {
108+
// stride < 0, swap from and to
109+
(to, from, stride)
110+
}
111+
}
112+
113+
fn array_slice_benchmark(
114+
name: &str,
115+
input: ColumnarValue,
116+
mut args: Vec<ColumnarValue>,
117+
c: &mut Criterion,
118+
size: usize,
119+
) {
120+
args.insert(0, input);
121+
122+
let array_slice = array_slice_udf();
123+
let arg_fields = args
124+
.iter()
125+
.enumerate()
126+
.map(|(idx, arg)| {
127+
<Arc<Field>>::from(Field::new(format!("arg_{idx}"), arg.data_type(), true))
128+
})
129+
.collect::<Vec<_>>();
130+
c.bench_function(name, |b| {
131+
b.iter(|| {
132+
black_box(
133+
array_slice
134+
.invoke_with_args(ScalarFunctionArgs {
135+
args: args.clone(),
136+
arg_fields: arg_fields.clone(),
137+
number_rows: size,
138+
return_field: Field::new_list_field(args[0].data_type(), true)
139+
.into(),
140+
config_options: Arc::new(ConfigOptions::default()),
141+
})
142+
.unwrap(),
143+
)
144+
})
145+
});
146+
}
147+
148+
fn criterion_benchmark(c: &mut Criterion) {
149+
let rng = &mut StdRng::seed_from_u64(42);
150+
let size = 1_000_000;
151+
let child_array_size = 100;
152+
let null_density = 0.1;
153+
154+
let (list_array, list_view_array) =
155+
create_inputs(rng, size, child_array_size, null_density);
156+
157+
let mut array_from = Vec::with_capacity(size);
158+
let mut array_to = Vec::with_capacity(size);
159+
let mut array_stride = Vec::with_capacity(size);
160+
for child_array_size in list_array.offsets().lengths() {
161+
let (from, to, stride) = random_from_to_stride(
162+
rng,
163+
child_array_size as i64,
164+
null_density,
165+
&[None, Some(-2), Some(-1), Some(1), Some(2)],
166+
);
167+
array_from.push(from);
168+
array_to.push(to);
169+
array_stride.push(stride);
170+
}
171+
172+
// input
173+
let list_array = ColumnarValue::Array(Arc::new(list_array));
174+
let list_view_array = ColumnarValue::Array(Arc::new(list_view_array));
175+
176+
// args
177+
let array_from = ColumnarValue::Array(Arc::new(Int64Array::from(array_from)));
178+
let array_to = ColumnarValue::Array(Arc::new(Int64Array::from(array_to)));
179+
let array_stride = ColumnarValue::Array(Arc::new(Int64Array::from(array_stride)));
180+
let scalar_from = ColumnarValue::Scalar(ScalarValue::from(1i64));
181+
let scalar_to = ColumnarValue::Scalar(ScalarValue::from(child_array_size as i64 / 2));
182+
183+
for input in [list_array, list_view_array] {
184+
let input_type = input.data_type().to_string();
185+
186+
array_slice_benchmark(
187+
&format!("array_slice: input {input_type}, array args"),
188+
input.clone(),
189+
vec![array_from.clone(), array_to.clone(), array_stride.clone()],
190+
c,
191+
size,
192+
);
193+
194+
array_slice_benchmark(
195+
&format!("array_slice: input {input_type}, array args, no stride"),
196+
input.clone(),
197+
vec![array_from.clone(), array_to.clone()],
198+
c,
199+
size,
200+
);
201+
202+
array_slice_benchmark(
203+
&format!("array_slice: input {input_type}, scalar args, no stride"),
204+
input.clone(),
205+
vec![scalar_from.clone(), scalar_to.clone()],
206+
c,
207+
size,
208+
);
209+
210+
for stride in [-2i64, -1i64, 1i64, 2i64] {
211+
// swap from and to if stride < 0
212+
let (scalar_from, scalar_to) = if stride > 0 {
213+
(scalar_from.clone(), scalar_to.clone())
214+
} else {
215+
(scalar_to.clone(), scalar_from.clone())
216+
};
217+
let scalar_stride = ColumnarValue::Scalar(ScalarValue::from(stride));
218+
array_slice_benchmark(
219+
&format!("array_slice: input {input_type}, scalar args, stride={stride}"),
220+
input.clone(),
221+
vec![scalar_from, scalar_to, scalar_stride],
222+
c,
223+
size,
224+
);
225+
}
226+
}
227+
}
228+
229+
// Declare the Criterion group `benches`, which runs `criterion_benchmark`.
criterion_group!(benches, criterion_benchmark);
230+
// Generate the benchmark binary's entry point for the `benches` group (the
// bench target is declared with `harness = false`).
criterion_main!(benches);

0 commit comments

Comments
 (0)