1use crate::cast::*;
19
20pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
25 array: &dyn Array,
26 to_type: &DataType,
27 cast_options: &CastOptions,
28) -> Result<ArrayRef, ArrowError> {
29 use DataType::*;
30
31 match to_type {
32 Dictionary(to_index_type, to_value_type) => {
33 let dict_array = array
34 .as_any()
35 .downcast_ref::<DictionaryArray<K>>()
36 .ok_or_else(|| {
37 ArrowError::ComputeError(
38 "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(),
39 )
40 })?;
41
42 let keys_array: ArrayRef =
43 Arc::new(PrimitiveArray::<K>::from(dict_array.keys().to_data()));
44 let values_array = dict_array.values();
45 let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?;
46 let cast_values = cast_with_options(values_array, to_value_type, cast_options)?;
47
48 if cast_keys.null_count() > keys_array.null_count() {
51 return Err(ArrowError::ComputeError(format!(
52 "Could not convert {} dictionary indexes from {:?} to {:?}",
53 cast_keys.null_count() - keys_array.null_count(),
54 keys_array.data_type(),
55 to_index_type
56 )));
57 }
58
59 let data = cast_keys.into_data();
60 let builder = data
61 .into_builder()
62 .data_type(to_type.clone())
63 .child_data(vec![cast_values.into_data()]);
64
65 let data = unsafe { builder.build_unchecked() };
68
69 let new_array: ArrayRef = match **to_index_type {
71 Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)),
72 Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)),
73 Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)),
74 Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)),
75 UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)),
76 UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)),
77 UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)),
78 UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)),
79 _ => {
80 return Err(ArrowError::CastError(format!(
81 "Unsupported type {to_index_type:?} for dictionary index"
82 )));
83 }
84 };
85
86 Ok(new_array)
87 }
88 Utf8View => {
89 let dict_array = array
92 .as_dictionary::<K>()
93 .downcast_dict::<StringArray>()
94 .ok_or_else(|| {
95 ArrowError::ComputeError(
96 "Internal Error: Cannot cast Utf8View to StringArray of expected type"
97 .to_string(),
98 )
99 })?;
100
101 let string_view = view_from_dict_values::<K, StringViewType, GenericStringType<i32>>(
102 dict_array.values(),
103 dict_array.keys(),
104 )?;
105 Ok(Arc::new(string_view))
106 }
107 BinaryView => {
108 let dict_array = array
111 .as_dictionary::<K>()
112 .downcast_dict::<BinaryArray>()
113 .ok_or_else(|| {
114 ArrowError::ComputeError(
115 "Internal Error: Cannot cast BinaryView to BinaryArray of expected type"
116 .to_string(),
117 )
118 })?;
119
120 let binary_view = view_from_dict_values::<K, BinaryViewType, BinaryType>(
121 dict_array.values(),
122 dict_array.keys(),
123 )?;
124 Ok(Arc::new(binary_view))
125 }
126 _ => unpack_dictionary::<K>(array, to_type, cast_options),
127 }
128}
129
130fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArrayType>(
131 array: &GenericByteArray<V>,
132 keys: &PrimitiveArray<K>,
133) -> Result<GenericByteViewArray<T>, ArrowError> {
134 let value_buffer = array.values();
135 let value_offsets = array.value_offsets();
136 let mut builder = GenericByteViewBuilder::<T>::with_capacity(keys.len());
137 builder.append_block(value_buffer.clone());
138 for i in keys.iter() {
139 match i {
140 Some(v) => {
141 let idx = v.to_usize().ok_or_else(|| {
142 ArrowError::ComputeError("Invalid dictionary index".to_string())
143 })?;
144
145 unsafe {
149 let offset = value_offsets.get_unchecked(idx).as_usize();
150 let end = value_offsets.get_unchecked(idx + 1).as_usize();
151 let length = end - offset;
152 builder.append_view_unchecked(0, offset as u32, length as u32)
153 }
154 }
155 None => {
156 builder.append_null();
157 }
158 }
159 }
160 Ok(builder.finish())
161}
162
163pub(crate) fn unpack_dictionary<K>(
165 array: &dyn Array,
166 to_type: &DataType,
167 cast_options: &CastOptions,
168) -> Result<ArrayRef, ArrowError>
169where
170 K: ArrowDictionaryKeyType,
171{
172 let dict_array = array.as_dictionary::<K>();
173 let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?;
174 take(cast_dict_values.as_ref(), dict_array.keys(), None)
175}
176
177pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
179 array: &dyn Array,
180 primitive_type: DataType,
181 dict_value_type: &DataType,
182 cast_options: &CastOptions,
183) -> Result<ArrayRef, ArrowError> {
184 let primitive = cast_with_options(array, &primitive_type, cast_options)?;
185 let dict = cast_with_options(
186 primitive.as_ref(),
187 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
188 cast_options,
189 )?;
190 cast_with_options(
191 dict.as_ref(),
192 &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
193 cast_options,
194 )
195}
196
197pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
202 array: &dyn Array,
203 dict_value_type: &DataType,
204 cast_options: &CastOptions,
205) -> Result<ArrayRef, ArrowError> {
206 use DataType::*;
207
208 match *dict_value_type {
209 Int8 => pack_numeric_to_dictionary::<K, Int8Type>(array, dict_value_type, cast_options),
210 Int16 => pack_numeric_to_dictionary::<K, Int16Type>(array, dict_value_type, cast_options),
211 Int32 => pack_numeric_to_dictionary::<K, Int32Type>(array, dict_value_type, cast_options),
212 Int64 => pack_numeric_to_dictionary::<K, Int64Type>(array, dict_value_type, cast_options),
213 UInt8 => pack_numeric_to_dictionary::<K, UInt8Type>(array, dict_value_type, cast_options),
214 UInt16 => pack_numeric_to_dictionary::<K, UInt16Type>(array, dict_value_type, cast_options),
215 UInt32 => pack_numeric_to_dictionary::<K, UInt32Type>(array, dict_value_type, cast_options),
216 UInt64 => pack_numeric_to_dictionary::<K, UInt64Type>(array, dict_value_type, cast_options),
217 Decimal128(p, s) => pack_decimal_to_dictionary::<K, Decimal128Type>(
218 array,
219 dict_value_type,
220 p,
221 s,
222 cast_options,
223 ),
224 Decimal256(p, s) => pack_decimal_to_dictionary::<K, Decimal256Type>(
225 array,
226 dict_value_type,
227 p,
228 s,
229 cast_options,
230 ),
231 Float16 => {
232 pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
233 }
234 Float32 => {
235 pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
236 }
237 Float64 => {
238 pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
239 }
240 Date32 => pack_array_to_dictionary_via_primitive::<K>(
241 array,
242 DataType::Int32,
243 dict_value_type,
244 cast_options,
245 ),
246 Date64 => pack_array_to_dictionary_via_primitive::<K>(
247 array,
248 DataType::Int64,
249 dict_value_type,
250 cast_options,
251 ),
252 Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
253 array,
254 DataType::Int32,
255 dict_value_type,
256 cast_options,
257 ),
258 Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
259 array,
260 DataType::Int64,
261 dict_value_type,
262 cast_options,
263 ),
264 Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
265 array,
266 DataType::Int64,
267 dict_value_type,
268 cast_options,
269 ),
270 Utf8 => {
271 if array.data_type() == &DataType::Utf8View {
273 return string_view_to_dictionary::<K, i32>(array);
274 }
275 pack_byte_to_dictionary::<K, GenericStringType<i32>>(array, cast_options)
276 }
277 LargeUtf8 => {
278 if array.data_type() == &DataType::Utf8View {
280 return string_view_to_dictionary::<K, i64>(array);
281 }
282 pack_byte_to_dictionary::<K, GenericStringType<i64>>(array, cast_options)
283 }
284 Binary => {
285 if array.data_type() == &DataType::BinaryView {
287 return binary_view_to_dictionary::<K, i32>(array);
288 }
289 pack_byte_to_dictionary::<K, GenericBinaryType<i32>>(array, cast_options)
290 }
291 LargeBinary => {
292 if array.data_type() == &DataType::BinaryView {
294 return binary_view_to_dictionary::<K, i64>(array);
295 }
296 pack_byte_to_dictionary::<K, GenericBinaryType<i64>>(array, cast_options)
297 }
298 _ => Err(ArrowError::CastError(format!(
299 "Unsupported output type for dictionary packing: {dict_value_type:?}"
300 ))),
301 }
302}
303
304pub(crate) fn pack_numeric_to_dictionary<K, V>(
307 array: &dyn Array,
308 dict_value_type: &DataType,
309 cast_options: &CastOptions,
310) -> Result<ArrayRef, ArrowError>
311where
312 K: ArrowDictionaryKeyType,
313 V: ArrowPrimitiveType,
314{
315 let cast_values = cast_with_options(array, dict_value_type, cast_options)?;
317 let values = cast_values.as_primitive::<V>();
318
319 let mut b = PrimitiveDictionaryBuilder::<K, V>::with_capacity(values.len(), values.len());
320
321 for i in 0..values.len() {
323 if values.is_null(i) {
324 b.append_null();
325 } else {
326 b.append(values.value(i))?;
327 }
328 }
329 Ok(Arc::new(b.finish()))
330}
331
332pub(crate) fn pack_decimal_to_dictionary<K, D>(
333 array: &dyn Array,
334 dict_value_type: &DataType,
335 precision: u8,
336 scale: i8,
337 cast_options: &CastOptions,
338) -> Result<ArrayRef, ArrowError>
339where
340 K: ArrowDictionaryKeyType,
341 D: DecimalType + ArrowPrimitiveType,
342{
343 let dict = pack_numeric_to_dictionary::<K, D>(array, dict_value_type, cast_options)?;
344 let dict = dict
345 .as_dictionary::<K>()
346 .downcast_dict::<PrimitiveArray<D>>()
347 .ok_or_else(|| {
348 ArrowError::ComputeError(format!(
349 "Internal Error: Cannot cast dict to {}Array",
350 D::PREFIX
351 ))
352 })?;
353 let value = dict.values().clone();
354 let value = value.with_precision_and_scale(precision, scale)?;
356 Ok(Arc::new(DictionaryArray::<K>::try_new(
357 dict.keys().clone(),
358 Arc::new(value),
359 )?))
360}
361
362pub(crate) fn string_view_to_dictionary<K, O: OffsetSizeTrait>(
363 array: &dyn Array,
364) -> Result<ArrayRef, ArrowError>
365where
366 K: ArrowDictionaryKeyType,
367{
368 let mut b = GenericByteDictionaryBuilder::<K, GenericStringType<O>>::with_capacity(
369 array.len(),
370 1024,
371 1024,
372 );
373 let string_view = array
374 .as_any()
375 .downcast_ref::<StringViewArray>()
376 .ok_or_else(|| {
377 ArrowError::ComputeError("Internal Error: Cannot cast to StringViewArray".to_string())
378 })?;
379 for v in string_view.iter() {
380 match v {
381 Some(v) => {
382 b.append(v)?;
383 }
384 None => {
385 b.append_null();
386 }
387 }
388 }
389
390 Ok(Arc::new(b.finish()))
391}
392
393pub(crate) fn binary_view_to_dictionary<K, O: OffsetSizeTrait>(
394 array: &dyn Array,
395) -> Result<ArrayRef, ArrowError>
396where
397 K: ArrowDictionaryKeyType,
398{
399 let mut b = GenericByteDictionaryBuilder::<K, GenericBinaryType<O>>::with_capacity(
400 array.len(),
401 1024,
402 1024,
403 );
404 let binary_view = array
405 .as_any()
406 .downcast_ref::<BinaryViewArray>()
407 .ok_or_else(|| {
408 ArrowError::ComputeError("Internal Error: Cannot cast to BinaryViewArray".to_string())
409 })?;
410 for v in binary_view.iter() {
411 match v {
412 Some(v) => {
413 b.append(v)?;
414 }
415 None => {
416 b.append_null();
417 }
418 }
419 }
420
421 Ok(Arc::new(b.finish()))
422}
423
424pub(crate) fn pack_byte_to_dictionary<K, T>(
427 array: &dyn Array,
428 cast_options: &CastOptions,
429) -> Result<ArrayRef, ArrowError>
430where
431 K: ArrowDictionaryKeyType,
432 T: ByteArrayType,
433{
434 let cast_values = cast_with_options(array, &T::DATA_TYPE, cast_options)?;
435 let values = cast_values
436 .as_any()
437 .downcast_ref::<GenericByteArray<T>>()
438 .ok_or_else(|| {
439 ArrowError::ComputeError("Internal Error: Cannot cast to GenericByteArray".to_string())
440 })?;
441 let mut b = GenericByteDictionaryBuilder::<K, T>::with_capacity(values.len(), 1024, 1024);
442
443 for i in 0..values.len() {
445 if values.is_null(i) {
446 b.append_null();
447 } else {
448 b.append(values.value(i))?;
449 }
450 }
451 Ok(Arc::new(b.finish()))
452}