@@ -902,10 +902,22 @@ fn evaluate_map_to_struct(
902902 . map ( |f| ArrowField :: try_from_kernel ( * f) )
903903 . try_collect ( ) ?;
904904
905+ // Propagate the input map's null bitmap to the output struct. This is critical:
906+ // when a map row is null, the loop above appends null to every child builder
907+ // (since no keys match). Without this null bitmap, the output struct row appears
908+ // valid (non-null) to Arrow, but its children contain nulls. If any child field
909+ // is non-nullable, Arrow rejects this as "Found unmasked nulls for non-nullable
910+ // StructArray field". With the bitmap, the struct row is marked null, which masks
911+ // the child nulls and satisfies Arrow's validation.
912+ //
913+ // This matters during checkpoint creation: the COALESCE expression evaluates
914+ // MAP_TO_STRUCT for all rows including non-add actions (remove, metadata, protocol)
915+ // where the partition values map is null. Partition columns declared NOT NULL would
916+ // cause the checkpoint to fail without this propagation.
905917 Ok ( StructArray :: try_new (
906918 arrow_fields. into ( ) ,
907919 output_columns,
908- None ,
920+ map_array . nulls ( ) . cloned ( ) ,
909921 ) ?)
910922}
911923
@@ -924,7 +936,8 @@ mod tests {
924936
925937 use super :: * ;
926938 use crate :: arrow:: array:: {
927- ArrayRef , BooleanArray , Int32Array , Int64Array , StringArray , StructArray ,
939+ ArrayRef , BooleanArray , Int32Array , Int64Array , MapBuilder , StringArray , StringBuilder ,
940+ StructArray ,
928941 } ;
929942 use crate :: arrow:: datatypes:: {
930943 DataType as ArrowDataType , Field as ArrowField , Schema as ArrowSchema ,
@@ -1971,8 +1984,6 @@ mod tests {
19711984
19721985 /// Helper: creates a RecordBatch with a `pv` column of type Map<String, String>.
19731986 fn create_partition_map_batch ( ) -> RecordBatch {
1974- use crate :: arrow:: array:: { MapBuilder , StringBuilder } ;
1975-
19761987 let mut builder = MapBuilder :: new ( None , StringBuilder :: new ( ) , StringBuilder :: new ( ) ) ;
19771988
19781989 // Row 0: {"date": "2024-01-15", "region": "us", "id": "42"}
@@ -2073,8 +2084,6 @@ mod tests {
20732084
20742085 #[ test]
20752086 fn test_map_to_struct_parse_error ( ) {
2076- use crate :: arrow:: array:: { MapBuilder , StringBuilder } ;
2077-
20782087 let mut builder = MapBuilder :: new ( None , StringBuilder :: new ( ) , StringBuilder :: new ( ) ) ;
20792088 builder. keys ( ) . append_value ( "count" ) ;
20802089 builder. values ( ) . append_value ( "not_a_number" ) ;
@@ -2098,8 +2107,6 @@ mod tests {
20982107
20992108 #[ test]
21002109 fn test_map_to_struct_duplicate_keys ( ) {
2101- use crate :: arrow:: array:: { MapBuilder , StringBuilder } ;
2102-
21032110 let mut builder = MapBuilder :: new ( None , StringBuilder :: new ( ) , StringBuilder :: new ( ) ) ;
21042111 builder. keys ( ) . append_value ( "x" ) ;
21052112 builder. values ( ) . append_value ( "first" ) ;
@@ -2130,6 +2137,98 @@ mod tests {
21302137 assert_eq ! ( col. value( 0 ) , "last" ) ;
21312138 }
21322139
2140+ #[ rstest]
2141+ #[ case:: mixed_nulls(
2142+ vec![
2143+ Some ( vec![ ( "region" , "us" ) , ( "id" , "42" ) ] ) ,
2144+ None ,
2145+ Some ( vec![ ( "region" , "eu" ) , ( "id" , "7" ) ] ) ,
2146+ ] ,
2147+ vec![ true , false , true ] ,
2148+ ) ]
2149+ #[ case:: all_nulls( vec![ None , None ] , vec![ false , false ] ) ]
2150+ fn test_map_to_struct_null_propagation_with_non_nullable_fields (
2151+ #[ case] rows : Vec < Option < Vec < ( & str , & str ) > > > ,
2152+ #[ case] expected_validity : Vec < bool > ,
2153+ ) {
2154+ let mut builder = MapBuilder :: new ( None , StringBuilder :: new ( ) , StringBuilder :: new ( ) ) ;
2155+ for row in & rows {
2156+ match row {
2157+ Some ( entries) => {
2158+ for ( k, v) in entries {
2159+ builder. keys ( ) . append_value ( k) ;
2160+ builder. values ( ) . append_value ( v) ;
2161+ }
2162+ builder. append ( true ) . unwrap ( ) ;
2163+ }
2164+ None => {
2165+ builder. append ( false ) . unwrap ( ) ;
2166+ }
2167+ }
2168+ }
2169+ let map_array = builder. finish ( ) ;
2170+ let schema = ArrowSchema :: new ( vec ! [ ArrowField :: new(
2171+ "pv" ,
2172+ map_array. data_type( ) . clone( ) ,
2173+ true ,
2174+ ) ] ) ;
2175+ let batch = RecordBatch :: try_new ( Arc :: new ( schema) , vec ! [ Arc :: new( map_array) ] ) . unwrap ( ) ;
2176+
2177+ let output_schema = StructType :: new_unchecked ( vec ! [
2178+ StructField :: new( "region" , DataType :: STRING , false ) ,
2179+ StructField :: new( "id" , DataType :: INTEGER , false ) ,
2180+ ] ) ;
2181+ let result_type = DataType :: Struct ( Box :: new ( output_schema) ) ;
2182+ let expr = Expr :: map_to_struct ( column_expr ! ( "pv" ) ) ;
2183+ let result = evaluate_expression ( & expr, & batch, Some ( & result_type) ) . unwrap ( ) ;
2184+ let structs = result. as_any ( ) . downcast_ref :: < StructArray > ( ) . unwrap ( ) ;
2185+
2186+ assert_eq ! ( structs. len( ) , expected_validity. len( ) ) ;
2187+ for ( i, & valid) in expected_validity. iter ( ) . enumerate ( ) {
2188+ assert_eq ! ( structs. is_valid( i) , valid, "row {i} validity mismatch" ) ;
2189+ }
2190+ }
2191+
2192+ #[ test]
2193+ fn test_coalesce_map_to_struct_with_null_map_non_nullable_fields ( ) {
2194+ let mut builder = MapBuilder :: new ( None , StringBuilder :: new ( ) , StringBuilder :: new ( ) ) ;
2195+ builder. keys ( ) . append_value ( "date" ) ;
2196+ builder. values ( ) . append_value ( "2024-01-15" ) ;
2197+ builder. append ( true ) . unwrap ( ) ;
2198+ builder. append ( false ) . unwrap ( ) ;
2199+
2200+ let map_array = builder. finish ( ) ;
2201+ let map_type = map_array. data_type ( ) . clone ( ) ;
2202+ let schema = Arc :: new ( ArrowSchema :: new ( vec ! [
2203+ ArrowField :: new(
2204+ "pv_parsed" ,
2205+ ArrowDataType :: Struct (
2206+ vec![ ArrowField :: new( "date" , ArrowDataType :: Date32 , false ) ] . into( ) ,
2207+ ) ,
2208+ true ,
2209+ ) ,
2210+ ArrowField :: new( "pv" , map_type, true ) ,
2211+ ] ) ) ;
2212+
2213+ let pv_parsed = new_null_array ( schema. field ( 0 ) . data_type ( ) , 2 ) ;
2214+ let batch = RecordBatch :: try_new ( schema, vec ! [ pv_parsed, Arc :: new( map_array) ] ) . unwrap ( ) ;
2215+
2216+ let output_schema =
2217+ StructType :: new_unchecked ( vec ! [ StructField :: new( "date" , DataType :: DATE , false ) ] ) ;
2218+ let result_type = DataType :: Struct ( Box :: new ( output_schema) ) ;
2219+ let expr = Expr :: coalesce ( [
2220+ Expr :: column ( [ "pv_parsed" ] ) ,
2221+ Expr :: map_to_struct ( column_expr ! ( "pv" ) ) ,
2222+ ] ) ;
2223+ let result = evaluate_expression ( & expr, & batch, Some ( & result_type) ) . unwrap ( ) ;
2224+ let structs = result. as_any ( ) . downcast_ref :: < StructArray > ( ) . unwrap ( ) ;
2225+
2226+ // Row 0: pv_parsed null, MAP_TO_STRUCT succeeds
2227+ assert ! ( structs. is_valid( 0 ) ) ;
2228+ // Row 1: pv_parsed null, map null → null struct
2229+ assert ! ( structs. is_null( 1 ) ) ;
2230+ }
2231+
21332232 #[ test]
21342233 fn test_map_to_struct_non_map_input ( ) {
21352234 let schema = ArrowSchema :: new ( vec ! [ ArrowField :: new( "s" , ArrowDataType :: Utf8 , true ) ] ) ;
0 commit comments