66use indexmap:: IndexMap ;
77
88use crate :: error:: Error ;
9- use crate :: schema:: { SchemaRef , StructField , StructType } ;
9+ use crate :: expressions:: ColumnName ;
10+ use crate :: schema:: { DataType , SchemaRef , StructField , StructType } ;
1011use crate :: DeltaResult ;
1112
1213/// A schema evolution operation to be applied during ALTER TABLE.
@@ -18,6 +19,60 @@ use crate::DeltaResult;
1819pub ( crate ) enum SchemaOperation {
1920 /// Add a top-level column.
2021 AddColumn { field : StructField } ,
22+
23+ /// Change a column's nullability from NOT NULL to nullable.
24+ SetNullable { path : ColumnName } ,
25+ }
26+
27+ // Helper to modify a nested column. For each component in `remaining`, locates the matching
28+ // field (case-insensitive), then descends into the next nested struct. At the leaf, applies
29+ // `modifier` to produce the replacement field. Returns the modified field list. `full_path` is
30+ // used only in error messages.
31+ //
32+ // Unlike `StructType::walk_column_fields_by` which is iterative and read-only, this must be
33+ // recursive so it can rebuild parent structs bottom-up after modifying the leaf.
34+ //
35+ // Returns an error if a field in the path does not exist or an intermediate field is not a struct.
36+ fn modify_field_at_path (
37+ mut fields : Vec < StructField > ,
38+ full_path : & [ String ] ,
39+ remaining : & [ String ] ,
40+ modifier : & dyn Fn ( StructField ) -> DeltaResult < StructField > ,
41+ ) -> DeltaResult < Vec < StructField > > {
42+ let ( first, rest) = remaining
43+ . split_first ( )
44+ . ok_or_else ( || Error :: internal_error ( "modify_field_at_path called with empty path" ) ) ?;
45+
46+ // Delta column names are case-insensitive
47+ let idx = fields
48+ . iter ( )
49+ . position ( |f| f. name ( ) . eq_ignore_ascii_case ( first) )
50+ . ok_or_else ( || {
51+ Error :: generic ( format ! (
52+ "Column '{}' does not exist" ,
53+ ColumnName :: new( full_path. iter( ) )
54+ ) )
55+ } ) ?;
56+
57+ if rest. is_empty ( ) {
58+ let original = fields. remove ( idx) ;
59+ fields. insert ( idx, modifier ( original) ?) ;
60+ } else {
61+ // Take ownership of the field to avoid cloning inner struct fields
62+ let mut field = fields. remove ( idx) ;
63+ let DataType :: Struct ( inner) = field. data_type else {
64+ return Err ( Error :: generic ( format ! (
65+ "Column '{}' is not a struct; cannot traverse into it" ,
66+ ColumnName :: new( full_path. iter( ) )
67+ ) ) ) ;
68+ } ;
69+ // into_fields() gives owned fields -- no cloning
70+ let new_inner_fields =
71+ modify_field_at_path ( inner. into_fields ( ) . collect ( ) , full_path, rest, modifier) ?;
72+ field. data_type = DataType :: Struct ( Box :: new ( StructType :: try_new ( new_inner_fields) ?) ) ;
73+ fields. insert ( idx, field) ;
74+ }
75+ Ok ( fields)
2176}
2277
2378/// The result of applying schema operations.
@@ -68,6 +123,43 @@ pub(crate) fn apply_schema_operations(
68123 }
69124 fields. insert ( key, field) ;
70125 }
126+ SchemaOperation :: SetNullable { path } => {
127+ let segments = path. path ( ) ;
128+ let key = segments
129+ . first ( )
130+ . ok_or_else ( || Error :: generic ( "Cannot set nullable: empty column path" ) ) ?
131+ . to_lowercase ( ) ;
132+ let field = fields. get_mut ( & key) . ok_or_else ( || {
133+ Error :: generic ( format ! (
134+ "Cannot set nullable on column '{path}': column does not exist"
135+ ) )
136+ } ) ?;
137+ if segments. len ( ) == 1 {
138+ field. nullable = true ;
139+ } else {
140+ // Take ownership of data_type to avoid cloning inner fields
141+ let data_type = std:: mem:: replace ( & mut field. data_type , DataType :: BOOLEAN ) ;
142+ let DataType :: Struct ( inner) = data_type else {
143+ field. data_type = data_type;
144+ return Err ( Error :: generic ( format ! (
145+ "Column '{}' is not a struct; cannot traverse into it \
146+ while resolving '{path}'",
147+ segments[ 0 ]
148+ ) ) ) ;
149+ } ;
150+ let modified_inner = modify_field_at_path (
151+ inner. into_fields ( ) . collect ( ) ,
152+ segments,
153+ & segments[ 1 ..] ,
154+ & |mut f| {
155+ f. nullable = true ;
156+ Ok ( f)
157+ } ,
158+ ) ?;
159+ field. data_type =
160+ DataType :: Struct ( Box :: new ( StructType :: try_new ( modified_inner) ?) ) ;
161+ }
162+ }
71163 }
72164 }
73165
@@ -82,6 +174,7 @@ mod tests {
82174 use rstest:: rstest;
83175
84176 use super :: * ;
177+ use crate :: expressions:: column_name;
85178 use crate :: schema:: { DataType , StructField , StructType } ;
86179
87180 fn simple_schema ( ) -> StructType {
@@ -101,6 +194,103 @@ mod tests {
101194 SchemaOperation :: AddColumn { field }
102195 }
103196
197+ fn nested_schema ( ) -> StructType {
198+ StructType :: try_new ( vec ! [
199+ StructField :: not_null( "id" , DataType :: INTEGER ) ,
200+ StructField :: nullable(
201+ "address" ,
202+ StructType :: try_new( vec![
203+ StructField :: not_null( "city" , DataType :: STRING ) ,
204+ StructField :: nullable( "zip" , DataType :: STRING ) ,
205+ ] )
206+ . unwrap( ) ,
207+ ) ,
208+ ] )
209+ . unwrap ( )
210+ }
211+
212+ // === modify_field_at_path tests ===
213+
214+ #[ test]
215+ fn modify_top_level_field ( ) {
216+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
217+ let path = vec ! [ "id" . to_string( ) ] ;
218+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
219+ f. nullable = true ;
220+ Ok ( f)
221+ } )
222+ . unwrap ( ) ;
223+ let id = result. iter ( ) . find ( |f| f. name ( ) == "id" ) . unwrap ( ) ;
224+ assert ! ( id. is_nullable( ) ) ;
225+ }
226+
227+ #[ test]
228+ fn modify_nested_field ( ) {
229+ let fields: Vec < StructField > = nested_schema ( ) . into_fields ( ) . collect ( ) ;
230+ let path = vec ! [ "address" . to_string( ) , "city" . to_string( ) ] ;
231+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
232+ f. nullable = true ;
233+ Ok ( f)
234+ } )
235+ . unwrap ( ) ;
236+ let addr = result. iter ( ) . find ( |f| f. name ( ) == "address" ) . unwrap ( ) ;
237+ match addr. data_type ( ) {
238+ DataType :: Struct ( s) => assert ! ( s. field( "city" ) . unwrap( ) . is_nullable( ) ) ,
239+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
240+ }
241+ }
242+
243+ #[ test]
244+ fn modify_preserves_sibling_fields ( ) {
245+ let fields: Vec < StructField > = nested_schema ( ) . into_fields ( ) . collect ( ) ;
246+ let path = vec ! [ "address" . to_string( ) , "city" . to_string( ) ] ;
247+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
248+ f. nullable = true ;
249+ Ok ( f)
250+ } )
251+ . unwrap ( ) ;
252+ // "id" should be unchanged
253+ let id = result. iter ( ) . find ( |f| f. name ( ) == "id" ) . unwrap ( ) ;
254+ assert ! ( !id. is_nullable( ) ) ;
255+ // "zip" sibling should be unchanged
256+ let addr = result. iter ( ) . find ( |f| f. name ( ) == "address" ) . unwrap ( ) ;
257+ match addr. data_type ( ) {
258+ DataType :: Struct ( s) => assert ! ( s. field( "zip" ) . unwrap( ) . is_nullable( ) ) ,
259+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
260+ }
261+ }
262+
263+ #[ test]
264+ fn modify_nonexistent_field_fails ( ) {
265+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
266+ let path = vec ! [ "nope" . to_string( ) ] ;
267+ let err = modify_field_at_path ( fields, & path, & path, & |f| Ok ( f) ) . unwrap_err ( ) ;
268+ assert ! ( err. to_string( ) . contains( "does not exist" ) ) ;
269+ }
270+
271+ #[ test]
272+ fn modify_through_non_struct_fails ( ) {
273+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
274+ let path = vec ! [ "name" . to_string( ) , "inner" . to_string( ) ] ;
275+ let err = modify_field_at_path ( fields, & path, & path, & |f| Ok ( f) ) . unwrap_err ( ) ;
276+ assert ! ( err. to_string( ) . contains( "not a struct" ) ) ;
277+ }
278+
279+ #[ test]
280+ fn modify_case_insensitive_lookup ( ) {
281+ let fields: Vec < StructField > = simple_schema ( ) . into_fields ( ) . collect ( ) ;
282+ let path = vec ! [ "ID" . to_string( ) ] ;
283+ let result = modify_field_at_path ( fields, & path, & path, & |mut f| {
284+ f. nullable = true ;
285+ Ok ( f)
286+ } )
287+ . unwrap ( ) ;
288+ let id = result. iter ( ) . find ( |f| f. name ( ) == "id" ) . unwrap ( ) ;
289+ assert ! ( id. is_nullable( ) ) ;
290+ }
291+
292+ // === apply_schema_operations tests ===
293+
104294 #[ rstest]
105295 #[ case:: new_column( "email" , true , 3 ) ]
106296 #[ case:: duplicate_exact( "name" , true , 0 ) ]
@@ -144,4 +334,123 @@ mod tests {
144334 let names: Vec < & String > = result. schema . fields ( ) . map ( |f| f. name ( ) ) . collect ( ) ;
145335 assert_eq ! ( names, vec![ "id" , "name" , "email" ] ) ;
146336 }
337+
338+ // === apply_schema_operations: SetNullable tests ===
339+
340+ #[ test]
341+ fn set_nullable_on_required_field ( ) {
342+ let ops = vec ! [ SchemaOperation :: SetNullable {
343+ path: column_name!( "id" ) ,
344+ } ] ;
345+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
346+ let id_field = result. schema . field ( "id" ) . unwrap ( ) ;
347+ assert ! ( id_field. is_nullable( ) ) ;
348+ }
349+
350+ #[ test]
351+ fn set_nullable_already_nullable_is_noop ( ) {
352+ let ops = vec ! [ SchemaOperation :: SetNullable {
353+ path: column_name!( "name" ) ,
354+ } ] ;
355+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
356+ let name_field = result. schema . field ( "name" ) . unwrap ( ) ;
357+ assert ! ( name_field. is_nullable( ) ) ;
358+ }
359+
360+ #[ test]
361+ fn set_nullable_case_insensitive ( ) {
362+ let ops = vec ! [ SchemaOperation :: SetNullable {
363+ path: column_name!( "ID" ) ,
364+ } ] ;
365+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
366+ let id_field = result. schema . field ( "id" ) . unwrap ( ) ;
367+ assert ! ( id_field. is_nullable( ) ) ;
368+ }
369+
370+ #[ test]
371+ fn set_nullable_deeply_nested_field ( ) {
372+ let schema = StructType :: try_new ( vec ! [
373+ StructField :: not_null( "id" , DataType :: INTEGER ) ,
374+ StructField :: nullable(
375+ "address" ,
376+ StructType :: try_new( vec![ StructField :: nullable(
377+ "location" ,
378+ StructType :: try_new( vec![ StructField :: not_null( "zipcode" , DataType :: STRING ) ] )
379+ . unwrap( ) ,
380+ ) ] )
381+ . unwrap( ) ,
382+ ) ,
383+ ] )
384+ . unwrap ( ) ;
385+ let ops = vec ! [ SchemaOperation :: SetNullable {
386+ path: column_name!( "address.location.zipcode" ) ,
387+ } ] ;
388+ let result = apply_schema_operations ( schema, ops) . unwrap ( ) ;
389+ let addr = result. schema . field ( "address" ) . unwrap ( ) ;
390+ match addr. data_type ( ) {
391+ DataType :: Struct ( s) => match s. field ( "location" ) . unwrap ( ) . data_type ( ) {
392+ DataType :: Struct ( loc) => {
393+ let zip = loc. field ( "zipcode" ) . unwrap ( ) ;
394+ assert ! ( zip. is_nullable( ) ) ;
395+ }
396+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
397+ } ,
398+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
399+ }
400+ }
401+
402+ #[ test]
403+ fn chain_add_and_set_nullable ( ) {
404+ let ops = vec ! [
405+ SchemaOperation :: AddColumn {
406+ field: StructField :: nullable( "email" , DataType :: STRING ) ,
407+ } ,
408+ SchemaOperation :: SetNullable {
409+ path: column_name!( "id" ) ,
410+ } ,
411+ ] ;
412+ let result = apply_schema_operations ( simple_schema ( ) , ops) . unwrap ( ) ;
413+ assert_eq ! ( result. schema. fields( ) . count( ) , 3 ) ;
414+ assert ! ( result. schema. field( "email" ) . is_some( ) ) ;
415+ assert ! ( result. schema. field( "id" ) . unwrap( ) . is_nullable( ) ) ;
416+ }
417+
418+ #[ test]
419+ fn set_nullable_nonexistent_column_fails ( ) {
420+ let ops = vec ! [ SchemaOperation :: SetNullable {
421+ path: column_name!( "nonexistent" ) ,
422+ } ] ;
423+ let err = apply_schema_operations ( simple_schema ( ) , ops) . unwrap_err ( ) ;
424+ assert ! ( err. to_string( ) . contains( "does not exist" ) ) ;
425+ }
426+
427+ #[ test]
428+ fn set_nullable_nested_field ( ) {
429+ let schema = StructType :: try_new ( vec ! [
430+ StructField :: not_null( "id" , DataType :: INTEGER ) ,
431+ StructField :: nullable(
432+ "address" ,
433+ StructType :: try_new( vec![ StructField :: not_null( "city" , DataType :: STRING ) ] ) . unwrap( ) ,
434+ ) ,
435+ ] )
436+ . unwrap ( ) ;
437+ let ops = vec ! [ SchemaOperation :: SetNullable {
438+ path: column_name!( "address.city" ) ,
439+ } ] ;
440+ let result = apply_schema_operations ( schema, ops) . unwrap ( ) ;
441+ let addr = result. schema . field ( "address" ) . unwrap ( ) ;
442+ match addr. data_type ( ) {
443+ DataType :: Struct ( s) => assert ! ( s. field( "city" ) . unwrap( ) . is_nullable( ) ) ,
444+ other => panic ! ( "Expected Struct, got: {other:?}" ) ,
445+ }
446+ }
447+
448+ #[ test]
449+ fn set_nullable_through_non_struct_fails ( ) {
450+ let ops = vec ! [ SchemaOperation :: SetNullable {
451+ path: column_name!( "name.inner" ) ,
452+ } ] ;
453+ let err = apply_schema_operations ( simple_schema ( ) , ops) . unwrap_err ( ) ;
454+ assert ! ( err. to_string( ) . contains( "not a struct" ) ) ;
455+ }
147456}
0 commit comments