Skip to content

Commit 6891ac8

Browse files
feat: add set_nullable support for ALTER TABLE
Implement the SetNullable schema operation which changes a column's nullability from NOT NULL to nullable. If the column is already nullable, this is a no-op. Only the safe direction is allowed (NOT NULL -> nullable) per the Delta protocol.
1 parent a711638 commit 6891ac8

3 files changed

Lines changed: 384 additions & 3 deletions

File tree

kernel/src/transaction/builder/alter_table.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
//!
88
//! - [`Ready`]: Initial state. Operations are available, but `build()` is not (at least one
99
//! operation is required).
10-
//! - [`Modifying`]: After `add_column`. Can chain more `add_column` calls, and `build()` is
11-
//! available.
10+
//! - [`Modifying`]: After at least one operation (`add_column`, `set_nullable`). Can chain more
11+
//! operations, and `build()` is available.
1212
1313
use std::marker::PhantomData;
1414
use std::sync::Arc;
1515

1616
use crate::committer::Committer;
17+
use crate::expressions::ColumnName;
1718
use crate::schema::StructField;
1819
use crate::snapshot::SnapshotRef;
1920
use crate::table_configuration::TableConfiguration;
@@ -76,6 +77,12 @@ impl AlterTableTransactionBuilder<Ready> {
7677
self.operations.push(SchemaOperation::AddColumn { field });
7778
self.transition()
7879
}
80+
81+
/// Change a column's nullability from NOT NULL to nullable.
82+
pub fn set_nullable(mut self, path: ColumnName) -> AlterTableTransactionBuilder<Modifying> {
83+
self.operations.push(SchemaOperation::SetNullable { path });
84+
self.transition()
85+
}
7986
}
8087

8188
impl AlterTableTransactionBuilder<Modifying> {
@@ -85,6 +92,12 @@ impl AlterTableTransactionBuilder<Modifying> {
8592
self
8693
}
8794

95+
/// Change a column's nullability from NOT NULL to nullable.
96+
pub fn set_nullable(mut self, path: ColumnName) -> Self {
97+
self.operations.push(SchemaOperation::SetNullable { path });
98+
self
99+
}
100+
88101
/// Validate and apply schema operations, then build the [`AlterTableTransaction`].
89102
///
90103
/// This method:

kernel/src/transaction/schema_evolution.rs

Lines changed: 310 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
use indexmap::IndexMap;
77

88
use crate::error::Error;
9-
use crate::schema::{SchemaRef, StructField, StructType};
9+
use crate::expressions::ColumnName;
10+
use crate::schema::{DataType, SchemaRef, StructField, StructType};
1011
use crate::DeltaResult;
1112

1213
/// A schema evolution operation to be applied during ALTER TABLE.
@@ -18,6 +19,60 @@ use crate::DeltaResult;
1819
pub(crate) enum SchemaOperation {
1920
/// Add a top-level column.
2021
AddColumn { field: StructField },
22+
23+
/// Change a column's nullability from NOT NULL to nullable.
24+
SetNullable { path: ColumnName },
25+
}
26+
27+
// Helper to modify a nested column. For each component in `remaining`, locates the matching
28+
// field (case-insensitive), then descends into the next nested struct. At the leaf, applies
29+
// `modifier` to produce the replacement field. Returns the modified field list. `full_path` is
30+
// used only in error messages.
31+
//
32+
// Unlike `StructType::walk_column_fields_by` which is iterative and read-only, this must be
33+
// recursive so it can rebuild parent structs bottom-up after modifying the leaf.
34+
//
35+
// Returns an error if a field in the path does not exist or an intermediate field is not a struct.
36+
fn modify_field_at_path(
37+
mut fields: Vec<StructField>,
38+
full_path: &[String],
39+
remaining: &[String],
40+
modifier: &dyn Fn(StructField) -> DeltaResult<StructField>,
41+
) -> DeltaResult<Vec<StructField>> {
42+
let (first, rest) = remaining
43+
.split_first()
44+
.ok_or_else(|| Error::internal_error("modify_field_at_path called with empty path"))?;
45+
46+
// Delta column names are case-insensitive
47+
let idx = fields
48+
.iter()
49+
.position(|f| f.name().eq_ignore_ascii_case(first))
50+
.ok_or_else(|| {
51+
Error::generic(format!(
52+
"Column '{}' does not exist",
53+
ColumnName::new(full_path.iter())
54+
))
55+
})?;
56+
57+
if rest.is_empty() {
58+
let original = fields.remove(idx);
59+
fields.insert(idx, modifier(original)?);
60+
} else {
61+
// Take ownership of the field to avoid cloning inner struct fields
62+
let mut field = fields.remove(idx);
63+
let DataType::Struct(inner) = field.data_type else {
64+
return Err(Error::generic(format!(
65+
"Column '{}' is not a struct; cannot traverse into it",
66+
ColumnName::new(full_path.iter())
67+
)));
68+
};
69+
// into_fields() gives owned fields -- no cloning
70+
let new_inner_fields =
71+
modify_field_at_path(inner.into_fields().collect(), full_path, rest, modifier)?;
72+
field.data_type = DataType::Struct(Box::new(StructType::try_new(new_inner_fields)?));
73+
fields.insert(idx, field);
74+
}
75+
Ok(fields)
2176
}
2277

2378
/// The result of applying schema operations.
@@ -68,6 +123,43 @@ pub(crate) fn apply_schema_operations(
68123
}
69124
fields.insert(key, field);
70125
}
126+
SchemaOperation::SetNullable { path } => {
127+
let segments = path.path();
128+
let key = segments
129+
.first()
130+
.ok_or_else(|| Error::generic("Cannot set nullable: empty column path"))?
131+
.to_lowercase();
132+
let field = fields.get_mut(&key).ok_or_else(|| {
133+
Error::generic(format!(
134+
"Cannot set nullable on column '{path}': column does not exist"
135+
))
136+
})?;
137+
if segments.len() == 1 {
138+
field.nullable = true;
139+
} else {
140+
// Take ownership of data_type to avoid cloning inner fields
141+
let data_type = std::mem::replace(&mut field.data_type, DataType::BOOLEAN);
142+
let DataType::Struct(inner) = data_type else {
143+
field.data_type = data_type;
144+
return Err(Error::generic(format!(
145+
"Column '{}' is not a struct; cannot traverse into it \
146+
while resolving '{path}'",
147+
segments[0]
148+
)));
149+
};
150+
let modified_inner = modify_field_at_path(
151+
inner.into_fields().collect(),
152+
segments,
153+
&segments[1..],
154+
&|mut f| {
155+
f.nullable = true;
156+
Ok(f)
157+
},
158+
)?;
159+
field.data_type =
160+
DataType::Struct(Box::new(StructType::try_new(modified_inner)?));
161+
}
162+
}
71163
}
72164
}
73165

@@ -82,6 +174,7 @@ mod tests {
82174
use rstest::rstest;
83175

84176
use super::*;
177+
use crate::expressions::column_name;
85178
use crate::schema::{DataType, StructField, StructType};
86179

87180
fn simple_schema() -> StructType {
@@ -101,6 +194,103 @@ mod tests {
101194
SchemaOperation::AddColumn { field }
102195
}
103196

197+
fn nested_schema() -> StructType {
198+
StructType::try_new(vec![
199+
StructField::not_null("id", DataType::INTEGER),
200+
StructField::nullable(
201+
"address",
202+
StructType::try_new(vec![
203+
StructField::not_null("city", DataType::STRING),
204+
StructField::nullable("zip", DataType::STRING),
205+
])
206+
.unwrap(),
207+
),
208+
])
209+
.unwrap()
210+
}
211+
212+
// === modify_field_at_path tests ===
213+
214+
#[test]
215+
fn modify_top_level_field() {
216+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
217+
let path = vec!["id".to_string()];
218+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
219+
f.nullable = true;
220+
Ok(f)
221+
})
222+
.unwrap();
223+
let id = result.iter().find(|f| f.name() == "id").unwrap();
224+
assert!(id.is_nullable());
225+
}
226+
227+
#[test]
228+
fn modify_nested_field() {
229+
let fields: Vec<StructField> = nested_schema().into_fields().collect();
230+
let path = vec!["address".to_string(), "city".to_string()];
231+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
232+
f.nullable = true;
233+
Ok(f)
234+
})
235+
.unwrap();
236+
let addr = result.iter().find(|f| f.name() == "address").unwrap();
237+
match addr.data_type() {
238+
DataType::Struct(s) => assert!(s.field("city").unwrap().is_nullable()),
239+
other => panic!("Expected Struct, got: {other:?}"),
240+
}
241+
}
242+
243+
#[test]
244+
fn modify_preserves_sibling_fields() {
245+
let fields: Vec<StructField> = nested_schema().into_fields().collect();
246+
let path = vec!["address".to_string(), "city".to_string()];
247+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
248+
f.nullable = true;
249+
Ok(f)
250+
})
251+
.unwrap();
252+
// "id" should be unchanged
253+
let id = result.iter().find(|f| f.name() == "id").unwrap();
254+
assert!(!id.is_nullable());
255+
// "zip" sibling should be unchanged
256+
let addr = result.iter().find(|f| f.name() == "address").unwrap();
257+
match addr.data_type() {
258+
DataType::Struct(s) => assert!(s.field("zip").unwrap().is_nullable()),
259+
other => panic!("Expected Struct, got: {other:?}"),
260+
}
261+
}
262+
263+
#[test]
264+
fn modify_nonexistent_field_fails() {
265+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
266+
let path = vec!["nope".to_string()];
267+
let err = modify_field_at_path(fields, &path, &path, &|f| Ok(f)).unwrap_err();
268+
assert!(err.to_string().contains("does not exist"));
269+
}
270+
271+
#[test]
272+
fn modify_through_non_struct_fails() {
273+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
274+
let path = vec!["name".to_string(), "inner".to_string()];
275+
let err = modify_field_at_path(fields, &path, &path, &|f| Ok(f)).unwrap_err();
276+
assert!(err.to_string().contains("not a struct"));
277+
}
278+
279+
#[test]
280+
fn modify_case_insensitive_lookup() {
281+
let fields: Vec<StructField> = simple_schema().into_fields().collect();
282+
let path = vec!["ID".to_string()];
283+
let result = modify_field_at_path(fields, &path, &path, &|mut f| {
284+
f.nullable = true;
285+
Ok(f)
286+
})
287+
.unwrap();
288+
let id = result.iter().find(|f| f.name() == "id").unwrap();
289+
assert!(id.is_nullable());
290+
}
291+
292+
// === apply_schema_operations tests ===
293+
104294
#[rstest]
105295
#[case::new_column("email", true, 3)]
106296
#[case::duplicate_exact("name", true, 0)]
@@ -144,4 +334,123 @@ mod tests {
144334
let names: Vec<&String> = result.schema.fields().map(|f| f.name()).collect();
145335
assert_eq!(names, vec!["id", "name", "email"]);
146336
}
337+
338+
// === apply_schema_operations: SetNullable tests ===
339+
340+
#[test]
341+
fn set_nullable_on_required_field() {
342+
let ops = vec![SchemaOperation::SetNullable {
343+
path: column_name!("id"),
344+
}];
345+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
346+
let id_field = result.schema.field("id").unwrap();
347+
assert!(id_field.is_nullable());
348+
}
349+
350+
#[test]
351+
fn set_nullable_already_nullable_is_noop() {
352+
let ops = vec![SchemaOperation::SetNullable {
353+
path: column_name!("name"),
354+
}];
355+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
356+
let name_field = result.schema.field("name").unwrap();
357+
assert!(name_field.is_nullable());
358+
}
359+
360+
#[test]
361+
fn set_nullable_case_insensitive() {
362+
let ops = vec![SchemaOperation::SetNullable {
363+
path: column_name!("ID"),
364+
}];
365+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
366+
let id_field = result.schema.field("id").unwrap();
367+
assert!(id_field.is_nullable());
368+
}
369+
370+
#[test]
371+
fn set_nullable_deeply_nested_field() {
372+
let schema = StructType::try_new(vec![
373+
StructField::not_null("id", DataType::INTEGER),
374+
StructField::nullable(
375+
"address",
376+
StructType::try_new(vec![StructField::nullable(
377+
"location",
378+
StructType::try_new(vec![StructField::not_null("zipcode", DataType::STRING)])
379+
.unwrap(),
380+
)])
381+
.unwrap(),
382+
),
383+
])
384+
.unwrap();
385+
let ops = vec![SchemaOperation::SetNullable {
386+
path: column_name!("address.location.zipcode"),
387+
}];
388+
let result = apply_schema_operations(schema, ops).unwrap();
389+
let addr = result.schema.field("address").unwrap();
390+
match addr.data_type() {
391+
DataType::Struct(s) => match s.field("location").unwrap().data_type() {
392+
DataType::Struct(loc) => {
393+
let zip = loc.field("zipcode").unwrap();
394+
assert!(zip.is_nullable());
395+
}
396+
other => panic!("Expected Struct, got: {other:?}"),
397+
},
398+
other => panic!("Expected Struct, got: {other:?}"),
399+
}
400+
}
401+
402+
#[test]
403+
fn chain_add_and_set_nullable() {
404+
let ops = vec![
405+
SchemaOperation::AddColumn {
406+
field: StructField::nullable("email", DataType::STRING),
407+
},
408+
SchemaOperation::SetNullable {
409+
path: column_name!("id"),
410+
},
411+
];
412+
let result = apply_schema_operations(simple_schema(), ops).unwrap();
413+
assert_eq!(result.schema.fields().count(), 3);
414+
assert!(result.schema.field("email").is_some());
415+
assert!(result.schema.field("id").unwrap().is_nullable());
416+
}
417+
418+
#[test]
419+
fn set_nullable_nonexistent_column_fails() {
420+
let ops = vec![SchemaOperation::SetNullable {
421+
path: column_name!("nonexistent"),
422+
}];
423+
let err = apply_schema_operations(simple_schema(), ops).unwrap_err();
424+
assert!(err.to_string().contains("does not exist"));
425+
}
426+
427+
#[test]
428+
fn set_nullable_nested_field() {
429+
let schema = StructType::try_new(vec![
430+
StructField::not_null("id", DataType::INTEGER),
431+
StructField::nullable(
432+
"address",
433+
StructType::try_new(vec![StructField::not_null("city", DataType::STRING)]).unwrap(),
434+
),
435+
])
436+
.unwrap();
437+
let ops = vec![SchemaOperation::SetNullable {
438+
path: column_name!("address.city"),
439+
}];
440+
let result = apply_schema_operations(schema, ops).unwrap();
441+
let addr = result.schema.field("address").unwrap();
442+
match addr.data_type() {
443+
DataType::Struct(s) => assert!(s.field("city").unwrap().is_nullable()),
444+
other => panic!("Expected Struct, got: {other:?}"),
445+
}
446+
}
447+
448+
#[test]
449+
fn set_nullable_through_non_struct_fails() {
450+
let ops = vec![SchemaOperation::SetNullable {
451+
path: column_name!("name.inner"),
452+
}];
453+
let err = apply_schema_operations(simple_schema(), ops).unwrap_err();
454+
assert!(err.to_string().contains("not a struct"));
455+
}
147456
}

0 commit comments

Comments
 (0)