Skip to content

Commit 8d3e10a

Browse files
authored
feat: auto-enable invariants writer feature for non-null columns in CREATE TABLE (#2418)
## What changes are proposed in this pull request? Auto-adds the invariants writer feature to the protocol when a CREATE TABLE schema contains any user-controlled non-null field (nullable: false). Delta-Spark treats non-nullable schema columns as implicit invariants and requires this feature to be listed in writerFeatures. Previously kernel rejected non-null columns at CREATE; this change makes such tables compatible with Spark readers/writers. Explicit delta.invariants metadata annotations (SQL expression invariants like x > 3) remain rejected because kernel cannot evaluate them. The existing write guard in TableConfiguration::ensure_write_supported blocks writes to any table with such metadata; we now reject it at CREATE time for a clearer, path-aware error. Also adds TableFeature::Invariants to ALLOWED_DELTA_FEATURES so users can pre-enable the feature on an all-nullable table via delta.feature.invariants=supported. This is useful when a later ALTER TABLE ADD COLUMN NOT NULL (by Spark) should not need a protocol upgrade. Core changes: - validate_schema_for_create no longer takes invariants_enabled and instead rejects fields with delta.invariants metadata - New schema_contains_non_null_fields helper - New maybe_enable_invariants in the CREATE builder, following the maybe_enable_variant_type / maybe_enable_timestamp_ntz pattern; adds schema driven Invariants to writer_features only - TableFeature::Invariants added to ALLOWED_DELTA_FEATURES ## How was this change tested? - Unit: schema_contains_non_null_fields (top-level, nested, array, map, all-nullable, Variant-skip), test_maybe_enable_invariants (feature flips on for non-null only), invariants_metadata_rejected (all nesting paths), and invariants case in test_feature_signal_accepted - Integration: test_create_table_with_non_null_columns_auto_enables_invariants, test_create_table_with_invariants_feature_signal_allowed, test_create_table_rejects_delta_invariants_metadata - Full cargo test -p delta_kernel --all-features --locked --lib --tests passes - cargo clippy --tests --benches -- -D warnings clean Co-authored-by: Sanuj Basu <sanujbasu@users.noreply.github.com>
1 parent af7f316 commit 8d3e10a

5 files changed

Lines changed: 382 additions & 136 deletions

File tree

CLAUDE.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ Keep this list updated when new protocol features are added to kernel.
204204
- Prefer `==` over `matches!` for simple single-variant enum comparisons. `matches!` is
205205
for patterns with bindings or guards. For example: `self == Variant` not
206206
`matches!(self, Variant)`.
207+
- Prefer `StructField::nullable` / `StructField::not_null` over
208+
`StructField::new(name, type, bool)` when nullability is known at compile time.
209+
Reserve `StructField::new` for cases where nullability is a runtime value.
207210
- NEVER panic in production code -- use errors instead. Panicking
208211
(including `unwrap()`, `expect()`, `panic!()`, `unreachable!()`, etc) is acceptable in test code only.
209212

kernel/src/schema/mod.rs

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ impl StructField {
446446
MakePhysical::new(column_mapping_mode).run_field(self)
447447
}
448448

449-
fn has_invariants(&self) -> bool {
449+
pub(crate) fn has_invariants(&self) -> bool {
450450
self.metadata
451451
.contains_key(ColumnMetadataKey::Invariants.as_ref())
452452
}
@@ -1291,6 +1291,40 @@ pub(crate) fn schema_has_invariants(schema: &Schema) -> bool {
12911291
checker.0
12921292
}
12931293

1294+
/// Visitor that reports whether any non-null (`nullable: false`) field exists in a schema.
1295+
/// Walks the full schema tree including nested struct, array element, and map value structs.
1296+
struct NonNullFieldChecker {
1297+
found: bool,
1298+
}
1299+
1300+
impl<'a> SchemaTransform<'a> for NonNullFieldChecker {
1301+
/// Skip recursion into variant internals. The `metadata` and `value` fields inside a
1302+
/// `Variant` are protocol-defined, always non-null, and not user-controlled, so they
1303+
/// must not be treated as user-declared non-null columns.
1304+
fn transform_variant(&mut self, stype: &'a StructType) -> Option<Cow<'a, StructType>> {
1305+
Some(Cow::Borrowed(stype))
1306+
}
1307+
1308+
fn transform_struct_field(&mut self, field: &'a StructField) -> Option<Cow<'a, StructField>> {
1309+
if !field.is_nullable() {
1310+
self.found = true;
1311+
} else if !self.found {
1312+
let _ = self.recurse_into_struct_field(field);
1313+
}
1314+
Some(Cow::Borrowed(field))
1315+
}
1316+
}
1317+
1318+
/// Checks if any user-controlled column in the schema (including nested columns) is declared
1319+
/// non-null (`nullable: false`).
1320+
///
1321+
/// Skips `Variant` internal struct fields, which are protocol-defined and always non-null.
1322+
pub(crate) fn schema_contains_non_null_fields(schema: &Schema) -> bool {
1323+
let mut checker = NonNullFieldChecker { found: false };
1324+
let _ = checker.transform_struct(schema);
1325+
checker.found
1326+
}
1327+
12941328
/// Normalizes column name field names to match the casing in the schema.
12951329
///
12961330
/// Walks each field name through the schema's struct hierarchy, replacing user-provided
@@ -2733,6 +2767,70 @@ mod tests {
27332767
assert!(schema_has_invariants(&schema));
27342768
}
27352769

2770+
fn all_nullable_schema() -> StructType {
2771+
StructType::new_unchecked([
2772+
StructField::nullable("a", DataType::STRING),
2773+
StructField::nullable("b", DataType::INTEGER),
2774+
])
2775+
}
2776+
2777+
fn top_level_non_null_schema() -> StructType {
2778+
StructType::new_unchecked([
2779+
StructField::not_null("id", DataType::INTEGER),
2780+
StructField::nullable("name", DataType::STRING),
2781+
])
2782+
}
2783+
2784+
fn nested_non_null_schema() -> StructType {
2785+
let nested_field = StructField::nullable(
2786+
"parent",
2787+
DataType::try_struct_type([StructField::not_null("child", DataType::INTEGER)]).unwrap(),
2788+
);
2789+
StructType::new_unchecked([StructField::nullable("a", DataType::STRING), nested_field])
2790+
}
2791+
2792+
fn array_non_null_schema() -> StructType {
2793+
let array_field = StructField::nullable(
2794+
"arr",
2795+
ArrayType::new(
2796+
DataType::try_struct_type([StructField::not_null("child", DataType::INTEGER)])
2797+
.unwrap(),
2798+
true,
2799+
),
2800+
);
2801+
StructType::new_unchecked([array_field])
2802+
}
2803+
2804+
fn map_non_null_schema() -> StructType {
2805+
let map_field = StructField::nullable(
2806+
"map",
2807+
MapType::new(
2808+
DataType::STRING,
2809+
DataType::try_struct_type([StructField::not_null("child", DataType::INTEGER)])
2810+
.unwrap(),
2811+
true,
2812+
),
2813+
);
2814+
StructType::new_unchecked([map_field])
2815+
}
2816+
2817+
fn variant_only_schema() -> StructType {
2818+
// Variant internal fields (metadata, value) are protocol-defined non-null but
2819+
// must NOT be counted as user-controlled non-null fields.
2820+
StructType::new_unchecked([StructField::nullable("v", DataType::unshredded_variant())])
2821+
}
2822+
2823+
#[rstest]
2824+
#[case::all_nullable(all_nullable_schema(), false)]
2825+
#[case::top_level(top_level_non_null_schema(), true)]
2826+
#[case::nested_struct(nested_non_null_schema(), true)]
2827+
#[case::array_element(array_non_null_schema(), true)]
2828+
#[case::map_value(map_non_null_schema(), true)]
2829+
#[case::variant_skipped(variant_only_schema(), false)]
2830+
fn test_schema_contains_non_null_fields(#[case] schema: StructType, #[case] expected: bool) {
2831+
assert_eq!(schema_contains_non_null_fields(&schema), expected);
2832+
}
2833+
27362834
#[test]
27372835
fn test_struct_type_iterator_basic() {
27382836
let fields = vec![

0 commit comments

Comments
 (0)