diff --git a/README.md b/README.md
index 0501e3b..0a42f35 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,15 @@ Once the image has been built or you have downloaded the correct image, you can
 
 In the following instructions, the variable `${DELTA_PACKAGE_VERSION}` refers to the Delta Lake Package version.
 
-The current version is `delta-spark_2.13:4.0.0` which corresponds to Apache Spark 4.x release line.
+For Spark 4.x, the Delta artifact is Spark-version-specific: use `delta-spark_4.1_2.13` for Spark 4.1 and `delta-spark_4.0_2.13` for Spark 4.0; older Spark lines use the generic `delta-spark_2.13` artifact.
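+
+For example, if you built the image with the tag `delta_quickstart`, you can pin the Delta release that `startup.sh` resolves by setting `DELTA_SPARK_VERSION` when the container starts; the matching Spark-version-specific artifact is still selected automatically:
+
+```bash
+# Illustrative only: adjust the image tag and port mapping to your build.
+# startup.sh still chooses delta-spark_4.1_2.13 or delta-spark_4.0_2.13 from the Spark runtime.
+docker run --rm -it -e DELTA_SPARK_VERSION=4.1.0 -p 8888-8889:8888-8889 delta_quickstart
+```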
 
 ## Choose an Interface
 
diff --git a/startup.sh b/startup.sh
index dcd45ea..dc538c7 100644
--- a/startup.sh
+++ b/startup.sh
@@ -4,8 +4,30 @@
 source "$HOME/.cargo/env"
 export PYSPARK_DRIVER_PYTHON=jupyter
 export PYSPARK_DRIVER_PYTHON_OPTS='lab --ip=0.0.0.0'
-export DELTA_SPARK_VERSION='4.0.1'
-export DELTA_PACKAGE_VERSION=delta-spark_2.13:${DELTA_SPARK_VERSION}
+
+# Default Delta version; can be overridden by setting DELTA_SPARK_VERSION in the environment
+: "${DELTA_SPARK_VERSION:=4.1.0}"
+
+# Detect the Spark major.minor version from the running runtime (e.g. "4.1")
+SPARK_FULL_VERSION=$("${SPARK_HOME}/bin/spark-submit" --version 2>&1 \
+  | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' | head -1)
+SPARK_MAJOR_MINOR=$(echo "${SPARK_FULL_VERSION}" | cut -d. -f1,2)
+
+# Select the Delta Maven artifact that matches this Spark version.
+# Spark 4.1 and 4.0 each publish a Spark-specific artifact; older releases use the generic one.
+case "${SPARK_MAJOR_MINOR}" in
+  4.1)
+    DELTA_ARTIFACT="delta-spark_4.1_2.13"
+    ;;
+  4.0)
+    DELTA_ARTIFACT="delta-spark_4.0_2.13"
+    ;;
+  *)
+    DELTA_ARTIFACT="delta-spark_2.13"
+    ;;
+esac
+
+export DELTA_PACKAGE_VERSION="${DELTA_ARTIFACT}:${DELTA_SPARK_VERSION}"
 
 $SPARK_HOME/bin/pyspark --packages io.delta:${DELTA_PACKAGE_VERSION} \
     --conf "spark.driver.extraJavaOptions=-Divy.cache.dir=/tmp -Divy.home=/tmp -Dio.netty.tryReflectionSetAccessible=true" \
diff --git a/tests/test_docker.sh b/tests/test_docker.sh
index 7736fea..da9f89f 100755
--- a/tests/test_docker.sh
+++ b/tests/test_docker.sh
@@ -172,7 +172,32 @@ run_test "spark-submit is on PATH" "spark-submit --version"
 run_test "pyspark is on PATH"      "pyspark --version"
 
 # ---------------------------------------------------------------
-# 6. Rust toolchain
+# 6. startup.sh artifact resolution
+# ---------------------------------------------------------------
+
+section "startup.sh artifact resolution"
+run_test_verbose "startup.sh selects Spark 4.1 Delta artifact" \
+  "set -euo pipefail
+mkdir -p /tmp/mock-spark/bin
+printf '%s\n' '#!/usr/bin/env bash' 'echo \"Spark version 4.1.1\" >&2' > /tmp/mock-spark/bin/spark-submit
+printf '%s\n' '#!/usr/bin/env bash' 'echo \"\$*\"' > /tmp/mock-spark/bin/pyspark
+chmod +x /tmp/mock-spark/bin/spark-submit /tmp/mock-spark/bin/pyspark
+startup_output=\$(SPARK_HOME=/tmp/mock-spark DELTA_SPARK_VERSION=4.1.0 bash startup.sh 2>&1)
+echo \"\$startup_output\"
+[[ \"\$startup_output\" == *\"--packages io.delta:delta-spark_4.1_2.13:4.1.0\"* ]]"
+
+run_test_verbose "startup.sh selects Spark 4.0 Delta artifact" \
+  "set -euo pipefail
+mkdir -p /tmp/mock-spark/bin
+printf '%s\n' '#!/usr/bin/env bash' 'echo \"Spark version 4.0.3\" >&2' > /tmp/mock-spark/bin/spark-submit
+printf '%s\n' '#!/usr/bin/env bash' 'echo \"\$*\"' > /tmp/mock-spark/bin/pyspark
+chmod +x /tmp/mock-spark/bin/spark-submit /tmp/mock-spark/bin/pyspark
+startup_output=\$(SPARK_HOME=/tmp/mock-spark DELTA_SPARK_VERSION=4.1.0 bash startup.sh 2>&1)
+echo \"\$startup_output\"
+[[ \"\$startup_output\" == *\"--packages io.delta:delta-spark_4.0_2.13:4.1.0\"* ]]"
+
+# ---------------------------------------------------------------
+# 7. Rust toolchain
 # ---------------------------------------------------------------
 
 section "Rust Toolchain"
@@ -180,7 +205,7 @@ run_test "rustc is available"      'source "$HOME/.cargo/env" && rustc --versio
 run_test "cargo is available"      'source "$HOME/.cargo/env" && cargo --version'
 
 # ---------------------------------------------------------------
-# 7. Functional: delta-rs (Python) write/read via Polars
+# 8. Functional: delta-rs (Python) write/read via Polars
 # ---------------------------------------------------------------
 
 section "Functional: delta-rs + Polars"
@@ -215,7 +240,7 @@ print('Polars Delta append OK')
 \""
 
 # ---------------------------------------------------------------
-# 8. Functional: deltalake Python API
+# 9. Functional: deltalake Python API
 # ---------------------------------------------------------------
 
 section "Functional: deltalake Python API"