Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 30 additions & 14 deletions src/Build/Include/Makefile.inc
Original file line number Diff line number Diff line change
Expand Up @@ -16,33 +16,49 @@ clean:
@echo Cleaning $(NAME)
rm -f $(APPNAME) $(NAME).a $(OBJS) $(OBJSEX) $(OBJSNOOPT) $(OBJSHANI) $(OBJAESNI) $(OBJSSSE41) $(OBJSSSSE3) $(OBJSAVX2) $(OBJARMV8CRYPTO) $(OBJS:.o=.d) $(OBJSEX:.oo=.d) $(OBJSNOOPT:.o0=.d) $(OBJSHANI:.oshani=.d) $(OBJAESNI:.oaesni=.d) $(OBJSSSE41:.osse41=.d) $(OBJSSSSE3:.ossse3=.d) $(OBJSAVX2:.oavx2=.d) $(OBJARMV8CRYPTO:.oarmv8crypto=.d) *.gch

# Clang rejects x86 feature flags when the only target is arm64, so the
# X86_*_FLAGS variables are cleared for that case. The affected sources are
# guarded by CRYPTOPP_BOOL_X86/X64 and produce empty objects on arm64.
#
# CPU_ARCH=arm64 alone is not a sufficient test: on Apple Silicon it is also
# true for the regular macOS build, which targets a universal x86_64 + arm64
# binary whose x86_64 slice still needs these flags (e.g. -msha for the SHA
# intrinsics). The flags are therefore cleared only for a local arm64-only
# build.
# NOTE(review): assumes LOCAL_DEVELOPMENT_BUILD is visible in this makefile
# (exported or passed on the make command line) -- confirm.
ifeq "$(LOCAL_DEVELOPMENT_BUILD)/$(CPU_ARCH)" "true/arm64"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has the same scoping issue as the Volume.make change: CPU_ARCH=arm64 is true on Apple Silicon even for the normal non-local macOS build, which still targets a universal x86_64 + arm64 binary.
For that universal path, the x86_64 slice still needs these feature flags. In particular, clearing X86_SHANI_FLAGS makes Sha2Intel.oshani compile without -msha: with Apple clang this fails because _mm_sha256msg1_epu32 requires the sha target feature.
Please either leave these pattern rules unchanged and avoid selecting the x86-specific suffix objects for local arm64-only builds, or gate this flag clearing on the local arm64-only case, e.g. LOCAL_DEVELOPMENT_BUILD=true plus CPU_ARCH=arm64, not CPU_ARCH=arm64 alone.

# Branch taken when the condition above holds: every x86 feature-flag set is
# empty, so the suffix rules below add no -m options to the compile line.
X86_SSE41_FLAGS :=
X86_SHANI_FLAGS :=
X86_AESNI_FLAGS :=
X86_SSSE3_FLAGS :=
X86_AVX2_FLAGS :=
else
# Otherwise each suffix rule gets the instruction-set flags it needs:
#   .osse41 -> SSSE3 + SSE4.1      .oshani -> SSSE3 + SSE4.1 + SHA
#   .oaesni -> SSSE3 + SSE4.1 + AES
#   .ossse3 -> SSSE3               .oavx2  -> AVX2
# Comments are kept on their own lines so no trailing whitespace ends up in
# the variable values.
X86_SSE41_FLAGS := -mssse3 -msse4.1
X86_SHANI_FLAGS := -mssse3 -msse4.1 -msha
X86_AESNI_FLAGS := -mssse3 -msse4.1 -maes
X86_SSSE3_FLAGS := -mssse3
X86_AVX2_FLAGS := -mavx2
endif

# Generic C object: compile one .c file with the project-wide CFLAGS.
%.o: %.c
	@echo Compiling $(notdir $<)
	$(CC) $(CFLAGS) -c $< -o $@

# Unoptimized C object: same as the plain .o rule, with -O0 appended after
# $(CFLAGS) (presumably so it takes precedence over any earlier -O level).
%.o0: %.c
	@echo Compiling $(notdir $<)
	$(CC) $(CFLAGS) -O0 -c $< -o $@

# C object built with SSE4.1 support. The instruction-set flags come from
# X86_SSE41_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.osse41: %.c
	@echo Compiling $(<F)
	$(CC) $(CFLAGS) $(X86_SSE41_FLAGS) -c $< -o $@

# C object built with SHA extension support. The instruction-set flags come
# from X86_SHANI_FLAGS so they can be emptied for builds where the compiler
# would reject them; the source is compiled exactly once.
%.oshani: %.c
	@echo Compiling $(<F)
	$(CC) $(CFLAGS) $(X86_SHANI_FLAGS) -c $< -o $@

# C object built with AES-NI support. The instruction-set flags come from
# X86_AESNI_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.oaesni: %.c
	@echo Compiling $(<F)
	$(CC) $(CFLAGS) $(X86_AESNI_FLAGS) -c $< -o $@

# C object built with SSSE3 support. The instruction-set flags come from
# X86_SSSE3_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.ossse3: %.c
	@echo Compiling $(<F)
	$(CC) $(CFLAGS) $(X86_SSSE3_FLAGS) -c $< -o $@

# C object built with AVX2 support. The instruction-set flags come from
# X86_AVX2_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.oavx2: %.c
	@echo Compiling $(<F)
	$(CC) $(CFLAGS) $(X86_AVX2_FLAGS) -c $< -o $@

%.oarmv8crypto: %.c
@echo Compiling $(<F)
Expand All @@ -51,26 +67,26 @@ clean:
# Generic C++ object: compile one .cpp file with the project-wide CXXFLAGS.
%.o: %.cpp
	@echo Compiling $(notdir $<)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# C++ object built with SSE4.1 support. The instruction-set flags come from
# X86_SSE41_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.osse41: %.cpp
	@echo Compiling $(<F)
	$(CXX) $(CXXFLAGS) $(X86_SSE41_FLAGS) -c $< -o $@

# C++ object built with SHA extension support. The instruction-set flags come
# from X86_SHANI_FLAGS so they can be emptied for builds where the compiler
# would reject them; the source is compiled exactly once.
%.oshani: %.cpp
	@echo Compiling $(<F)
	$(CXX) $(CXXFLAGS) $(X86_SHANI_FLAGS) -c $< -o $@

# C++ object built with AES-NI support. The instruction-set flags come from
# X86_AESNI_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.oaesni: %.cpp
	@echo Compiling $(<F)
	$(CXX) $(CXXFLAGS) $(X86_AESNI_FLAGS) -c $< -o $@

# C++ object built with SSSE3 support. The instruction-set flags come from
# X86_SSSE3_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.ossse3: %.cpp
	@echo Compiling $(<F)
	$(CXX) $(CXXFLAGS) $(X86_SSSE3_FLAGS) -c $< -o $@

# C++ object built with AVX2 support. The instruction-set flags come from
# X86_AVX2_FLAGS so they can be emptied for builds where the compiler would
# reject them; the source is compiled exactly once.
%.oavx2: %.cpp
	@echo Compiling $(<F)
	$(CXX) $(CXXFLAGS) $(X86_AVX2_FLAGS) -c $< -o $@

%.o: %.S
@echo Compiling $(<F)
Expand Down
14 changes: 13 additions & 1 deletion src/Volume/Volume.make
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,11 @@ else ifneq "$(COMPILE_ASM)" "false"
OBJSEX += ../Crypto/sha512_avx1.oo
OBJSEX += ../Crypto/sha512_avx2.oo
OBJSEX += ../Crypto/sha512_sse4.oo
else
else ifeq "$(CPU_ARCH)" "arm64"
# Local arm64 MacOSX build (no universal binary / no yasm x86 asm)
OBJARMV8CRYPTO += ../Crypto/Aes_hw_armv8.oarmv8crypto
OBJS += ../Crypto/Aescrypt.o
OBJARMV8CRYPTO += ../Crypto/sha256_armv8.oarmv8crypto
endif
else ifeq "$(CPU_ARCH)" "x86"
OBJS += ../Crypto/Aes_x86.o
Expand Down Expand Up @@ -95,6 +98,14 @@ else
OBJS += ../Crypto/Aescrypt.o
endif

# Local arm64-only build: the x86-only intrinsics sources are compiled as
# plain objects (their bodies are #ifdef-gated to x86/x64 and become empty
# translation units).
#
# CPU_ARCH=arm64 alone is not a sufficient test: on Apple Silicon it is also
# true for the regular macOS build, which produces a universal x86_64 + arm64
# binary. There the x86_64 slice must keep the feature-specific object
# suffixes; otherwise, for example, Argon2/src/opt_avx2.c is built without
# AVX2 enabled and its AVX2 code path is compiled out.
# NOTE(review): assumes LOCAL_DEVELOPMENT_BUILD is visible in this makefile
# (exported or passed on the make command line) -- confirm.
ifeq "$(LOCAL_DEVELOPMENT_BUILD)/$(CPU_ARCH)" "true/arm64"
OBJS += ../Crypto/blake2s_SSE41.o
OBJS += ../Crypto/blake2s_SSSE3.o
OBJS += ../Crypto/Sha2Intel.o
OBJS += ../Crypto/Argon2/src/opt_avx2.o
else
Comment on lines +101 to +108
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition is too broad. CPU_ARCH=arm64 is also true for the normal non-local macOS build on Apple Silicon, which still produces a universal x86_64 + arm64 binary. In that path this block makes the x86_64 slice compile Argon2/src/opt_avx2.c as plain .o instead of .oavx2, so __AVX2__ is not defined and fill_segment_avx2() becomes the stub that returns ARGON2_INCORRECT_PARAMETER. Please scope this fallback to the local arm64-only case, e.g. LOCAL_DEVELOPMENT_BUILD=true plus CPU_ARCH=arm64, not CPU_ARCH=arm64 alone.

ifeq "$(GCC_GTEQ_430)" "1"
OBJSSSE41 += ../Crypto/blake2s_SSE41.osse41
OBJSSSSE3 += ../Crypto/blake2s_SSSE3.ossse3
Expand All @@ -112,6 +123,7 @@ ifeq "$(GCC_GTEQ_470)" "1"
else
OBJS += ../Crypto/Argon2/src/opt_avx2.o
endif
endif
else
OBJS += ../Crypto/wolfCrypt.o
endif
Expand Down
Loading