From b93f60f7cd4bebbe6c26a456ea0b394fcafc731f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 17 Nov 2017 11:41:34 +0100 Subject: conv_acc: Our code requires SSSE3, not just SSE3 The accelerated convolutional decoder uses SSSE3 instructions such as PSIGNW (via _mm_sign_epi16) which go beyond what SSE3 offers. So let's make sure we use the right compiler flag (-mssse3) and also the right runtime check. Without this patch, we would use illegal instructions e.g. on Opteron Gen3 such as Opteron 2427, which are also used as build.opensuse.org build hosts (build31 through build36) where we wouldn't pass "make check" as a result. Change-Id: I2754164384109f2821fd98ffb48f625893f2923d Fixes: OS#2386 --- src/Makefile.am | 10 +++++----- src/conv_acc.c | 24 ++++++++++++------------ src/conv_acc_sse.c | 2 +- src/conv_acc_sse_avx.c | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index e7f94cef..3d6e6f79 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -23,20 +23,20 @@ libosmocore_la_SOURCES = timer.c timer_gettimeofday.c select.c signal.c msgb.c b macaddr.c stat_item.c stats.c stats_statsd.c prim.c \ conv_acc.c conv_acc_generic.c sercomm.c prbs.c -if HAVE_SSE3 +if HAVE_SSSE3 libosmocore_la_SOURCES += conv_acc_sse.c if HAVE_SSE4_1 -conv_acc_sse.lo : AM_CFLAGS += -msse3 -msse4.1 +conv_acc_sse.lo : AM_CFLAGS += -mssse3 -msse4.1 else -conv_acc_sse.lo : AM_CFLAGS += -msse3 +conv_acc_sse.lo : AM_CFLAGS += -mssse3 endif if HAVE_AVX2 libosmocore_la_SOURCES += conv_acc_sse_avx.c if HAVE_SSE4_1 -conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2 -msse4.1 +conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2 -msse4.1 else -conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2 +conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2 endif endif endif diff --git a/src/conv_acc.c b/src/conv_acc.c index 33fe2649..c16e4364 100644 --- a/src/conv_acc.c +++ b/src/conv_acc.c @@ -48,7 +48,7 @@ static int init_complete = 0; __attribute__ ((visibility("hidden"))) int avx2_supported = 0; -__attribute__ ((visibility("hidden"))) int sse3_supported = 0; +__attribute__ ((visibility("hidden"))) int ssse3_supported = 0; __attribute__ ((visibility("hidden"))) int sse41_supported = 0; /** @@ -75,12 +75,12 @@ void (*osmo_conv_metrics_k7_n4)(const int8_t *seq, int16_t *osmo_conv_gen_vdec_malloc(size_t n); void osmo_conv_gen_vdec_free(int16_t *ptr); -#if defined(HAVE_SSE3) +#if defined(HAVE_SSSE3) int16_t *osmo_conv_sse_vdec_malloc(size_t n); void osmo_conv_sse_vdec_free(int16_t *ptr); #endif -#if defined(HAVE_SSE3) && defined(HAVE_AVX2) +#if defined(HAVE_SSSE3) && defined(HAVE_AVX2) int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n); void osmo_conv_sse_avx_vdec_free(int16_t *ptr); #endif @@ -99,7 +99,7 @@ void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out, void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); -#if defined(HAVE_SSE3) +#if defined(HAVE_SSSE3) void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out, @@ -114,7 +114,7 @@ void osmo_conv_sse_metrics_k7_n4(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); #endif -#if defined(HAVE_SSE3) && defined(HAVE_AVX2) +#if defined(HAVE_SSSE3) && defined(HAVE_AVX2) void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out, @@ -654,8 +654,8 @@ static void osmo_conv_init(void) avx2_supported = __builtin_cpu_supports("avx2"); #endif - #ifdef HAVE_SSE3 - sse3_supported = __builtin_cpu_supports("sse3"); + #ifdef HAVE_SSSE3 + ssse3_supported = __builtin_cpu_supports("ssse3"); #endif #ifdef HAVE_SSE4_1 @@ -667,16 +667,16 @@ static void osmo_conv_init(void) * Usage of curly braces is mandatory, * because we use multi-line define. */ -#if defined(HAVE_SSE3) && defined(HAVE_AVX2) - if (sse3_supported && avx2_supported) { +#if defined(HAVE_SSSE3) && defined(HAVE_AVX2) + if (ssse3_supported && avx2_supported) { INIT_POINTERS(sse_avx); - } else if (sse3_supported) { + } else if (ssse3_supported) { INIT_POINTERS(sse); } else { INIT_POINTERS(gen); } -#elif defined(HAVE_SSE3) - if (sse3_supported) { +#elif defined(HAVE_SSSE3) + if (ssse3_supported) { INIT_POINTERS(sse); } else { INIT_POINTERS(gen); diff --git a/src/conv_acc_sse.c b/src/conv_acc_sse.c index a9679ef0..63d8722a 100644 --- a/src/conv_acc_sse.c +++ b/src/conv_acc_sse.c @@ -1,6 +1,6 @@ /*! \file conv_acc_sse.c * Accelerated Viterbi decoder implementation - * for architectures with only SSE3 available. */ + * for architectures with only SSSE3 available. */ /* * Copyright (C) 2013, 2014 Thomas Tsou * diff --git a/src/conv_acc_sse_avx.c b/src/conv_acc_sse_avx.c index 5b6e7040..5ac3c163 100644 --- a/src/conv_acc_sse_avx.c +++ b/src/conv_acc_sse_avx.c @@ -1,6 +1,6 @@ /*! \file conv_acc_sse_avx.c * Accelerated Viterbi decoder implementation - * for architectures with both SSE3 and AVX2 support. */ + * for architectures with both SSSE3 and AVX2 support. */ /* * Copyright (C) 2013, 2014 Thomas Tsou * -- cgit v1.2.3