From 37efeae8866c3c4d9827d0ca271b8e27f731c3e1 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 8 Mar 2016 21:41:18 +0100 Subject: [PATCH 1/3] Define AVX broadcast intrinsics This defines `_mm256_broadcast_ps` and `_mm256_broadcast_pd`. The `_ss` and `_sd` variants are not supported by LLVM. In Clang these intrinsics are implemented as inline functions in C++. Intel reference: https://software.intel.com/en-us/node/514144. Note: the argument type should really be "0hPc" (a pointer to a vector of half the width), but internally the LLVM intrinsic takes a pointer to a signed integer, and for any other type LLVM will complain. This means that a transmute is required to call these intrinsics. The AVX2 broadcast intrinsics `_mm256_broadcastss_ps` and `_mm256_broadcastsd_pd` are not available as LLVM intrinsics. In Clang they are implemented using the shufflevector builtin. --- src/etc/platform-intrinsics/x86/avx.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json index 2c1492c2954c8..981838536b2ff 100644 --- a/src/etc/platform-intrinsics/x86/avx.json +++ b/src/etc/platform-intrinsics/x86/avx.json @@ -8,6 +8,13 @@ "ret": "f(32-64)", "args": ["0", "0"] }, + { + "intrinsic": "256_broadcast_{0.data_type}", + "width": [256], + "llvm": "vbroadcastf128.{0.data_type}.256", + "ret": "f(32-64)", + "args": ["s8SPc"] + }, { "intrinsic": "256_dp_ps", "width": [256], From 51b5300b3fc83b445cd1e410be4b7d123f10472e Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Tue, 8 Mar 2016 22:36:54 +0100 Subject: [PATCH 2/3] Define AVX conversion intrinsics This defines the following intrinsics: * `_mm256_cvtepi32_pd` * `_mm256_cvtepi32_ps` * `_mm256_cvtpd_epi32` * `_mm256_cvtpd_ps` * `_mm256_cvtps_epi32` * `_mm256_cvtps_pd` * `_mm256_cvttpd_epi32` * `_mm256_cvttps_epi32` Intel reference: https://software.intel.com/en-us/node/514130. --- src/etc/platform-intrinsics/x86/avx.json | 56 ++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json index 981838536b2ff..08524fbd6dd8a 100644 --- a/src/etc/platform-intrinsics/x86/avx.json +++ b/src/etc/platform-intrinsics/x86/avx.json @@ -15,6 +15,62 @@ "ret": "f(32-64)", "args": ["s8SPc"] }, + { + "intrinsic": "256_cvtepi32_pd", + "width": [256], + "llvm": "cvtdq2.pd.256", + "ret": "f64", + "args": ["s32h"] + }, + { + "intrinsic": "256_cvtepi32_ps", + "width": [256], + "llvm": "cvtdq2.ps.256", + "ret": "f32", + "args": ["s32"] + }, + { + "intrinsic": "256_cvtpd_epi32", + "width": [256], + "llvm": "cvt.pd2dq.256", + "ret": "s32h", + "args": ["f64"] + }, + { + "intrinsic": "256_cvtpd_ps", + "width": [256], + "llvm": "cvt.pd2.ps.256", + "ret": "f32h", + "args": ["f64"] + }, + { + "intrinsic": "256_cvtps_epi32", + "width": [256], + "llvm": "cvt.ps2dq.256", + "ret": "s32", + "args": ["f32"] + }, + { + "intrinsic": "256_cvtps_pd", + "width": [256], + "llvm": "cvt.ps2.pd.256", + "ret": "f64", + "args": ["f32h"] + }, + { + "intrinsic": "256_cvttpd_epi32", + "width": [256], + "llvm": "cvtt.pd2dq.256", + "ret": "s32h", + "args": ["f64"] + }, + { + "intrinsic": "256_cvttps_epi32", + "width": [256], + "llvm": "cvtt.ps2dq.256", + "ret": "s32", + "args": ["f32"] + }, { "intrinsic": "256_dp_ps", "width": [256], From c306853edafb8b740c3e224ce4fa1842a6924dc5 Mon Sep 17 00:00:00 2001 From: Ruud van Asseldonk Date: Wed, 9 Mar 2016 01:16:31 +0100 Subject: [PATCH 3/3] Regenerate x86 platform intrinsics The exact command used was: $ cd src/etc/platform-intrinsics/x86 $ python2 ../generator.py --format compiler-defs -i info.json \ sse.json sse2.json sse3.json ssse3.json sse41.json sse42.json \ avx.json avx2.json fma.json \ > ../../../librustc_platform_intrinsics/x86.rs --- src/librustc_platform_intrinsics/x86.rs | 50 +++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs index 168ae79ab748f..d8aaf151267f6 100644 --- a/src/librustc_platform_intrinsics/x86.rs +++ b/src/librustc_platform_intrinsics/x86.rs @@ -498,6 +498,56 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option { output: v(f(64), 4), definition: Named("llvm.x86.avx.addsub.pd.256") }, + "256_broadcast_ps" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.vbroadcastf128.ps.256") + }, + "256_broadcast_pd" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.vbroadcastf128.pd.256") + }, + "256_cvtepi32_pd" => Intrinsic { + inputs: vec![v(i(32), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.cvtdq2.pd.256") + }, + "256_cvtepi32_ps" => Intrinsic { + inputs: vec![v(i(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.cvtdq2.ps.256") + }, + "256_cvtpd_epi32" => Intrinsic { + inputs: vec![v(f(64), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx.cvt.pd2dq.256") + }, + "256_cvtpd_ps" => Intrinsic { + inputs: vec![v(f(64), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx.cvt.pd2.ps.256") + }, + "256_cvtps_epi32" => Intrinsic { + inputs: vec![v(f(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx.cvt.ps2dq.256") + }, + "256_cvtps_pd" => Intrinsic { + inputs: vec![v(f(32), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.cvt.ps2.pd.256") + }, + "256_cvttpd_epi32" => Intrinsic { + inputs: vec![v(f(64), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx.cvtt.pd2dq.256") + }, + "256_cvttps_epi32" => Intrinsic { + inputs: vec![v(f(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx.cvtt.ps2dq.256") + }, "256_dp_ps" => Intrinsic { inputs: vec![v(f(32), 8), v(f(32), 8), i_(32, 8)], output: v(f(32), 8),