From 37efeae8866c3c4d9827d0ca271b8e27f731c3e1 Mon Sep 17 00:00:00 2001
From: Ruud van Asseldonk <ruuda@google.com>
Date: Tue, 8 Mar 2016 21:41:18 +0100
Subject: [PATCH 1/3] Define AVX broadcast intrinsics

This defines `_mm256_broadcast_ps` and `_mm256_broadcast_pd`. The `_ss`
and `_sd` variants are not supported by LLVM. In Clang these intrinsics
are implemented as inline functions in C++.

Intel reference: https://software.intel.com/en-us/node/514144.

Note: the argument type should really be "0hPc" (a pointer to a vector
of half the width), but internally the LLVM intrinsic takes a pointer to
a signed integer, and for any other type LLVM will complain. This means
that a transmute is required to call these intrinsics.

The AVX2 broadcast intrinsics `_mm256_broadcastss_ps` and
`_mm256_broadcastsd_pd` are not available as LLVM intrinsics. In Clang
they are implemented using the shufflevector builtin.
---
 src/etc/platform-intrinsics/x86/avx.json | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json
index 2c1492c2954c8..981838536b2ff 100644
--- a/src/etc/platform-intrinsics/x86/avx.json
+++ b/src/etc/platform-intrinsics/x86/avx.json
@@ -8,6 +8,13 @@
             "ret": "f(32-64)",
             "args": ["0", "0"]
         },
+        {
+            "intrinsic": "256_broadcast_{0.data_type}",
+            "width": [256],
+            "llvm": "vbroadcastf128.{0.data_type}.256",
+            "ret": "f(32-64)",
+            "args": ["s8SPc"]
+        },
         {
             "intrinsic": "256_dp_ps",
             "width": [256],

From 51b5300b3fc83b445cd1e410be4b7d123f10472e Mon Sep 17 00:00:00 2001
From: Ruud van Asseldonk <ruuda@google.com>
Date: Tue, 8 Mar 2016 22:36:54 +0100
Subject: [PATCH 2/3] Define AVX conversion intrinsics

This defines the following intrinsics:

 * `_mm256_cvtepi32_pd`
 * `_mm256_cvtepi32_ps`
 * `_mm256_cvtpd_epi32`
 * `_mm256_cvtpd_ps`
 * `_mm256_cvtps_epi32`
 * `_mm256_cvtps_pd`
 * `_mm256_cvttpd_epi32`
 * `_mm256_cvttps_epi32`

Intel reference: https://software.intel.com/en-us/node/514130.
---
 src/etc/platform-intrinsics/x86/avx.json | 56 ++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json
index 981838536b2ff..08524fbd6dd8a 100644
--- a/src/etc/platform-intrinsics/x86/avx.json
+++ b/src/etc/platform-intrinsics/x86/avx.json
@@ -15,6 +15,62 @@
             "ret": "f(32-64)",
             "args": ["s8SPc"]
         },
+        {
+            "intrinsic": "256_cvtepi32_pd",
+            "width": [256],
+            "llvm": "cvtdq2.pd.256",
+            "ret": "f64",
+            "args": ["s32h"]
+        },
+        {
+            "intrinsic": "256_cvtepi32_ps",
+            "width": [256],
+            "llvm": "cvtdq2.ps.256",
+            "ret": "f32",
+            "args": ["s32"]
+        },
+        {
+            "intrinsic": "256_cvtpd_epi32",
+            "width": [256],
+            "llvm": "cvt.pd2dq.256",
+            "ret": "s32h",
+            "args": ["f64"]
+        },
+        {
+            "intrinsic": "256_cvtpd_ps",
+            "width": [256],
+            "llvm": "cvt.pd2.ps.256",
+            "ret": "f32h",
+            "args": ["f64"]
+        },
+        {
+            "intrinsic": "256_cvtps_epi32",
+            "width": [256],
+            "llvm": "cvt.ps2dq.256",
+            "ret": "s32",
+            "args": ["f32"]
+        },
+        {
+            "intrinsic": "256_cvtps_pd",
+            "width": [256],
+            "llvm": "cvt.ps2.pd.256",
+            "ret": "f64",
+            "args": ["f32h"]
+        },
+        {
+            "intrinsic": "256_cvttpd_epi32",
+            "width": [256],
+            "llvm": "cvtt.pd2dq.256",
+            "ret": "s32h",
+            "args": ["f64"]
+        },
+        {
+            "intrinsic": "256_cvttps_epi32",
+            "width": [256],
+            "llvm": "cvtt.ps2dq.256",
+            "ret": "s32",
+            "args": ["f32"]
+        },
         {
             "intrinsic": "256_dp_ps",
             "width": [256],

From c306853edafb8b740c3e224ce4fa1842a6924dc5 Mon Sep 17 00:00:00 2001
From: Ruud van Asseldonk <ruuda@google.com>
Date: Wed, 9 Mar 2016 01:16:31 +0100
Subject: [PATCH 3/3] Regenerate x86 platform intrinsics

The exact command used was:

    $ cd src/etc/platform-intrinsics/x86
    $ python2 ../generator.py --format compiler-defs -i info.json   \
      sse.json sse2.json sse3.json ssse3.json sse41.json sse42.json \
      avx.json avx2.json fma.json                                   \
      > ../../../librustc_platform_intrinsics/x86.rs
---
 src/librustc_platform_intrinsics/x86.rs | 50 +++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs
index 168ae79ab748f..d8aaf151267f6 100644
--- a/src/librustc_platform_intrinsics/x86.rs
+++ b/src/librustc_platform_intrinsics/x86.rs
@@ -498,6 +498,56 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {
             output: v(f(64), 4),
             definition: Named("llvm.x86.avx.addsub.pd.256")
         },
+        "256_broadcast_ps" => Intrinsic {
+            inputs: vec![p(true, i(8), None)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.avx.vbroadcastf128.ps.256")
+        },
+        "256_broadcast_pd" => Intrinsic {
+            inputs: vec![p(true, i(8), None)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.avx.vbroadcastf128.pd.256")
+        },
+        "256_cvtepi32_pd" => Intrinsic {
+            inputs: vec![v(i(32), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.avx.cvtdq2.pd.256")
+        },
+        "256_cvtepi32_ps" => Intrinsic {
+            inputs: vec![v(i(32), 8)],
+            output: v(f(32), 8),
+            definition: Named("llvm.x86.avx.cvtdq2.ps.256")
+        },
+        "256_cvtpd_epi32" => Intrinsic {
+            inputs: vec![v(f(64), 4)],
+            output: v(i(32), 4),
+            definition: Named("llvm.x86.avx.cvt.pd2dq.256")
+        },
+        "256_cvtpd_ps" => Intrinsic {
+            inputs: vec![v(f(64), 4)],
+            output: v(f(32), 4),
+            definition: Named("llvm.x86.avx.cvt.pd2.ps.256")
+        },
+        "256_cvtps_epi32" => Intrinsic {
+            inputs: vec![v(f(32), 8)],
+            output: v(i(32), 8),
+            definition: Named("llvm.x86.avx.cvt.ps2dq.256")
+        },
+        "256_cvtps_pd" => Intrinsic {
+            inputs: vec![v(f(32), 4)],
+            output: v(f(64), 4),
+            definition: Named("llvm.x86.avx.cvt.ps2.pd.256")
+        },
+        "256_cvttpd_epi32" => Intrinsic {
+            inputs: vec![v(f(64), 4)],
+            output: v(i(32), 4),
+            definition: Named("llvm.x86.avx.cvtt.pd2dq.256")
+        },
+        "256_cvttps_epi32" => Intrinsic {
+            inputs: vec![v(f(32), 8)],
+            output: v(i(32), 8),
+            definition: Named("llvm.x86.avx.cvtt.ps2dq.256")
+        },
         "256_dp_ps" => Intrinsic {
             inputs: vec![v(f(32), 8), v(f(32), 8), i_(32, 8)],
             output: v(f(32), 8),