From 2d52c0a6060930677703263ce409b3b4f6f03e5a Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Thu, 4 Apr 2013 02:54:34 +1100
Subject: [PATCH 1/3] librustc: add rint and nearbyint intrinsics to go with
 ceil and trunc from LLVM 3.3.

---
 src/librustc/middle/trans/base.rs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/librustc/middle/trans/base.rs b/src/librustc/middle/trans/base.rs
index 15238f168944d..295ff09f09aba 100644
--- a/src/librustc/middle/trans/base.rs
+++ b/src/librustc/middle/trans/base.rs
@@ -2712,6 +2712,14 @@ pub fn declare_intrinsics(llmod: ModuleRef) -> LinearMap<~str, ValueRef> {
                                 T_fn(~[T_f32()], T_f32()));
     let truncf64 = decl_cdecl_fn(llmod, ~"llvm.trunc.f64",
                                 T_fn(~[T_f64()], T_f64()));
+    let rintf32 = decl_cdecl_fn(llmod, ~"llvm.rint.f32",
+                                T_fn(~[T_f32()], T_f32()));
+    let rintf64 = decl_cdecl_fn(llmod, ~"llvm.rint.f64",
+                                T_fn(~[T_f64()], T_f64()));
+    let nearbyintf32 = decl_cdecl_fn(llmod, ~"llvm.nearbyint.f32",
+                                T_fn(~[T_f32()], T_f32()));
+    let nearbyintf64 = decl_cdecl_fn(llmod, ~"llvm.nearbyint.f64",
+                                T_fn(~[T_f64()], T_f64()));
     let ctpop8 = decl_cdecl_fn(llmod, ~"llvm.ctpop.i8",
                                 T_fn(~[T_i8()], T_i8()));
     let ctpop16 = decl_cdecl_fn(llmod, ~"llvm.ctpop.i16",
@@ -2784,6 +2792,10 @@ pub fn declare_intrinsics(llmod: ModuleRef) -> LinearMap<~str, ValueRef> {
     intrinsics.insert(~"llvm.ceil.f64", ceilf64);
     intrinsics.insert(~"llvm.trunc.f32", truncf32);
     intrinsics.insert(~"llvm.trunc.f64", truncf64);
+    intrinsics.insert(~"llvm.rint.f32", rintf32);
+    intrinsics.insert(~"llvm.rint.f64", rintf64);
+    intrinsics.insert(~"llvm.nearbyint.f32", nearbyintf32);
+    intrinsics.insert(~"llvm.nearbyint.f64", nearbyintf64);
     intrinsics.insert(~"llvm.ctpop.i8", ctpop8);
     intrinsics.insert(~"llvm.ctpop.i16", ctpop16);
     intrinsics.insert(~"llvm.ctpop.i32", ctpop32);

From 05bb618037480940fc3812c4990448faa7d3b01d Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Thu, 4 Apr 2013 03:08:53 +1100
Subject: [PATCH 2/3] librustc: use LLVM intrinsics for several floating point
 operations.

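Achieves up to a 5x speed-up! However, the intrinsics seem to do bad
things to the stack (the stack canary is killed more easily), especially
sin, cos and exp, so those three still call cmath and their intrinsic
versions are exposed separately as sin_intr, cos_intr and exp_intr
(#5686 has discussion).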

Also, add f{32,64,loat}::powi, and reorganise the delegation code so
that functions have the #[inline(always)] annotation, and reduce the
repetition of delegate!(..).
---
 src/libcore/num/f32.rs | 158 +++++++++++++++-------------
 src/libcore/num/f64.rs | 226 ++++++++++++++++++++++++++---------------
 2 files changed, 231 insertions(+), 153 deletions(-)

diff --git a/src/libcore/num/f32.rs b/src/libcore/num/f32.rs
index 6361a6a5cb75e..87a210eef3ded 100644
--- a/src/libcore/num/f32.rs
+++ b/src/libcore/num/f32.rs
@@ -10,13 +10,10 @@
 
 //! Operations and constants for `f32`
 
-use cmath;
-use libc::{c_float, c_int};
 use num::NumCast;
 use num::strconv;
 use num;
 use option::Option;
-use unstable::intrinsics::floorf32;
 use from_str;
 use to_str;
 
@@ -25,79 +22,99 @@ use to_str;
 
 pub use cmath::c_float_targ_consts::*;
 
+// An inner module is required to get the #[inline(always)] attribute on the
+// functions.
+pub use self::delegated::*;
+
 macro_rules! delegate(
     (
-        fn $name:ident(
-            $(
-                $arg:ident : $arg_ty:ty
-            ),*
-        ) -> $rv:ty = $bound_name:path
+        $(
+            fn $name:ident(
+                $(
+                    $arg:ident : $arg_ty:ty
+                ),*
+            ) -> $rv:ty = $bound_name:path
+        ),*
     ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
-            unsafe {
-                $bound_name($( $arg ),*)
-            }
+        mod delegated {
+            use cmath::c_float_utils;
+            use libc::{c_float, c_int};
+            use unstable::intrinsics;
+
+            $(
+                #[inline(always)]
+                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+                    unsafe {
+                        $bound_name($( $arg ),*)
+                    }
+                }
+            )*
         }
     )
 )
 
-delegate!(fn acos(n: c_float) -> c_float = cmath::c_float_utils::acos)
-delegate!(fn asin(n: c_float) -> c_float = cmath::c_float_utils::asin)
-delegate!(fn atan(n: c_float) -> c_float = cmath::c_float_utils::atan)
-delegate!(fn atan2(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::atan2)
-delegate!(fn cbrt(n: c_float) -> c_float = cmath::c_float_utils::cbrt)
-delegate!(fn ceil(n: c_float) -> c_float = cmath::c_float_utils::ceil)
-delegate!(fn copysign(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::copysign)
-delegate!(fn cos(n: c_float) -> c_float = cmath::c_float_utils::cos)
-delegate!(fn cosh(n: c_float) -> c_float = cmath::c_float_utils::cosh)
-delegate!(fn erf(n: c_float) -> c_float = cmath::c_float_utils::erf)
-delegate!(fn erfc(n: c_float) -> c_float = cmath::c_float_utils::erfc)
-delegate!(fn exp(n: c_float) -> c_float = cmath::c_float_utils::exp)
-delegate!(fn expm1(n: c_float) -> c_float = cmath::c_float_utils::expm1)
-delegate!(fn exp2(n: c_float) -> c_float = cmath::c_float_utils::exp2)
-delegate!(fn abs(n: c_float) -> c_float = cmath::c_float_utils::abs)
-delegate!(fn abs_sub(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::abs_sub)
-delegate!(fn mul_add(a: c_float, b: c_float, c: c_float) -> c_float =
-    cmath::c_float_utils::mul_add)
-delegate!(fn fmax(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::fmax)
-delegate!(fn fmin(a: c_float, b: c_float) -> c_float =
-    cmath::c_float_utils::fmin)
-delegate!(fn nextafter(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::nextafter)
-delegate!(fn frexp(n: c_float, value: &mut c_int) -> c_float =
-    cmath::c_float_utils::frexp)
-delegate!(fn hypot(x: c_float, y: c_float) -> c_float =
-    cmath::c_float_utils::hypot)
-delegate!(fn ldexp(x: c_float, n: c_int) -> c_float =
-    cmath::c_float_utils::ldexp)
-delegate!(fn lgamma(n: c_float, sign: &mut c_int) -> c_float =
-    cmath::c_float_utils::lgamma)
-delegate!(fn ln(n: c_float) -> c_float = cmath::c_float_utils::ln)
-delegate!(fn log_radix(n: c_float) -> c_float =
-    cmath::c_float_utils::log_radix)
-delegate!(fn ln1p(n: c_float) -> c_float = cmath::c_float_utils::ln1p)
-delegate!(fn log10(n: c_float) -> c_float = cmath::c_float_utils::log10)
-delegate!(fn log2(n: c_float) -> c_float = cmath::c_float_utils::log2)
-delegate!(fn ilog_radix(n: c_float) -> c_int =
-    cmath::c_float_utils::ilog_radix)
-delegate!(fn modf(n: c_float, iptr: &mut c_float) -> c_float =
-    cmath::c_float_utils::modf)
-delegate!(fn pow(n: c_float, e: c_float) -> c_float =
-    cmath::c_float_utils::pow)
-delegate!(fn round(n: c_float) -> c_float = cmath::c_float_utils::round)
-delegate!(fn ldexp_radix(n: c_float, i: c_int) -> c_float =
-    cmath::c_float_utils::ldexp_radix)
-delegate!(fn sin(n: c_float) -> c_float = cmath::c_float_utils::sin)
-delegate!(fn sinh(n: c_float) -> c_float = cmath::c_float_utils::sinh)
-delegate!(fn sqrt(n: c_float) -> c_float = cmath::c_float_utils::sqrt)
-delegate!(fn tan(n: c_float) -> c_float = cmath::c_float_utils::tan)
-delegate!(fn tanh(n: c_float) -> c_float = cmath::c_float_utils::tanh)
-delegate!(fn tgamma(n: c_float) -> c_float = cmath::c_float_utils::tgamma)
-delegate!(fn trunc(n: c_float) -> c_float = cmath::c_float_utils::trunc)
+delegate!(
+    // intrinsics
+    fn abs(n: f32) -> f32 = intrinsics::fabsf32,
+    fn exp2(n: f32) -> f32 = intrinsics::exp2f32,
+    fn floor(x: f32) -> f32 = intrinsics::floorf32,
+    fn ln(n: f32) -> f32 = intrinsics::logf32,
+    fn log10(n: f32) -> f32 = intrinsics::log10f32,
+    fn log2(n: f32) -> f32 = intrinsics::log2f32,
+    fn mul_add(a: f32, b: f32, c: f32) -> f32 = intrinsics::fmaf32,
+    fn pow(n: f32, e: f32) -> f32 = intrinsics::powf32,
+    fn powi(n: f32, e: c_int) -> f32 = intrinsics::powif32,
+    fn sqrt(n: f32) -> f32 = intrinsics::sqrtf32,
+
+    // FIXME: using intrinsics for these kills the stack canary more easily
+    // than it does for those above (see discussion on #5686)
+    fn cos(n: c_float) -> c_float = c_float_utils::cos,
+    fn sin(n: c_float) -> c_float = c_float_utils::sin,
+    fn exp(n: f32) -> f32 = c_float_utils::exp,
+    fn cos_intr(n: f32) -> f32 = intrinsics::cosf32,
+    fn sin_intr(n: f32) -> f32 = intrinsics::sinf32,
+    fn exp_intr(n: f32) -> f32 = intrinsics::expf32,
+
+    // LLVM 3.3 is required to use intrinsics for ceil, trunc, rint and nearbyint
+    fn ceil(n: c_float) -> c_float = c_float_utils::ceil,
+    fn trunc(n: c_float) -> c_float = c_float_utils::trunc,
+    /*
+    fn ceil(n: f32) -> f32 = intrinsics::ceilf32,
+    fn trunc(n: f32) -> f32 = intrinsics::truncf32,
+    fn rint(n: f32) -> f32 = intrinsics::rintf32,
+    fn nearbyint(n: f32) -> f32 = intrinsics::nearbyintf32,
+    */
+
+    // cmath
+    fn acos(n: c_float) -> c_float = c_float_utils::acos,
+    fn asin(n: c_float) -> c_float = c_float_utils::asin,
+    fn atan(n: c_float) -> c_float = c_float_utils::atan,
+    fn atan2(a: c_float, b: c_float) -> c_float = c_float_utils::atan2,
+    fn cbrt(n: c_float) -> c_float = c_float_utils::cbrt,
+    fn copysign(x: c_float, y: c_float) -> c_float = c_float_utils::copysign,
+    fn cosh(n: c_float) -> c_float = c_float_utils::cosh,
+    fn erf(n: c_float) -> c_float = c_float_utils::erf,
+    fn erfc(n: c_float) -> c_float = c_float_utils::erfc,
+    fn expm1(n: c_float) -> c_float = c_float_utils::expm1,
+    fn abs_sub(a: c_float, b: c_float) -> c_float = c_float_utils::abs_sub,
+    fn fmax(a: c_float, b: c_float) -> c_float = c_float_utils::fmax,
+    fn fmin(a: c_float, b: c_float) -> c_float = c_float_utils::fmin,
+    fn nextafter(x: c_float, y: c_float) -> c_float = c_float_utils::nextafter,
+    fn frexp(n: c_float, value: &mut c_int) -> c_float = c_float_utils::frexp,
+    fn hypot(x: c_float, y: c_float) -> c_float = c_float_utils::hypot,
+    fn ldexp(x: c_float, n: c_int) -> c_float = c_float_utils::ldexp,
+    fn lgamma(n: c_float, sign: &mut c_int) -> c_float = c_float_utils::lgamma,
+    fn log_radix(n: c_float) -> c_float = c_float_utils::log_radix,
+    fn ln1p(n: c_float) -> c_float = c_float_utils::ln1p,
+    fn ilog_radix(n: c_float) -> c_int = c_float_utils::ilog_radix,
+    fn modf(n: c_float, iptr: &mut c_float) -> c_float = c_float_utils::modf,
+    fn round(n: c_float) -> c_float = c_float_utils::round,
+    fn ldexp_radix(n: c_float, i: c_int) -> c_float = c_float_utils::ldexp_radix,
+    fn sinh(n: c_float) -> c_float = c_float_utils::sinh,
+    fn tan(n: c_float) -> c_float = c_float_utils::tan,
+    fn tanh(n: c_float) -> c_float = c_float_utils::tanh,
+    fn tgamma(n: c_float) -> c_float = c_float_utils::tgamma)
+
 
 // These are not defined inside consts:: for consistency with
 // the integer types
@@ -144,9 +161,6 @@ pub fn ge(x: f32, y: f32) -> bool { return x >= y; }
 #[inline(always)]
 pub fn gt(x: f32, y: f32) -> bool { return x > y; }
 
-/// Returns `x` rounded down
-#[inline(always)]
-pub fn floor(x: f32) -> f32 { unsafe { floorf32(x) } }
 
 // FIXME (#1999): replace the predicates below with llvm intrinsics or
 // calls to the libmath macros in the rust runtime for performance.
diff --git a/src/libcore/num/f64.rs b/src/libcore/num/f64.rs
index 9e731e61ec49e..48a4163665de9 100644
--- a/src/libcore/num/f64.rs
+++ b/src/libcore/num/f64.rs
@@ -9,14 +9,10 @@
 // except according to those terms.
 
 //! Operations and constants for `f64`
-
-use cmath;
-use libc::{c_double, c_int};
 use num::NumCast;
 use num::strconv;
 use num;
 use option::Option;
-use unstable::intrinsics::floorf64;
 use to_str;
 use from_str;
 
@@ -26,87 +22,104 @@ use from_str;
 pub use cmath::c_double_targ_consts::*;
 pub use cmp::{min, max};
 
+// An inner module is required to get the #[inline(always)] attribute on the
+// functions.
+pub use self::delegated::*;
+
 macro_rules! delegate(
     (
-        fn $name:ident(
-            $(
-                $arg:ident : $arg_ty:ty
-            ),*
-        ) -> $rv:ty = $bound_name:path
+        $(
+            fn $name:ident(
+                $(
+                    $arg:ident : $arg_ty:ty
+                ),*
+            ) -> $rv:ty = $bound_name:path
+        ),*
     ) => (
-        pub fn $name($( $arg : $arg_ty ),*) -> $rv {
-            unsafe {
-                $bound_name($( $arg ),*)
-            }
+        mod delegated {
+            use cmath::c_double_utils;
+            use libc::{c_double, c_int};
+            use unstable::intrinsics;
+
+            $(
+                #[inline(always)]
+                pub fn $name($( $arg : $arg_ty ),*) -> $rv {
+                    unsafe {
+                        $bound_name($( $arg ),*)
+                    }
+                }
+            )*
         }
     )
 )
 
-delegate!(fn acos(n: c_double) -> c_double = cmath::c_double_utils::acos)
-delegate!(fn asin(n: c_double) -> c_double = cmath::c_double_utils::asin)
-delegate!(fn atan(n: c_double) -> c_double = cmath::c_double_utils::atan)
-delegate!(fn atan2(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::atan2)
-delegate!(fn cbrt(n: c_double) -> c_double = cmath::c_double_utils::cbrt)
-delegate!(fn ceil(n: c_double) -> c_double = cmath::c_double_utils::ceil)
-delegate!(fn copysign(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::copysign)
-delegate!(fn cos(n: c_double) -> c_double = cmath::c_double_utils::cos)
-delegate!(fn cosh(n: c_double) -> c_double = cmath::c_double_utils::cosh)
-delegate!(fn erf(n: c_double) -> c_double = cmath::c_double_utils::erf)
-delegate!(fn erfc(n: c_double) -> c_double = cmath::c_double_utils::erfc)
-delegate!(fn exp(n: c_double) -> c_double = cmath::c_double_utils::exp)
-delegate!(fn expm1(n: c_double) -> c_double = cmath::c_double_utils::expm1)
-delegate!(fn exp2(n: c_double) -> c_double = cmath::c_double_utils::exp2)
-delegate!(fn abs(n: c_double) -> c_double = cmath::c_double_utils::abs)
-delegate!(fn abs_sub(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::abs_sub)
-delegate!(fn mul_add(a: c_double, b: c_double, c: c_double) -> c_double =
-    cmath::c_double_utils::mul_add)
-delegate!(fn fmax(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::fmax)
-delegate!(fn fmin(a: c_double, b: c_double) -> c_double =
-    cmath::c_double_utils::fmin)
-delegate!(fn nextafter(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::nextafter)
-delegate!(fn frexp(n: c_double, value: &mut c_int) -> c_double =
-    cmath::c_double_utils::frexp)
-delegate!(fn hypot(x: c_double, y: c_double) -> c_double =
-    cmath::c_double_utils::hypot)
-delegate!(fn ldexp(x: c_double, n: c_int) -> c_double =
-    cmath::c_double_utils::ldexp)
-delegate!(fn lgamma(n: c_double, sign: &mut c_int) -> c_double =
-    cmath::c_double_utils::lgamma)
-delegate!(fn ln(n: c_double) -> c_double = cmath::c_double_utils::ln)
-delegate!(fn log_radix(n: c_double) -> c_double =
-    cmath::c_double_utils::log_radix)
-delegate!(fn ln1p(n: c_double) -> c_double = cmath::c_double_utils::ln1p)
-delegate!(fn log10(n: c_double) -> c_double = cmath::c_double_utils::log10)
-delegate!(fn log2(n: c_double) -> c_double = cmath::c_double_utils::log2)
-delegate!(fn ilog_radix(n: c_double) -> c_int =
-    cmath::c_double_utils::ilog_radix)
-delegate!(fn modf(n: c_double, iptr: &mut c_double) -> c_double =
-    cmath::c_double_utils::modf)
-delegate!(fn pow(n: c_double, e: c_double) -> c_double =
-    cmath::c_double_utils::pow)
-delegate!(fn round(n: c_double) -> c_double = cmath::c_double_utils::round)
-delegate!(fn ldexp_radix(n: c_double, i: c_int) -> c_double =
-    cmath::c_double_utils::ldexp_radix)
-delegate!(fn sin(n: c_double) -> c_double = cmath::c_double_utils::sin)
-delegate!(fn sinh(n: c_double) -> c_double = cmath::c_double_utils::sinh)
-delegate!(fn sqrt(n: c_double) -> c_double = cmath::c_double_utils::sqrt)
-delegate!(fn tan(n: c_double) -> c_double = cmath::c_double_utils::tan)
-delegate!(fn tanh(n: c_double) -> c_double = cmath::c_double_utils::tanh)
-delegate!(fn tgamma(n: c_double) -> c_double = cmath::c_double_utils::tgamma)
-delegate!(fn trunc(n: c_double) -> c_double = cmath::c_double_utils::trunc)
-delegate!(fn j0(n: c_double) -> c_double = cmath::c_double_utils::j0)
-delegate!(fn j1(n: c_double) -> c_double = cmath::c_double_utils::j1)
-delegate!(fn jn(i: c_int, n: c_double) -> c_double =
-    cmath::c_double_utils::jn)
-delegate!(fn y0(n: c_double) -> c_double = cmath::c_double_utils::y0)
-delegate!(fn y1(n: c_double) -> c_double = cmath::c_double_utils::y1)
-delegate!(fn yn(i: c_int, n: c_double) -> c_double =
-    cmath::c_double_utils::yn)
+delegate!(
+    // intrinsics
+    fn abs(n: f64) -> f64 = intrinsics::fabsf64,
+    fn exp2(n: f64) -> f64 = intrinsics::exp2f64,
+    fn floor(x: f64) -> f64 = intrinsics::floorf64,
+    fn ln(n: f64) -> f64 = intrinsics::logf64,
+    fn log10(n: f64) -> f64 = intrinsics::log10f64,
+    fn log2(n: f64) -> f64 = intrinsics::log2f64,
+    fn mul_add(a: f64, b: f64, c: f64) -> f64 = intrinsics::fmaf64,
+    fn pow(n: f64, e: f64) -> f64 = intrinsics::powf64,
+    fn powi(n: f64, e: c_int) -> f64 = intrinsics::powif64,
+    fn sqrt(n: f64) -> f64 = intrinsics::sqrtf64,
+
+    // FIXME: using intrinsics for these kills the stack canary more easily
+    // than it does for those above (see discussion on #5686)
+    fn cos(n: c_double) -> c_double = c_double_utils::cos,
+    fn sin(n: c_double) -> c_double = c_double_utils::sin,
+    fn exp(n: c_double) -> c_double = c_double_utils::exp,
+    fn cos_intr(n: f64) -> f64 = intrinsics::cosf64,
+    fn sin_intr(n: f64) -> f64 = intrinsics::sinf64,
+    fn exp_intr(n: f64) -> f64 = intrinsics::expf64,
+
+    // LLVM 3.3 is required to use intrinsics for ceil, trunc, rint and nearbyint
+    fn ceil(n: c_double) -> c_double = c_double_utils::ceil,
+    fn trunc(n: c_double) -> c_double = c_double_utils::trunc,
+    /*
+    fn ceil(n: f64) -> f64 = intrinsics::ceilf64,
+    fn trunc(n: f64) -> f64 = intrinsics::truncf64,
+    fn rint(n: c_double) -> c_double = intrinsics::rintf64,
+    fn nearbyint(n: c_double) -> c_double = intrinsics::nearbyintf64,
+    */
+
+    // cmath
+    fn acos(n: c_double) -> c_double = c_double_utils::acos,
+    fn asin(n: c_double) -> c_double = c_double_utils::asin,
+    fn atan(n: c_double) -> c_double = c_double_utils::atan,
+    fn atan2(a: c_double, b: c_double) -> c_double = c_double_utils::atan2,
+    fn cbrt(n: c_double) -> c_double = c_double_utils::cbrt,
+    fn copysign(x: c_double, y: c_double) -> c_double = c_double_utils::copysign,
+    fn cosh(n: c_double) -> c_double = c_double_utils::cosh,
+    fn erf(n: c_double) -> c_double = c_double_utils::erf,
+    fn erfc(n: c_double) -> c_double = c_double_utils::erfc,
+    fn expm1(n: c_double) -> c_double = c_double_utils::expm1,
+    fn abs_sub(a: c_double, b: c_double) -> c_double = c_double_utils::abs_sub,
+    fn fmax(a: c_double, b: c_double) -> c_double = c_double_utils::fmax,
+    fn fmin(a: c_double, b: c_double) -> c_double = c_double_utils::fmin,
+    fn nextafter(x: c_double, y: c_double) -> c_double = c_double_utils::nextafter,
+    fn frexp(n: c_double, value: &mut c_int) -> c_double = c_double_utils::frexp,
+    fn hypot(x: c_double, y: c_double) -> c_double = c_double_utils::hypot,
+    fn ldexp(x: c_double, n: c_int) -> c_double = c_double_utils::ldexp,
+    fn lgamma(n: c_double, sign: &mut c_int) -> c_double = c_double_utils::lgamma,
+    fn log_radix(n: c_double) -> c_double = c_double_utils::log_radix,
+    fn ln1p(n: c_double) -> c_double = c_double_utils::ln1p,
+    fn ilog_radix(n: c_double) -> c_int = c_double_utils::ilog_radix,
+    fn modf(n: c_double, iptr: &mut c_double) -> c_double = c_double_utils::modf,
+    fn round(n: c_double) -> c_double = c_double_utils::round,
+    fn ldexp_radix(n: c_double, i: c_int) -> c_double = c_double_utils::ldexp_radix,
+    fn sinh(n: c_double) -> c_double = c_double_utils::sinh,
+    fn tan(n: c_double) -> c_double = c_double_utils::tan,
+    fn tanh(n: c_double) -> c_double = c_double_utils::tanh,
+    fn tgamma(n: c_double) -> c_double = c_double_utils::tgamma,
+    fn j0(n: c_double) -> c_double = c_double_utils::j0,
+    fn j1(n: c_double) -> c_double = c_double_utils::j1,
+    fn jn(i: c_int, n: c_double) -> c_double = c_double_utils::jn,
+    fn y0(n: c_double) -> c_double = c_double_utils::y0,
+    fn y1(n: c_double) -> c_double = c_double_utils::y1,
+    fn yn(i: c_int, n: c_double) -> c_double = c_double_utils::yn)
 
 // FIXME (#1433): obtain these in a different way
 
@@ -219,9 +232,6 @@ pub fn is_finite(x: f64) -> bool {
     return !(is_NaN(x) || is_infinite(x));
 }
 
-/// Returns `x` rounded down
-#[inline(always)]
-pub fn floor(x: f64) -> f64 { unsafe { floorf64(x) } }
 
 // FIXME (#1999): add is_normal, is_subnormal, and fpclassify
 
@@ -659,6 +669,60 @@ fn test_numcast() {
     assert!((20f64 == num::cast(20f64)));
 }
 
+#[test]
+fn test_functions() {
+    acos(1.5);
+    asin(1.5);
+    atan(1.5);
+    atan2(1.5, 1.5);
+    cbrt(1.5);
+    ceil(1.5);
+    copysign(1.5, 1.5);
+    cos(1.5);
+    cosh(1.5);
+    erf(1.5);
+    erfc(1.5);
+    exp(1.5);
+    expm1(1.5);
+    exp2(1.5);
+    abs(1.5);
+    abs_sub(1.5, 1.5);
+    mul_add(1.5, 1.5, 1.5);
+    fmax(1.5, 1.5);
+    fmin(1.5, 1.5);
+    nextafter(1.5, 1.5);
+    let mut value = 1;
+    frexp(1.5, &mut value);
+    hypot(1.5, 1.5);
+    ldexp(1.5, 1);
+    let mut sign = 1;
+    lgamma(1.5, &mut sign);
+    ln(1.5);
+    log_radix(1.5);
+    ln1p(1.5);
+    log10(1.5);
+    log2(1.5);
+    ilog_radix(1.5);
+    let mut iptr = 1.0;
+    modf(1.5, &mut iptr);
+    pow(1.5, 1.5);
+    round(1.5);
+    ldexp_radix(1.5, 1);
+    sin(1.5);
+    sinh(1.5);
+    sqrt(1.5);
+    tan(1.5);
+    tanh(1.5);
+    tgamma(1.5);
+    trunc(1.5);
+    j0(1.5);
+    j1(1.5);
+    jn(1, 1.5);
+    y0(1.5);
+    y1(1.5);
+    yn(1, 1.5);
+}
+
 //
 // Local Variables:
 // mode: rust

From e656de59e37bc46bf86a5609df58bda86fda6183 Mon Sep 17 00:00:00 2001
From: Huon Wilson <dbau.pp+github@gmail.com>
Date: Thu, 4 Apr 2013 03:14:26 +1100
Subject: [PATCH 3/3] testsuite: update shootout-nbody to use the intrinsic
 sqrt

---
 src/test/bench/shootout-nbody.rs | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/src/test/bench/shootout-nbody.rs b/src/test/bench/shootout-nbody.rs
index 97907025bd1a0..086f5ae7f6fa8 100644
--- a/src/test/bench/shootout-nbody.rs
+++ b/src/test/bench/shootout-nbody.rs
@@ -15,18 +15,6 @@ extern mod std;
 
 use core::os;
 
-// Using sqrt from the standard library is way slower than using libc
-// directly even though std just calls libc, I guess it must be
-// because the the indirection through another dynamic linker
-// stub. Kind of shocking. Might be able to make it faster still with
-// an llvm intrinsic.
-mod libc {
-    #[nolink]
-    pub extern {
-        pub fn sqrt(n: float) -> float;
-    }
-}
-
 fn main() {
     let args = os::args();
     let args = if os::getenv(~"RUST_BENCH").is_some() {
@@ -49,6 +37,7 @@ fn main() {
 
 pub mod NBodySystem {
     use Body;
+    use core::float::sqrt;
 
     pub fn make() -> ~[Body::Props] {
         let mut bodies: ~[Body::Props] =
@@ -107,7 +96,7 @@ pub mod NBodySystem {
 
             let dSquared = dx * dx + dy * dy + dz * dz;
 
-            let distance = ::libc::sqrt(dSquared);
+            let distance = sqrt(dSquared);
             let mag = dt / (dSquared * distance);
 
             bi.vx -= dx * bj.mass * mag;
@@ -148,9 +137,9 @@ pub mod NBodySystem {
                     dy = bodies[i].y - bodies[j].y;
                     dz = bodies[i].z - bodies[j].z;
 
-                    distance = ::libc::sqrt(dx * dx
-                                            + dy * dy
-                                            + dz * dz);
+                    distance = sqrt(dx * dx
+                                    + dy * dy
+                                    + dz * dz);
                     e -= bodies[i].mass
                         * bodies[j].mass / distance;