Commit 09a8f68

Override carrying_mul_add in cg_llvm
1 parent 4013392 commit 09a8f68
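
For orientation: carrying_mul_add widens all four operands to twice the bit width, computes a * b + c + d there, and returns the low and high halves, which is exactly what the new cg_llvm lowering below emits. A minimal sketch of that semantics for u8, using only stable widening arithmetic (the helper name cma_u8_spec is ours, not from the commit, and it assumes the (low, high) ordering the lowering builds):

// Reference semantics of carrying_mul_add for u8, written with plain
// widening arithmetic: widen, multiply-add, then split into halves.
fn cma_u8_spec(a: u8, b: u8, c: u8, d: u8) -> (u8, u8) {
    let wide = (a as u16) * (b as u16) + (c as u16) + (d as u16);
    (wide as u8, (wide >> 8) as u8) // (low half, high half)
}

fn main() {
    assert_eq!(cma_u8_spec(10, 20, 3, 4), (207, 0));   // 10*20 + 3 + 4 = 207
    assert_eq!(cma_u8_spec(255, 2, 255, 0), (253, 2));  // 765 = 2*256 + 253
}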

2 files changed (+117, -0)

compiler/rustc_codegen_llvm/src/intrinsic.rs (+27 lines)
@@ -340,6 +340,33 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
                    self.const_i32(cache_type),
                ])
            }
+           sym::carrying_mul_add => {
+               let uty = fn_args.type_at(0);
+               let (size, signed) = uty.int_size_and_signed(self.tcx);
+               assert!(!signed);
+
+               let wide_llty = self.type_ix(size.bits() * 2);
+               let a = self.zext(args[0].immediate(), wide_llty);
+               let b = self.zext(args[1].immediate(), wide_llty);
+               let c = self.zext(args[2].immediate(), wide_llty);
+               let d = self.zext(args[3].immediate(), wide_llty);
+
+               let wide = self.unchecked_umul(a, b);
+               let wide = self.unchecked_uadd(wide, c);
+               let wide = self.unchecked_uadd(wide, d);
+
+               let narrow_llty = self.type_ix(size.bits());
+               let low = self.trunc(wide, narrow_llty);
+               let bits_const = self.const_uint(wide_llty, size.bits());
+               let high = self.lshr(wide, bits_const);
+               let high = self.trunc(high, narrow_llty);
+
+               let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
+               let pair = self.const_poison(pair_llty);
+               let pair = self.insert_value(pair, low, 0);
+               let pair = self.insert_value(pair, high, 1);
+               pair
+           }
            sym::ctlz
            | sym::ctlz_nonzero
            | sym::cttz
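
The lowering uses unchecked_umul and unchecked_uadd, which surface as mul nuw and add nuw in the tests below; that is sound because the largest possible result, (2^n - 1)^2 + 2*(2^n - 1) = 2^(2n) - 1, still fits in the doubled-width integer. A quick spot-check of that bound for n = 8 (a sketch, not part of the commit):

fn main() {
    let max = u8::MAX as u16;           // 2^8 - 1 = 255
    let worst = max * max + max + max;  // (2^n - 1)^2 + 2*(2^n - 1)
    assert_eq!(worst, u16::MAX);        // = 2^(2n) - 1: the wide value never wraps
}

The second file in the commit, the new codegen test below, checks that this lowering actually reaches the emitted IR.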
@@ -0,0 +1,90 @@
+//@ revisions: RAW OPT
+//@ compile-flags: -C opt-level=1
+//@[RAW] compile-flags: -C no-prepopulate-passes
+//@[OPT] min-llvm-version: 19
+
+#![crate_type = "lib"]
+#![feature(core_intrinsics)]
+
+// Note that LLVM seems to sometimes permute the order of arguments to mul and add,
+// so these tests don't check the arguments in the optimized revision.
+
+use std::intrinsics::carrying_mul_add;
+
+// The fallbacks are emitted even when they're never used, but optimize out.
+
+// RAW: wide_mul_u128
+// OPT-NOT: wide_mul_u128
+
+// CHECK-LABEL: @cma_u8
+#[no_mangle]
+pub unsafe fn cma_u8(a: u8, b: u8, c: u8, d: u8) -> (u8, u8) {
+    // CHECK: [[A:%.+]] = zext i8 %a to i16
+    // CHECK: [[B:%.+]] = zext i8 %b to i16
+    // CHECK: [[C:%.+]] = zext i8 %c to i16
+    // CHECK: [[D:%.+]] = zext i8 %d to i16
+    // CHECK: [[AB:%.+]] = mul nuw i16
+    // RAW-SAME: [[A]], [[B]]
+    // CHECK: [[ABC:%.+]] = add nuw i16
+    // RAW-SAME: [[AB]], [[C]]
+    // CHECK: [[ABCD:%.+]] = add nuw i16
+    // RAW-SAME: [[ABC]], [[D]]
+    // CHECK: [[LOW:%.+]] = trunc i16 [[ABCD]] to i8
+    // CHECK: [[HIGHW:%.+]] = lshr i16 [[ABCD]], 8
+    // RAW: [[HIGH:%.+]] = trunc i16 [[HIGHW]] to i8
+    // OPT: [[HIGH:%.+]] = trunc nuw i16 [[HIGHW]] to i8
+    // CHECK: [[PAIR0:%.+]] = insertvalue { i8, i8 } poison, i8 [[LOW]], 0
+    // CHECK: [[PAIR1:%.+]] = insertvalue { i8, i8 } [[PAIR0]], i8 [[HIGH]], 1
+    // OPT: ret { i8, i8 } [[PAIR1]]
+    carrying_mul_add(a, b, c, d)
+}
+
+// CHECK-LABEL: @cma_u32
+#[no_mangle]
+pub unsafe fn cma_u32(a: u32, b: u32, c: u32, d: u32) -> (u32, u32) {
+    // CHECK: [[A:%.+]] = zext i32 %a to i64
+    // CHECK: [[B:%.+]] = zext i32 %b to i64
+    // CHECK: [[C:%.+]] = zext i32 %c to i64
+    // CHECK: [[D:%.+]] = zext i32 %d to i64
+    // CHECK: [[AB:%.+]] = mul nuw i64
+    // RAW-SAME: [[A]], [[B]]
+    // CHECK: [[ABC:%.+]] = add nuw i64
+    // RAW-SAME: [[AB]], [[C]]
+    // CHECK: [[ABCD:%.+]] = add nuw i64
+    // RAW-SAME: [[ABC]], [[D]]
+    // CHECK: [[LOW:%.+]] = trunc i64 [[ABCD]] to i32
+    // CHECK: [[HIGHW:%.+]] = lshr i64 [[ABCD]], 32
+    // RAW: [[HIGH:%.+]] = trunc i64 [[HIGHW]] to i32
+    // OPT: [[HIGH:%.+]] = trunc nuw i64 [[HIGHW]] to i32
+    // CHECK: [[PAIR0:%.+]] = insertvalue { i32, i32 } poison, i32 [[LOW]], 0
+    // CHECK: [[PAIR1:%.+]] = insertvalue { i32, i32 } [[PAIR0]], i32 [[HIGH]], 1
+    // OPT: ret { i32, i32 } [[PAIR1]]
+    carrying_mul_add(a, b, c, d)
+}
+
+// CHECK-LABEL: @cma_u128
+// CHECK-SAME: sret{{.+}}dereferenceable(32){{.+}}%_0,{{.+}}%a,{{.+}}%b,{{.+}}%c,{{.+}}%d
+#[no_mangle]
+pub unsafe fn cma_u128(a: u128, b: u128, c: u128, d: u128) -> (u128, u128) {
+    // CHECK: [[A:%.+]] = zext i128 %a to i256
+    // CHECK: [[B:%.+]] = zext i128 %b to i256
+    // CHECK: [[C:%.+]] = zext i128 %c to i256
+    // CHECK: [[D:%.+]] = zext i128 %d to i256
+    // CHECK: [[AB:%.+]] = mul nuw i256
+    // RAW-SAME: [[A]], [[B]]
+    // CHECK: [[ABC:%.+]] = add nuw i256
+    // RAW-SAME: [[AB]], [[C]]
+    // CHECK: [[ABCD:%.+]] = add nuw i256
+    // RAW-SAME: [[ABC]], [[D]]
+    // CHECK: [[LOW:%.+]] = trunc i256 [[ABCD]] to i128
+    // CHECK: [[HIGHW:%.+]] = lshr i256 [[ABCD]], 128
+    // RAW: [[HIGH:%.+]] = trunc i256 [[HIGHW]] to i128
+    // OPT: [[HIGH:%.+]] = trunc nuw i256 [[HIGHW]] to i128
+    // RAW: [[PAIR0:%.+]] = insertvalue { i128, i128 } poison, i128 [[LOW]], 0
+    // RAW: [[PAIR1:%.+]] = insertvalue { i128, i128 } [[PAIR0]], i128 [[HIGH]], 1
+    // OPT: store i128 [[LOW]], ptr %_0
+    // OPT: [[P1:%.+]] = getelementptr inbounds i8, ptr %_0, i64 16
+    // OPT: store i128 [[HIGH]], ptr [[P1]]
+    // CHECK: ret void
+    carrying_mul_add(a, b, c, d)
+}
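
The test above only pins the shape of the emitted IR; on a nightly toolchain the intrinsic can also be exercised for values directly. A small sketch (the cma_u32 wrapper only mirrors the test's function of the same name, and it assumes the (low, high) tuple order that the lowering's insertvalue indices establish):

#![feature(core_intrinsics)] // nightly-only; the same gate the test file enables

use std::intrinsics::carrying_mul_add;

// Same shape as the test's cma_u32, but checked for values rather than IR.
pub unsafe fn cma_u32(a: u32, b: u32, c: u32, d: u32) -> (u32, u32) {
    carrying_mul_add(a, b, c, d)
}

fn main() {
    // 3 * 4 + 5 + 6 = 23: everything fits in the low half.
    assert_eq!(unsafe { cma_u32(3, 4, 5, 6) }, (23, 0));

    // (2^32 - 1)^2 = 0xFFFF_FFFE_0000_0001, split across both halves.
    assert_eq!(
        unsafe { cma_u32(u32::MAX, u32::MAX, 0, 0) },
        (1, u32::MAX - 1)
    );
}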
