@@ -1522,3 +1522,78 @@ func @conv3d_no_symbols(%in : memref<?x?x?xf32>, %filter : memref<?x?x?xf32>, %o
1522
1522
// CHECKPARALLEL: %[[inc:.*]] = mulf %[[vb]], %[[va]] : f32
1523
1523
// CHECKPARALLEL: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32
1524
1524
// CHECKPARALLEL: store %[[res]], %[[arg2]][%[[arg3]], %[[arg4]], %[[arg5]]] : memref<?x?x?xf32>
1525
+
1526
+
1527
+ #map0 = affine_map <(d0 ) -> (24 , -d0 + 192 )>
1528
+ #map1 = affine_map <(d0 , d1 )[s0 ] -> (d0 * 192 + s0 + d1 )>
1529
+ #map2 = affine_map <(d0 ) -> (16 , -d0 + 192 )>
1530
+
1531
+ func @tiled_loop_to_parallel (%A: memref <192 x192 xf32 >,
1532
+ %B: memref <192 x192 xf32 >,
1533
+ %C: memref <192 x192 xf32 >) {
1534
+ %cst = constant 0.000000e+00 : f32
1535
+ %c24 = constant 24 : index
1536
+ %c16 = constant 16 : index
1537
+ %c0 = constant 0 : index
1538
+ %c192 = constant 192 : index
1539
+
1540
+ linalg.tiled_loop (%i , %j ) = (%c0 , %c0 ) to (%c192 , %c192 ) step (%c24 , %c16 )
1541
+ ins (%A_ = %A: memref <192 x192 xf32 >, %B_ = %B: memref <192 x192 xf32 >)
1542
+ outs (%C_ = %C: memref <192 x192 xf32 >) {
1543
+ %0 = affine.min #map0 (%i )
1544
+ %1 = memref.subview %A_ [%i , 0 ] [%0 , 192 ] [1 , 1 ]
1545
+ : memref <192 x192 xf32 > to memref <?x192 xf32 , #map1 >
1546
+ %2 = affine.min #map2 (%j )
1547
+ %3 = memref.subview %B_ [0 , %j ] [192 , %2 ] [1 , 1 ]
1548
+ : memref <192 x192 xf32 > to memref <192 x?xf32 , #map1 >
1549
+ %4 = memref.subview %C_ [%i , %j ] [%0 , %2 ] [1 , 1 ]
1550
+ : memref <192 x192 xf32 > to memref <?x?xf32 , #map1 >
1551
+ linalg.fill (%4 , %cst ) : memref <?x?xf32 , #map1 >, f32
1552
+ linalg.matmul ins (%1 , %3 : memref <?x192 xf32 , #map1 >,
1553
+ memref <192 x?xf32 , #map1 >)
1554
+ outs (%4 : memref <?x?xf32 , #map1 >)
1555
+ linalg.yield
1556
+ }
1557
+ return
1558
+ }
1559
+
1560
+ // CHECKLOOP-LABEL: @tiled_loop_to_parallel
1561
+ // CHECKLOOP-SAME: %[[A:.*]]: memref<192x192xf32>, %[[B:.*]]: memref<192x192xf32>,
1562
+ // CHECKLOOP-SAME: %[[C:.*]]: memref<192x192xf32>) {
1563
+ // CHECKLOOP: %[[C24:.*]] = constant 24 : index
1564
+ // CHECKLOOP: %[[C16:.*]] = constant 16 : index
1565
+ // CHECKLOOP: %[[C192:.*]] = constant 192 : index
1566
+ // CHECKLOOP: %[[C0:.*]] = constant 0 : index
1567
+ // CHECKLOOP: scf.for %[[I:.*]] = %[[C0]] to %[[C192]] step %[[C24]] {
1568
+ // CHECKLOOP: scf.for %[[J:.*]] = %[[C0]] to %[[C192]] step %[[C16]] {
1569
+ // CHECKLOOP: %[[A_sub:.*]] = memref.subview %[[A]][%[[I]]
1570
+ // CHECKLOOP: %[[B_sub:.*]] = memref.subview %[[B]][0, %[[J]]]
1571
+ // CHECKLOOP: %[[C_sub:.*]] = memref.subview %[[C]][%[[I]]
1572
+
1573
+
1574
+ func @tiled_loop_to_for (%A: memref <192 x192 xf32 >,
1575
+ %B: memref <192 x192 xf32 >,
1576
+ %C: memref <f32 >) {
1577
+ %c24 = constant 24 : index
1578
+ %c16 = constant 16 : index
1579
+ %c0 = constant 0 : index
1580
+ %c192 = constant 192 : index
1581
+ %cst = constant 0.000000e+00 : f32
1582
+
1583
+ linalg.tiled_loop (%i , %j ) = (%c0 , %c0 ) to (%c192 , %c192 ) step (%c24 , %c16 )
1584
+ ins (%A_ = %A: memref <192 x192 xf32 >, %B_ = %B: memref <192 x192 xf32 >)
1585
+ outs (%C_ = %C: memref <f32 >)
1586
+ iterators [" reduction" , " reduction" ] {
1587
+ linalg.fill (%A_ , %cst ) : memref <192 x192 xf32 >, f32
1588
+ linalg.yield
1589
+ }
1590
+ return
1591
+ }
1592
+
1593
+ // CHECKLOOP-LABEL: @tiled_loop_to_for
1594
+ // CHECKLOOP: %[[C24:.*]] = constant 24 : index
1595
+ // CHECKLOOP: %[[C16:.*]] = constant 16 : index
1596
+ // CHECKLOOP: %[[C192:.*]] = constant 192 : index
1597
+ // CHECKLOOP: %[[C0:.*]] = constant 0 : index
1598
+ // CHECKLOOP: scf.for %{{.*}} = %[[C0]] to %[[C192]] step %[[C24]]
1599
+ // CHECKLOOP: scf.for %{{.*}} = %[[C0]] to %[[C192]] step %[[C16]]
0 commit comments