106 lines
4.0 KiB
LLVM
106 lines
4.0 KiB
LLVM
; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
|
|
|
|
; Tests whether we can fuse two loops together if they have constant but a
|
|
; different tripcount.
|
|
; The first three iterations of the first loop should be peeled, and then the
|
|
; two loops should be fused together in this example.
|
|
|
|
; C Code
|
|
;
|
|
; int B[1024];
|
|
;
|
|
; void function(int *arg) {
|
|
; for (int i = 0; i != 100; ++i)
|
|
; arg[i] = ((i - 3)*(i+3)) % i;
|
|
;
|
|
; for (int i = 3; i != 100; ++i)
|
|
; B[i] = ((i-6)*(i+3)) % i;
|
|
; }
|
|
|
|
; CHECK-LABEL: void @function(i32* noalias %arg)
|
|
; CHECK-NEXT: for.first.preheader:
|
|
; CHECK-NEXT: br label %for.first.peel.begin
|
|
; CHECK: for.first.peel.begin:
|
|
; CHECK-NEXT: br label %for.first.peel
|
|
; CHECK: for.first.peel:
|
|
; CHECK: br label %for.first.latch.peel
|
|
; CHECK: for.first.latch.peel:
|
|
; CHECK: br label %for.first.peel.next
|
|
; CHECK: for.first.peel.next:
|
|
; CHECK-NEXT: br label %for.first.peel2
|
|
; CHECK: for.first.peel2:
|
|
; CHECK: br label %for.first.latch.peel10
|
|
; CHECK: for.first.latch.peel10:
|
|
; CHECK: br label %for.first.peel.next1
|
|
; CHECK: for.first.peel.next1:
|
|
; CHECK-NEXT: br label %for.first.peel15
|
|
; CHECK: for.first.peel15:
|
|
; CHECK: br label %for.first.latch.peel23
|
|
; CHECK: for.first.latch.peel23:
|
|
; CHECK: br label %for.first.peel.next14
|
|
; CHECK: for.first.peel.next14:
|
|
; CHECK-NEXT: br label %for.first.peel.next27
|
|
; CHECK: for.first.peel.next27:
|
|
; CHECK-NEXT: br label %for.first.preheader.peel.newph
|
|
; CHECK: for.first.preheader.peel.newph:
|
|
; CHECK-NEXT: br label %for.first
|
|
; CHECK: for.first:
|
|
; CHECK: br label %for.first.latch
|
|
; CHECK: for.first.latch:
|
|
; CHECK: br label %for.second.latch
|
|
; CHECK: for.second.latch:
|
|
; CHECK: br i1 %exitcond, label %for.first, label %for.end
|
|
; CHECK: for.end:
|
|
; CHECK-NEXT: ret void
|
|
|
|
@B = common global [1024 x i32] zeroinitializer, align 16
|
|
|
|
define void @function(i32* noalias %arg) {
|
|
for.first.preheader:
|
|
br label %for.first
|
|
|
|
for.first: ; preds = %for.first.preheader, %for.first.latch
|
|
%.014 = phi i32 [ 0, %for.first.preheader ], [ %tmp15, %for.first.latch ]
|
|
%indvars.iv23 = phi i64 [ 0, %for.first.preheader ], [ %indvars.iv.next3, %for.first.latch ]
|
|
%tmp = add nsw i32 %.014, -3
|
|
%tmp8 = add nuw nsw i64 %indvars.iv23, 3
|
|
%tmp9 = trunc i64 %tmp8 to i32
|
|
%tmp10 = mul nsw i32 %tmp, %tmp9
|
|
%tmp11 = trunc i64 %indvars.iv23 to i32
|
|
%tmp12 = srem i32 %tmp10, %tmp11
|
|
%tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23
|
|
store i32 %tmp12, i32* %tmp13, align 4
|
|
br label %for.first.latch
|
|
|
|
for.first.latch: ; preds = %for.first
|
|
%indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
|
|
%tmp15 = add nuw nsw i32 %.014, 1
|
|
%exitcond4 = icmp ne i64 %indvars.iv.next3, 100
|
|
br i1 %exitcond4, label %for.first, label %for.second.preheader
|
|
|
|
for.second.preheader: ; preds = %for.first.latch
|
|
br label %for.second
|
|
|
|
for.second: ; preds = %for.second.preheader, %for.second.latch
|
|
%.02 = phi i32 [ 0, %for.second.preheader ], [ %tmp28, %for.second.latch ]
|
|
%indvars.iv1 = phi i64 [ 3, %for.second.preheader ], [ %indvars.iv.next, %for.second.latch ]
|
|
%tmp20 = add nsw i32 %.02, -3
|
|
%tmp21 = add nuw nsw i64 %indvars.iv1, 3
|
|
%tmp22 = trunc i64 %tmp21 to i32
|
|
%tmp23 = mul nsw i32 %tmp20, %tmp22
|
|
%tmp24 = trunc i64 %indvars.iv1 to i32
|
|
%tmp25 = srem i32 %tmp23, %tmp24
|
|
%tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
|
|
store i32 %tmp25, i32* %tmp26, align 4
|
|
br label %for.second.latch
|
|
|
|
for.second.latch: ; preds = %for.second
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
|
|
%tmp28 = add nuw nsw i32 %.02, 1
|
|
%exitcond = icmp ne i64 %indvars.iv.next, 100
|
|
br i1 %exitcond, label %for.second, label %for.end
|
|
|
|
for.end: ; preds = %for.second.latch
|
|
ret void
|
|
}
|