; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -O3 -rotation-max-header-size=0 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes='default<O3>' -rotation-max-header-size=0 -S < %s | FileCheck %s --check-prefix=HOIST

; RUN: opt -O3 -rotation-max-header-size=1 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=HOIST
; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefix=HOIST

; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATED_LATER_OLDPM
; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefix=ROTATED_LATER_NEWPM

; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATE_OLDPM
; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefix=ROTATE_NEWPM

; This example is produced from the following very basic C code:
;
;   void f0();
;   void f1();
;   void f2();
;
;   void loop(int width) {
;       if(width < 1)
;           return;
;       for(int i = 0; i < width - 1; ++i) {
;           f0();
;           f1();
;       }
;       f0();
;       f2();
;   }

; We have a choice here. We can either
; * hoist the f0() call into the loop header,
;   * which potentially makes loop rotation unprofitable, since the loop header
;     might have grown above a certain threshold, and such unrotated loops will
;     be ignored by LoopVectorizer, preventing vectorization
;   * or loop rotation will succeed, resulting in some weird PHIs that will also
;     harm vectorization
; * or not hoist the f0() call before performing loop rotation,
;   at the cost of potential code bloat and/or potentially successfully rotating
;   the loops, vectorizing them at the cost of compile time.

; Data layout string the module is optimized under.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; External functions with no visible body; the optimizer must treat calls to
; them as opaque.
declare void @f0()
declare void @f1()
declare void @f2()

; Lifetime markers used by @_Z4loopi around the stack slot of its loop counter.
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)

; Unoptimized IR for `void loop(int width)`: the argument and the loop counter
; both live in allocas and are reloaded on every use. The function returns
; immediately when width < 1; otherwise it calls f0() and f1() while
; i < width - 1, then calls f0() and f2() once after the loop.
; The check lines below record the optimized output for each RUN configuration.
define void @_Z4loopi(i32 %width) {
; HOIST-LABEL: @_Z4loopi(
; HOIST-NEXT:  entry:
; HOIST-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; HOIST-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; HOIST:       for.cond.preheader:
; HOIST-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; HOIST-NEXT:    br label [[FOR_COND:%.*]]
; HOIST:       for.cond:
; HOIST-NEXT:    [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ]
; HOIST-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]]
; HOIST-NEXT:    tail call void @f0()
; HOIST-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; HOIST:       for.cond.cleanup:
; HOIST-NEXT:    tail call void @f2()
; HOIST-NEXT:    br label [[RETURN]]
; HOIST:       for.body:
; HOIST-NEXT:    tail call void @f1()
; HOIST-NEXT:    [[INC]] = add nuw i32 [[I_0]], 1
; HOIST-NEXT:    br label [[FOR_COND]]
; HOIST:       return:
; HOIST-NEXT:    ret void
;
; ROTATED_LATER_OLDPM-LABEL: @_Z4loopi(
; ROTATED_LATER_OLDPM-NEXT:  entry:
; ROTATED_LATER_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATED_LATER_OLDPM:       for.cond.preheader:
; ROTATED_LATER_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
; ROTATED_LATER_OLDPM:       for.cond.cleanup:
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f2()
; ROTATED_LATER_OLDPM-NEXT:    br label [[RETURN]]
; ROTATED_LATER_OLDPM:       for.body:
; ROTATED_LATER_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ]
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f0()
; ROTATED_LATER_OLDPM-NEXT:    tail call void @f1()
; ROTATED_LATER_OLDPM-NEXT:    [[INC]] = add nuw i32 [[I_04]], 1
; ROTATED_LATER_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
; ROTATED_LATER_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
; ROTATED_LATER_OLDPM:       return:
; ROTATED_LATER_OLDPM-NEXT:    ret void
;
; ROTATED_LATER_NEWPM-LABEL: @_Z4loopi(
; ROTATED_LATER_NEWPM-NEXT:  entry:
; ROTATED_LATER_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.preheader:
; ROTATED_LATER_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.preheader.for.body_crit_edge:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw i32 0, 1
; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATED_LATER_NEWPM:       for.cond.cleanup:
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f2()
; ROTATED_LATER_NEWPM-NEXT:    br label [[RETURN]]
; ROTATED_LATER_NEWPM:       for.body:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ]
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f0()
; ROTATED_LATER_NEWPM-NEXT:    tail call void @f1()
; ROTATED_LATER_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
; ROTATED_LATER_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
; ROTATED_LATER_NEWPM:       for.body.for.body_crit_edge:
; ROTATED_LATER_NEWPM-NEXT:    [[INC_0]] = add nuw i32 [[INC_PHI]], 1
; ROTATED_LATER_NEWPM-NEXT:    br label [[FOR_BODY]]
; ROTATED_LATER_NEWPM:       return:
; ROTATED_LATER_NEWPM-NEXT:    ret void
;
; ROTATE_OLDPM-LABEL: @_Z4loopi(
; ROTATE_OLDPM-NEXT:  entry:
; ROTATE_OLDPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATE_OLDPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATE_OLDPM:       for.cond.preheader:
; ROTATE_OLDPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
; ROTATE_OLDPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; ROTATE_OLDPM:       for.body.preheader:
; ROTATE_OLDPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATE_OLDPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATE_OLDPM:       for.cond.cleanup:
; ROTATE_OLDPM-NEXT:    tail call void @f0()
; ROTATE_OLDPM-NEXT:    tail call void @f2()
; ROTATE_OLDPM-NEXT:    br label [[RETURN]]
; ROTATE_OLDPM:       for.body:
; ROTATE_OLDPM-NEXT:    [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; ROTATE_OLDPM-NEXT:    tail call void @f0()
; ROTATE_OLDPM-NEXT:    tail call void @f1()
; ROTATE_OLDPM-NEXT:    [[INC]] = add nuw nsw i32 [[I_04]], 1
; ROTATE_OLDPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]]
; ROTATE_OLDPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
; ROTATE_OLDPM:       return:
; ROTATE_OLDPM-NEXT:    ret void
;
; ROTATE_NEWPM-LABEL: @_Z4loopi(
; ROTATE_NEWPM-NEXT:  entry:
; ROTATE_NEWPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1
; ROTATE_NEWPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]]
; ROTATE_NEWPM:       for.cond.preheader:
; ROTATE_NEWPM-NEXT:    [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1
; ROTATE_NEWPM-NEXT:    br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; ROTATE_NEWPM:       for.body.preheader:
; ROTATE_NEWPM-NEXT:    [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1
; ROTATE_NEWPM-NEXT:    [[INC_1:%.*]] = add nuw nsw i32 0, 1
; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY:%.*]]
; ROTATE_NEWPM:       for.cond.cleanup:
; ROTATE_NEWPM-NEXT:    tail call void @f0()
; ROTATE_NEWPM-NEXT:    tail call void @f2()
; ROTATE_NEWPM-NEXT:    br label [[RETURN]]
; ROTATE_NEWPM:       for.body:
; ROTATE_NEWPM-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ]
; ROTATE_NEWPM-NEXT:    tail call void @f0()
; ROTATE_NEWPM-NEXT:    tail call void @f1()
; ROTATE_NEWPM-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]]
; ROTATE_NEWPM-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]]
; ROTATE_NEWPM:       for.body.for.body_crit_edge:
; ROTATE_NEWPM-NEXT:    [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1
; ROTATE_NEWPM-NEXT:    br label [[FOR_BODY]]
; ROTATE_NEWPM:       return:
; ROTATE_NEWPM-NEXT:    ret void
;
entry:
  ; Spill the argument to a stack slot and reserve a slot for the counter i.
  %width.addr = alloca i32, align 4
  %i = alloca i32, align 4
  store i32 %width, i32* %width.addr, align 4
  ; Guard from the C source, returning early when width < 1.
  %i1 = load i32, i32* %width.addr, align 4
  %cmp = icmp slt i32 %i1, 1
  br i1 %cmp, label %if.then, label %if.end

if.then:
  br label %return

if.end:
  ; Open the lifetime of the counter slot and initialize i = 0.
  %i2 = bitcast i32* %i to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2)
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:
  ; Loop header, which continues while i < width - 1 (signed compare).
  %i3 = load i32, i32* %i, align 4
  %i4 = load i32, i32* %width.addr, align 4
  %sub = sub nsw i32 %i4, 1
  %cmp1 = icmp slt i32 %i3, %sub
  br i1 %cmp1, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ; Loop exit, where the counter slot's lifetime ends.
  %i5 = bitcast i32* %i to i8*
  call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5)
  br label %for.end

for.body:
  ; This f0() call and the one in for.end are the common code that can be
  ; hoisted into the loop header.
  call void @f0()
  call void @f1()
  br label %for.inc

for.inc:
  ; ++i, done through the stack slot.
  %i6 = load i32, i32* %i, align 4
  %inc = add nsw i32 %i6, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:
  ; Code after the loop, where f0() is called once more, followed by f2().
  call void @f0()
  call void @f2()
  br label %return

return:
  ret void
}