Speed difference between If-Else and Ternary operator in C…?

后端 未结 6 2316
别跟我提以往
别跟我提以往 2020-12-01 16:49

So at the suggestion of a colleague, I just tested the speed difference between the ternary operator and the equivalent If-Else block... and it seems that the ternary operat

6条回答
  •  遥遥无期
    2020-12-01 17:39

    If there is any, change your compiler!

    For this kind of question I use the Try Out LLVM page. It's an old release of LLVM (still using the gcc front-end), but those are old tricks.

    Here is my little sample program (simplified version of yours):

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    
    /*
     * Micro-sample comparing an if/else statement with the equivalent ternary
     * expression. Both loops perform the same per-iteration computation; the
     * first stores it in `a` via if/else, the second in `d` via ?:.
     * Prints the final values of `a` and `d` and returns 0.
     *
     * No argc check is performed before argv[0..2] are read.
     */
    int main (int argc, char* argv[]) {
      /* NOTE(review): argv[0] is conventionally the program name, so this
       * likely yields 0; argv[1..3] may have been intended. Kept as written
       * because the IR shown for this sample also loads argv[0]. */
      int N = atoi(argv[0]);
    
      int a = 0, d = 0, b = atoi(argv[1]), c = atoi(argv[2]);
    
      int i;
      /* Variant 1: if/else. Odd i selects b+i, even i selects c+i. */
      for(i = 0; i < N; i++)
      {
         a = i & 1;
         if(a) a = b+i; else a = c+i;
      }
    
      /* Variant 2: the same selection written with the ternary operator. */
      for(i = 0; i < N; i++)
      {
         d = i & 1;
         d = d ? b+i : c+i;
      }
    
      /* Both results are printed, so each loop's final value is observable. */
      printf("%d %d", a, d);
    
      return 0;
    }
    

    And here is the corresponding generated LLVM IR:

    ; Optimized IR for the C sample's main. Each source loop has become a bare
    ; counting loop, and the value of a (resp. d) is computed once, after its loop.
    define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
    entry:
      ; %N = atoi(argv[0])
      %0 = load i8** %argv, align 8                   ;  [#uses=1]
      %N = tail call i32 @atoi(i8* %0) nounwind readonly ;  [#uses=5]
    
      ; %b = atoi(argv[1])
      %2 = getelementptr inbounds i8** %argv, i64 1   ;  [#uses=1]
      %3 = load i8** %2, align 8                      ;  [#uses=1]
      %b = tail call i32 @atoi(i8* %3) nounwind readonly ;  [#uses=2]
    
      ; %c = atoi(argv[2])
      %5 = getelementptr inbounds i8** %argv, i64 2   ;  [#uses=1]
      %6 = load i8** %5, align 8                      ;  [#uses=1]
      %c = tail call i32 @atoi(i8* %6) nounwind readonly ;  [#uses=2]
    
      ; %8 = (N > 0), signed; when false both loops are skipped (jump to bb11).
      %8 = icmp sgt i32 %N, 0                         ;  [#uses=2]
      br i1 %8, label %bb, label %bb11
    
    ; First loop: only the counter remains. %9 starts at 0 and the loop exits
    ; once %9 + 1 == N.
    bb:                                               ; preds = %bb, %entry
      %9 = phi i32 [ %10, %bb ], [ 0, %entry ]        ;  [#uses=2]
      %10 = add nsw i32 %9, 1                         ;  [#uses=2]
      %exitcond22 = icmp eq i32 %10, %N               ;  [#uses=1]
      br i1 %exitcond22, label %bb10.preheader, label %bb
    
    ; Final value of a: ((%9 & 1) == 0 ? c : b) + (N - 1), using the last counter
    ; value %9 from the loop above. Then %8 is tested again to enter the second loop.
    bb10.preheader:                                   ; preds = %bb
      %11 = and i32 %9, 1                             ;  [#uses=1]
      %12 = icmp eq i32 %11, 0                        ;  [#uses=1]
      %.pn13 = select i1 %12, i32 %c, i32 %b          ;  [#uses=1]
      %tmp21 = add i32 %N, -1                         ;  [#uses=1]
      %a.1 = add i32 %.pn13, %tmp21                   ;  [#uses=2]
      br i1 %8, label %bb6, label %bb11
    
    ; Second loop: same shape as the first, counter only.
    bb6:                                              ; preds = %bb6, %bb10.preheader
      %13 = phi i32 [ %14, %bb6 ], [ 0, %bb10.preheader ] ;  [#uses=2]
      %14 = add nsw i32 %13, 1                        ;  [#uses=2]
      %exitcond = icmp eq i32 %14, %N                 ;  [#uses=1]
      br i1 %exitcond, label %bb10.bb11_crit_edge, label %bb6
    
    ; Final value of d: ((%13 & 1) == 0 ? c : b) + (N - 1), the same
    ; select/add/add sequence that produced %a.1.
    bb10.bb11_crit_edge:                              ; preds = %bb6
      %15 = and i32 %13, 1                            ;  [#uses=1]
      %16 = icmp eq i32 %15, 0                        ;  [#uses=1]
      %.pn = select i1 %16, i32 %c, i32 %b            ;  [#uses=1]
      %tmp = add i32 %N, -1                           ;  [#uses=1]
      %d.1 = add i32 %.pn, %tmp                       ;  [#uses=1]
      br label %bb11
    
    ; Exit: merge a and d (0 on the paths where the corresponding loop was
    ; skipped), print them with the @.str format, and return 0.
    bb11:                                             ; preds = %bb10.bb11_crit_edge, %bb10.preheader, %entry
      %a.0 = phi i32 [ %a.1, %bb10.bb11_crit_edge ], [ %a.1, %bb10.preheader ], [ 0, %entry ] ;  [#uses=1]
      %d.0 = phi i32 [ %d.1, %bb10.bb11_crit_edge ], [ 0, %bb10.preheader ], [ 0, %entry ] ;  [#uses=1]
      %17 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %a.0, i32 %d.0) nounwind ;  [#uses=0]
      ret i32 0
    }
    

    Okay, so this is likely to look like gibberish, even though I went ahead and renamed some variables to make it a bit easier to read.

    The important bits are these two blocks:

      %.pn13 = select i1 %12, i32 %c, i32 %b          ;  [#uses=1]
      %tmp21 = add i32 %N, -1                         ;  [#uses=1]
      %a.1 = add i32 %.pn13, %tmp21                   ;  [#uses=2]
    
      %.pn = select i1 %16, i32 %c, i32 %b            ;  [#uses=1]
      %tmp = add i32 %N, -1                           ;  [#uses=1]
      %d.1 = add i32 %.pn, %tmp                       ;  [#uses=1]
    

    These set a and d, respectively.

    And the conclusion is: No difference

    Note: in a simpler example the two variables actually got merged; it seems that here the optimizer did not detect the similarity...

提交回复
热议问题