Is it possible to remove dispatch_once in Objective-C++?

前端 未结 2 1599
生来不讨喜
生来不讨喜 2021-01-02 20:54

Since C++11, local static variables are known to be initialized in a thread safe manner (unless the -fno-threadsafe-statics is given), as specified

2条回答
  •  [愿得一人]
    2021-01-02 21:54

    TL;DR - it seems that it's possible to rely on C++11 thread-safe static variable initialization, which has the same performance characteristics as dispatch_once.

    Following Stephan Lechner's answer, I wrote the simplest code that tests the C++ static initialization flow:

    // Empty placeholder type; the test only needs something to allocate with `new`.
    class Object {  
    };
    
    // Returns a pointer to a heap-allocated Object held in a function-local
    // static. The initializer `new Object()` belongs to the static's
    // initialization, which is the code path this listing is meant to exercise.
    static Object *GetObjectCppStatic() {
      static Object *object = new Object();
      return object;
    }
    
    // Calls the accessor once and discards the result.
    // NOTE(review): presumably the call exists so the file-static function is
    // referenced and therefore emitted by the compiler - confirm.
    int main() {
      GetObjectCppStatic();
    }
    

    Compiling this to assembly via clang++ test.cpp -O0 -fno-exceptions -S (-O0 to avoid inlining, though the same general code is produced for -Os; -fno-exceptions to simplify the generated code) shows that GetObjectCppStatic compiles to:

    ## Code generated for the file-static function GetObjectCppStatic().
    ## __ZGVZL18GetObjectCppStaticvE6object is the guard variable for the local
    ## static `object`; __ZZL18GetObjectCppStaticvE6object is `object` itself.
    __ZL18GetObjectCppStaticv:        ## @_ZL18GetObjectCppStaticv
      .cfi_startproc
    ## BB#0:
      pushq   %rbp
    Lcfi6:
      .cfi_def_cfa_offset 16
    Lcfi7:
      .cfi_offset %rbp, -16
      movq  %rsp, %rbp
    Lcfi8:
      .cfi_def_cfa_register %rbp
    ## Inline first check: a non-zero guard byte jumps straight to the load at
    ## LBB2_3 without calling into the runtime.
      cmpb  $0, __ZGVZL18GetObjectCppStaticvE6object(%rip)
      jne LBB2_3
    ## BB#1:
    ## Guard byte was zero: pass the guard's address to ___cxa_guard_acquire.
    ## A zero return value skips the initializer below.
      leaq  __ZGVZL18GetObjectCppStaticvE6object(%rip), %rdi
      callq   ___cxa_guard_acquire
      cmpl  $0, %eax
      je  LBB2_3
    ## BB#2:
    ## Initializer: call __Znwm (operator new) with argument 1, store the
    ## returned pointer into `object`, then call ___cxa_guard_release on the guard.
      movl  $1, %eax
      movl  %eax, %edi
      callq   __Znwm
      leaq  __ZGVZL18GetObjectCppStaticvE6object(%rip), %rdi
      movq  %rax, __ZZL18GetObjectCppStaticvE6object(%rip)
      callq   ___cxa_guard_release
    ## Common exit: load `object` into %rax as the return value.
    LBB2_3:
      movq  __ZZL18GetObjectCppStaticvE6object(%rip), %rax
      popq  %rbp
      retq
      .cfi_endproc
    

    We can definitely see the calls to ___cxa_guard_acquire and ___cxa_guard_release, implemented by the libc++ ABI here. Note that we didn't even have to tell clang that we use C++11, as apparently this was supported by default even prior to that.

    So we know that both forms ensure thread-safe initialization of local statics. But what about performance? The following test code checks both methods with no contention (single-threaded) and with heavy contention (multi-threaded):

    #include <cstdio>
    #include <dispatch/dispatch.h>
    #include <mach/mach_time.h>
    
    // Empty placeholder type; the benchmark only needs something to allocate with `new`.
    class Object {  
    };
    
    // Invokes `executionBlock` `times` times, then `finallyBlock` once, and
    // returns how long the whole sequence took, in seconds. The time spent in
    // `finallyBlock` is part of the measurement.
    static double Measure(int times, void(^executionBlock)(), void(^finallyBlock)()) {
      // numer/denom is the factor that converts mach ticks to nanoseconds.
      struct mach_timebase_info timebase;
      mach_timebase_info(&timebase);

      const uint64_t startTicks = mach_absolute_time();
      for (int iteration = 0; iteration < times; iteration++) {
        executionBlock();
      }
      finallyBlock();
      const uint64_t elapsedTicks = mach_absolute_time() - startTicks;

      const double elapsedNanos =
          (double)elapsedTicks * timebase.numer / timebase.denom;
      return elapsedNanos / NSEC_PER_SEC;
    }
    
    // Returns the shared Object, allocating it inside a dispatch_once block the
    // first time the function is called.
    static Object *GetObjectDispatchOnce() {
      static dispatch_once_t once;
      static Object *instance;

      dispatch_once(&once, ^{ instance = new Object(); });
      return instance;
    }
    
    // Returns a pointer to a heap-allocated Object held in a function-local
    // static; the allocation is the static's initializer. This is the C++
    // counterpart the benchmark compares against the dispatch_once version.
    static Object *GetObjectCppStatic() {
      static Object *object = new Object();
      return object;
    }
    
    // Times both accessors twice: first by calling them in a loop on the calling
    // thread, then by submitting every call as a task on a concurrent dispatch
    // queue and waiting for all tasks to finish.
    int main() {
      // Number of accessor calls per measurement in each phase.
      const int singleThreadRuns = 10000000;
      const int multiThreadRuns = 1000000;
      void (^doNothing)() = ^{};

      printf("Single thread statistics:\n");
      double elapsed = Measure(singleThreadRuns, ^{
        GetObjectDispatchOnce();
      }, doNothing);
      printf("DispatchOnce took %g\n", elapsed);
      elapsed = Measure(singleThreadRuns, ^{
        GetObjectCppStatic();
      }, doNothing);
      printf("CppStatic took %g\n", elapsed);

      printf("\n");

      dispatch_queue_t queue = dispatch_queue_create("queue",
          DISPATCH_QUEUE_CONCURRENT);
      dispatch_group_t group = dispatch_group_create();
      // Passed as the "finally" block so that waiting for all submitted tasks is
      // included in the measured time.
      void (^waitForGroup)() = ^{
        dispatch_group_wait(group, DISPATCH_TIME_FOREVER);
      };

      printf("Multi thread statistics:\n");
      elapsed = Measure(multiThreadRuns, ^{
        dispatch_group_async(group, queue, ^{
          GetObjectDispatchOnce();
        });
      }, waitForGroup);
      printf("DispatchOnce took %g\n", elapsed);
      elapsed = Measure(multiThreadRuns, ^{
        dispatch_group_async(group, queue, ^{
          GetObjectCppStatic();
        });
      }, waitForGroup);
      printf("CppStatic took %g\n", elapsed);
    }
    

    Which yields the following results on x64:

    Single thread statistics:
    DispatchOnce took 0.025486
    CppStatic took 0.0232348
    
    Multi thread statistics:
    DispatchOnce took 0.285058
    CppStatic took 0.32596
    

    So, up to measurement error, it seems that the performance characteristics of both methods are similar, mostly due to the double-checked locking that both of them perform. For dispatch_once, this happens in the _dispatch_once function:

    // Excerpt of _dispatch_once as quoted by the article; both branch bodies are
    // elided there. Only the initial predicate test is shown.
    void
    _dispatch_once(dispatch_once_t *predicate,
        DISPATCH_NOESCAPE dispatch_block_t block)
    {
      // Tests *predicate against ~0l, with ~0l passed to DISPATCH_EXPECT as the
      // expected value. NOTE(review): ~0l presumably marks "already
      // initialized", making the else branch the fast path - confirm against
      // libdispatch's once.h.
      if (DISPATCH_EXPECT(*predicate, ~0l) != ~0l) {
        // ...
      } else {
        // ...
      }
    }
    

    Whereas in the C++ static initialization flow, it happens right before the call to ___cxa_guard_acquire.

提交回复
热议问题