Thread/warp local lock in cuda

跟風遠走 提交于 2019-12-24 09:48:13

问题


I want to implement critical sections in cuda. I read many questions and answers on this subject, and answers often involve atomicCAS and atomicExch.

However, this doesn't work at warp level, since all threads in the warp acquire the same lock after the atomicCAS, leading to a deadlock.

I think there is a way to have a real lock in cuda by using warp __ballot or __any instructions.

However, after many attempts, I don't get to a satisfying (read working) solution.

Does anyone here have a good answer on that?

ps: i KNOW warp divergence is bad, so don't tell me to change my algorithm.


回答1:


Atomics can be made to work at the warp level. The hazard with intra-warp negotiation for the lock has to do with how the lock is acquired in relation to how it is released. Questions such as this and this and this point out this hazard, but you should not conflate this as being a limitation of using atomics. The hazard arises as a result of how conditional code is executed on a GPU(*), and dependency between the 2 paths in the conditional construct.

In order to avoid the above hazard within a warp, it's necessary to break the dependency between the 2 paths in the conditional construct. The acquisition of the lock must be able to proceed independently from the critical section and the release of the lock.

However, since you desire just coordination within a warp, atomics are overkill. We already have a number of powerful warp communication and coordination mechanisms such as __ballot() that you mention.

What follows then is a worked example of using __ballot() in a mechanism to ensure that only 1 thread (per warp) will "acquire a lock". Since this is all happening only at the warp level, we once again can leverage this to entirely dispense with the release of the lock. We can know the lock is released when the critical section is complete (since we only need to track completion across a warp).

This example performs stream compaction at the warp level, using a __ballot() mechanism to ensure that only 1 thread proceeds into the "critical section" at a time:

$ cat t398.cu
#include <stdio.h>
#include <stdlib.h>

const int num_threads = 1024;  // should be 1024 or less for this demo

// returns true if the thread acquired the lock
// will return true for at most 1 thread per warp

__device__ bool warp_lock(int req){
  return ((__ffs(__ballot(req))) == ((threadIdx.x & 31)+1));
}

__global__ void test_lock(int *pattern, int *result){

  __shared__ int done;
  __shared__ int warp_index[32];
  int myreq = pattern[threadIdx.x];
  done = 1;
  if ((threadIdx.x & 31) == 0) warp_index[threadIdx.x>>5] = 0;
  do {
    done = 1;
    __syncthreads();
    // attempt to  "acquire lock"
    bool mylock = warp_lock(myreq);
    // if lock acquired, do "critical section"
    if (mylock){
      done = 0;
      int my_index = (warp_index[threadIdx.x>>5]++) + ((threadIdx.x>>5)*32);
      result[my_index] = threadIdx.x;
      myreq = 0;
      }
    __syncthreads();
    } while (!done);
}

int main(){

  int *h_pattern, *d_pattern, *h_result, *d_result;
  h_pattern = (int *)malloc(num_threads*sizeof(int));
  h_result  = (int *)malloc(num_threads*sizeof(int));
  cudaMalloc(&d_pattern, num_threads*sizeof(int));
  cudaMalloc(&d_result, num_threads*sizeof(int));
  for (int i = 0; i < num_threads; i++) {h_pattern[i] = rand()&1;}
  cudaMemcpy(d_pattern, h_pattern, num_threads*sizeof(int), cudaMemcpyHostToDevice);
  cudaMemset(d_result, 0, num_threads*sizeof(int));
  test_lock<<<1, num_threads>>>(d_pattern, d_result);
  cudaMemcpy(h_result, d_result, num_threads*sizeof(int), cudaMemcpyDeviceToHost);
  printf("index, in, out\n");
  for (int i = 0; i < num_threads; i++)
    printf("%4d, %d, %4d\n", i, h_pattern[i], h_result[i]);
  return 0;
}
$ nvcc -arch=sm_61 -o t398 t398.cu
$ cuda-memcheck ./t398
========= CUDA-MEMCHECK
index, in, out
   0, 1,    0
   1, 0,    2
   2, 1,    3
   3, 1,    4
   4, 1,    5
   5, 1,    8
   6, 0,    9
   7, 0,   11
   8, 1,   13
   9, 1,   14
  10, 0,   20
  11, 1,   22
  12, 0,   23
  13, 1,   27
  14, 1,   28
  15, 0,   29
  16, 0,   30
  17, 0,    0
  18, 0,    0
  19, 0,    0
  20, 1,    0
  21, 0,    0
  22, 1,    0
  23, 1,    0
  24, 0,    0
  25, 0,    0
  26, 0,    0
  27, 1,    0
  28, 1,    0
  29, 1,    0
  30, 1,    0
  31, 0,    0
  32, 0,   34
  33, 0,   35
  34, 1,   36
  35, 1,   38
  36, 1,   40
  37, 0,   41
  38, 1,   42
  39, 0,   43
  40, 1,   45
  41, 1,   48
  42, 1,   50
  43, 1,   52
  44, 0,   55
  45, 1,   59
  46, 0,   60
  47, 0,   61
  48, 1,   63
  49, 0,    0
  50, 1,    0
  51, 0,    0
  52, 1,    0
  53, 0,    0
  54, 0,    0
  55, 1,    0
  56, 0,    0
  57, 0,    0
  58, 0,    0
  59, 1,    0
  60, 1,    0
  61, 1,    0
  62, 0,    0
  63, 1,    0
  64, 0,   65
  65, 1,   67
  66, 0,   68
  67, 1,   69
  68, 1,   71
  69, 1,   73
  70, 0,   75
  71, 1,   78
  72, 0,   80
  73, 1,   86
  74, 0,   87
  75, 1,   89
  76, 0,   94
  77, 0,    0
  78, 1,    0
  79, 0,    0
  80, 1,    0
  81, 0,    0
  82, 0,    0
  83, 0,    0
  84, 0,    0
  85, 0,    0
  86, 1,    0
  87, 1,    0
  88, 0,    0
  89, 1,    0
  90, 0,    0
  91, 0,    0
  92, 0,    0
  93, 0,    0
  94, 1,    0
  95, 0,    0
  96, 0,   99
  97, 0,  100
  98, 0,  104
  99, 1,  105
 100, 1,  106
 101, 0,  108
 102, 0,  112
 103, 0,  115
 104, 1,  116
 105, 1,  117
 106, 1,  119
 107, 0,  121
 108, 1,  122
 109, 0,  123
 110, 0,  124
 111, 0,  125
 112, 1,  126
 113, 0,  127
 114, 0,    0
 115, 1,    0
 116, 1,    0
 117, 1,    0
 118, 0,    0
 119, 1,    0
 120, 0,    0
 121, 1,    0
 122, 1,    0
 123, 1,    0
 124, 1,    0
 125, 1,    0
 126, 1,    0
 127, 1,    0
 128, 1,  128
 129, 1,  129
 130, 1,  130
 131, 0,  132
 132, 1,  133
 133, 1,  135
 134, 0,  137
 135, 1,  138
 136, 0,  141
 137, 1,  143
 138, 1,  144
 139, 0,  145
 140, 0,  149
 141, 1,  151
 142, 0,  153
 143, 1,  155
 144, 1,  157
 145, 1,  158
 146, 0,  159
 147, 0,    0
 148, 0,    0
 149, 1,    0
 150, 0,    0
 151, 1,    0
 152, 0,    0
 153, 1,    0
 154, 0,    0
 155, 1,    0
 156, 0,    0
 157, 1,    0
 158, 1,    0
 159, 1,    0
 160, 1,  160
 161, 0,  164
 162, 0,  166
 163, 0,  172
 164, 1,  174
 165, 0,  175
 166, 1,  176
 167, 0,  177
 168, 0,  178
 169, 0,  181
 170, 0,  182
 171, 0,  184
 172, 1,  185
 173, 0,  187
 174, 1,  189
 175, 1,  190
 176, 1,  191
 177, 1,    0
 178, 1,    0
 179, 0,    0
 180, 0,    0
 181, 1,    0
 182, 1,    0
 183, 0,    0
 184, 1,    0
 185, 1,    0
 186, 0,    0
 187, 1,    0
 188, 0,    0
 189, 1,    0
 190, 1,    0
 191, 1,    0
 192, 1,  192
 193, 1,  193
 194, 0,  196
 195, 0,  197
 196, 1,  199
 197, 1,  202
 198, 0,  203
 199, 1,  204
 200, 0,  212
 201, 0,  213
 202, 1,  214
 203, 1,  217
 204, 1,  219
 205, 0,  221
 206, 0,  223
 207, 0,    0
 208, 0,    0
 209, 0,    0
 210, 0,    0
 211, 0,    0
 212, 1,    0
 213, 1,    0
 214, 1,    0
 215, 0,    0
 216, 0,    0
 217, 1,    0
 218, 0,    0
 219, 1,    0
 220, 0,    0
 221, 1,    0
 222, 0,    0
 223, 1,    0
 224, 0,  226
 225, 0,  227
 226, 1,  230
 227, 1,  234
 228, 0,  235
 229, 0,  236
 230, 1,  239
 231, 0,  242
 232, 0,  243
 233, 0,  248
 234, 1,  250
 235, 1,  251
 236, 1,  252
 237, 0,  255
 238, 0,    0
 239, 1,    0
 240, 0,    0
 241, 0,    0
 242, 1,    0
 243, 1,    0
 244, 0,    0
 245, 0,    0
 246, 0,    0
 247, 0,    0
 248, 1,    0
 249, 0,    0
 250, 1,    0
 251, 1,    0
 252, 1,    0
 253, 0,    0
 254, 0,    0
 255, 1,    0
 256, 0,  261
 257, 0,  267
 258, 0,  273
 259, 0,  274
 260, 0,  277
 261, 1,  278
 262, 0,  279
 263, 0,  280
 264, 0,  282
 265, 0,  285
 266, 0,  286
 267, 1,  287
 268, 0,    0
 269, 0,    0
 270, 0,    0
 271, 0,    0
 272, 0,    0
 273, 1,    0
 274, 1,    0
 275, 0,    0
 276, 0,    0
 277, 1,    0
 278, 1,    0
 279, 1,    0
 280, 1,    0
 281, 0,    0
 282, 1,    0
 283, 0,    0
 284, 0,    0
 285, 1,    0
 286, 1,    0
 287, 1,    0
 288, 1,  288
 289, 0,  290
 290, 1,  291
 291, 1,  292
 292, 1,  295
 293, 0,  305
 294, 0,  306
 295, 1,  308
 296, 0,  310
 297, 0,  314
 298, 0,  315
 299, 0,  319
 300, 0,    0
 301, 0,    0
 302, 0,    0
 303, 0,    0
 304, 0,    0
 305, 1,    0
 306, 1,    0
 307, 0,    0
 308, 1,    0
 309, 0,    0
 310, 1,    0
 311, 0,    0
 312, 0,    0
 313, 0,    0
 314, 1,    0
 315, 1,    0
 316, 0,    0
 317, 0,    0
 318, 0,    0
 319, 1,    0
 320, 0,  321
 321, 1,  322
 322, 1,  324
 323, 0,  325
 324, 1,  326
 325, 1,  327
 326, 1,  328
 327, 1,  330
 328, 1,  331
 329, 0,  333
 330, 1,  334
 331, 1,  336
 332, 0,  343
 333, 1,  345
 334, 1,    0
 335, 0,    0
 336, 1,    0
 337, 0,    0
 338, 0,    0
 339, 0,    0
 340, 0,    0
 341, 0,    0
 342, 0,    0
 343, 1,    0
 344, 0,    0
 345, 1,    0
 346, 0,    0
 347, 0,    0
 348, 0,    0
 349, 0,    0
 350, 0,    0
 351, 0,    0
 352, 1,  352
 353, 1,  353
 354, 0,  355
 355, 1,  359
 356, 0,  364
 357, 0,  365
 358, 0,  366
 359, 1,  369
 360, 0,  372
 361, 0,  373
 362, 0,  374
 363, 0,  377
 364, 1,  380
 365, 1,  382
 366, 1,  383
 367, 0,    0
 368, 0,    0
 369, 1,    0
 370, 0,    0
 371, 0,    0
 372, 1,    0
 373, 1,    0
 374, 1,    0
 375, 0,    0
 376, 0,    0
 377, 1,    0
 378, 0,    0
 379, 0,    0
 380, 1,    0
 381, 0,    0
 382, 1,    0
 383, 1,    0
 384, 1,  384
 385, 1,  385
 386, 0,  387
 387, 1,  388
 388, 1,  391
 389, 0,  397
 390, 0,  401
 391, 1,  408
 392, 0,  410
 393, 0,  411
 394, 0,  412
 395, 0,    0
 396, 0,    0
 397, 1,    0
 398, 0,    0
 399, 0,    0
 400, 0,    0
 401, 1,    0
 402, 0,    0
 403, 0,    0
 404, 0,    0
 405, 0,    0
 406, 0,    0
 407, 0,    0
 408, 1,    0
 409, 0,    0
 410, 1,    0
 411, 1,    0
 412, 1,    0
 413, 0,    0
 414, 0,    0
 415, 0,    0
 416, 1,  416
 417, 0,  419
 418, 0,  423
 419, 1,  424
 420, 0,  426
 421, 0,  427
 422, 0,  428
 423, 1,  429
 424, 1,  430
 425, 0,  433
 426, 1,  436
 427, 1,  438
 428, 1,  439
 429, 1,  443
 430, 1,  447
 431, 0,    0
 432, 0,    0
 433, 1,    0
 434, 0,    0
 435, 0,    0
 436, 1,    0
 437, 0,    0
 438, 1,    0
 439, 1,    0
 440, 0,    0
 441, 0,    0
 442, 0,    0
 443, 1,    0
 444, 0,    0
 445, 0,    0
 446, 0,    0
 447, 1,    0
 448, 0,  453
 449, 0,  454
 450, 0,  455
 451, 0,  456
 452, 0,  459
 453, 1,  461
 454, 1,  462
 455, 1,  464
 456, 1,  465
 457, 0,  466
 458, 0,  468
 459, 1,  470
 460, 0,  471
 461, 1,  473
 462, 1,  474
 463, 0,  476
 464, 1,  477
 465, 1,  478
 466, 1,  479
 467, 0,    0
 468, 1,    0
 469, 0,    0
 470, 1,    0
 471, 1,    0
 472, 0,    0
 473, 1,    0
 474, 1,    0
 475, 0,    0
 476, 1,    0
 477, 1,    0
 478, 1,    0
 479, 1,    0
 480, 1,  480
 481, 0,  483
 482, 0,  484
 483, 1,  485
 484, 1,  486
 485, 1,  489
 486, 1,  490
 487, 0,  492
 488, 0,  496
 489, 1,  497
 490, 1,  500
 491, 0,  503
 492, 1,  504
 493, 0,  505
 494, 0,  506
 495, 0,  507
 496, 1,  509
 497, 1,  511
 498, 0,    0
 499, 0,    0
 500, 1,    0
 501, 0,    0
 502, 0,    0
 503, 1,    0
 504, 1,    0
 505, 1,    0
 506, 1,    0
 507, 1,    0
 508, 0,    0
 509, 1,    0
 510, 0,    0
 511, 1,    0
 512, 1,  512
 513, 0,  517
 514, 0,  520
 515, 0,  531
 516, 0,  534
 517, 1,  536
 518, 0,  538
 519, 0,  539
 520, 1,  540
 521, 0,  541
 522, 0,    0
 523, 0,    0
 524, 0,    0
 525, 0,    0
 526, 0,    0
 527, 0,    0
 528, 0,    0
 529, 0,    0
 530, 0,    0
 531, 1,    0
 532, 0,    0
 533, 0,    0
 534, 1,    0
 535, 0,    0
 536, 1,    0
 537, 0,    0
 538, 1,    0
 539, 1,    0
 540, 1,    0
 541, 1,    0
 542, 0,    0
 543, 0,    0
 544, 0,  545
 545, 1,  549
 546, 0,  551
 547, 0,  552
 548, 0,  554
 549, 1,  555
 550, 0,  557
 551, 1,  558
 552, 1,  565
 553, 0,  568
 554, 1,  569
 555, 1,  570
 556, 0,  571
 557, 1,  572
 558, 1,  574
 559, 0,  575
 560, 0,    0
 561, 0,    0
 562, 0,    0
 563, 0,    0
 564, 0,    0
 565, 1,    0
 566, 0,    0
 567, 0,    0
 568, 1,    0
 569, 1,    0
 570, 1,    0
 571, 1,    0
 572, 1,    0
 573, 0,    0
 574, 1,    0
 575, 1,    0
 576, 1,  576
 577, 0,  578
 578, 1,  579
 579, 1,  580
 580, 1,  581
 581, 1,  582
 582, 1,  584
 583, 0,  586
 584, 1,  587
 585, 0,  589
 586, 1,  592
 587, 1,  595
 588, 0,  596
 589, 1,  597
 590, 0,  598
 591, 0,  601
 592, 1,  602
 593, 0,  603
 594, 0,  604
 595, 1,  605
 596, 1,    0
 597, 1,    0
 598, 1,    0
 599, 0,    0
 600, 0,    0
 601, 1,    0
 602, 1,    0
 603, 1,    0
 604, 1,    0
 605, 1,    0
 606, 0,    0
 607, 0,    0
 608, 1,  608
 609, 1,  609
 610, 1,  610
 611, 0,  615
 612, 0,  616
 613, 0,  617
 614, 0,  618
 615, 1,  619
 616, 1,  621
 617, 1,  622
 618, 1,  623
 619, 1,  624
 620, 0,  625
 621, 1,  626
 622, 1,  631
 623, 1,  632
 624, 1,  636
 625, 1,  637
 626, 1,    0
 627, 0,    0
 628, 0,    0
 629, 0,    0
 630, 0,    0
 631, 1,    0
 632, 1,    0
 633, 0,    0
 634, 0,    0
 635, 0,    0
 636, 1,    0
 637, 1,    0
 638, 0,    0
 639, 0,    0
 640, 0,  643
 641, 0,  647
 642, 0,  648
 643, 1,  649
 644, 0,  653
 645, 0,  654
 646, 0,  655
 647, 1,  656
 648, 1,  658
 649, 1,  659
 650, 0,  660
 651, 0,  665
 652, 0,  666
 653, 1,  667
 654, 1,  669
 655, 1,  670
 656, 1,    0
 657, 0,    0
 658, 1,    0
 659, 1,    0
 660, 1,    0
 661, 0,    0
 662, 0,    0
 663, 0,    0
 664, 0,    0
 665, 1,    0
 666, 1,    0
 667, 1,    0
 668, 0,    0
 669, 1,    0
 670, 1,    0
 671, 0,    0
 672, 1,  672
 673, 1,  673
 674, 1,  674
 675, 0,  676
 676, 1,  678
 677, 0,  680
 678, 1,  684
 679, 0,  686
 680, 1,  689
 681, 0,  691
 682, 0,  692
 683, 0,  695
 684, 1,  696
 685, 0,  697
 686, 1,  699
 687, 0,  701
 688, 0,    0
 689, 1,    0
 690, 0,    0
 691, 1,    0
 692, 1,    0
 693, 0,    0
 694, 0,    0
 695, 1,    0
 696, 1,    0
 697, 1,    0
 698, 0,    0
 699, 1,    0
 700, 0,    0
 701, 1,    0
 702, 0,    0
 703, 0,    0
 704, 0,  705
 705, 1,  707
 706, 0,  708
 707, 1,  709
 708, 1,  710
 709, 1,  711
 710, 1,  712
 711, 1,  714
 712, 1,  715
 713, 0,  718
 714, 1,  720
 715, 1,  721
 716, 0,  726
 717, 0,  727
 718, 1,  728
 719, 0,  729
 720, 1,  730
 721, 1,  731
 722, 0,  733
 723, 0,  734
 724, 0,    0
 725, 0,    0
 726, 1,    0
 727, 1,    0
 728, 1,    0
 729, 1,    0
 730, 1,    0
 731, 1,    0
 732, 0,    0
 733, 1,    0
 734, 1,    0
 735, 0,    0
 736, 0,  737
 737, 1,  738
 738, 1,  740
 739, 0,  741
 740, 1,  742
 741, 1,  744
 742, 1,  746
 743, 0,  747
 744, 1,  749
 745, 0,  750
 746, 1,  753
 747, 1,  756
 748, 0,  760
 749, 1,  761
 750, 1,  765
 751, 0,    0
 752, 0,    0
 753, 1,    0
 754, 0,    0
 755, 0,    0
 756, 1,    0
 757, 0,    0
 758, 0,    0
 759, 0,    0
 760, 1,    0
 761, 1,    0
 762, 0,    0
 763, 0,    0
 764, 0,    0
 765, 1,    0
 766, 0,    0
 767, 0,    0
 768, 1,  768
 769, 0,  772
 770, 0,  773
 771, 0,  776
 772, 1,  778
 773, 1,  780
 774, 0,  783
 775, 0,  787
 776, 1,  792
 777, 0,  795
 778, 1,  796
 779, 0,  798
 780, 1,    0
 781, 0,    0
 782, 0,    0
 783, 1,    0
 784, 0,    0
 785, 0,    0
 786, 0,    0
 787, 1,    0
 788, 0,    0
 789, 0,    0
 790, 0,    0
 791, 0,    0
 792, 1,    0
 793, 0,    0
 794, 0,    0
 795, 1,    0
 796, 1,    0
 797, 0,    0
 798, 1,    0
 799, 0,    0
 800, 0,  803
 801, 0,  804
 802, 0,  805
 803, 1,  806
 804, 1,  807
 805, 1,  808
 806, 1,  810
 807, 1,  812
 808, 1,  813
 809, 0,  814
 810, 1,  815
 811, 0,  816
 812, 1,  817
 813, 1,  820
 814, 1,  826
 815, 1,  829
 816, 1,  831
 817, 1,    0
 818, 0,    0
 819, 0,    0
 820, 1,    0
 821, 0,    0
 822, 0,    0
 823, 0,    0
 824, 0,    0
 825, 0,    0
 826, 1,    0
 827, 0,    0
 828, 0,    0
 829, 1,    0
 830, 0,    0
 831, 1,    0
 832, 1,  832
 833, 1,  833
 834, 0,  838
 835, 0,  839
 836, 0,  842
 837, 0,  843
 838, 1,  844
 839, 1,  847
 840, 0,  849
 841, 0,  850
 842, 1,  851
 843, 1,  852
 844, 1,  853
 845, 0,  856
 846, 0,  857
 847, 1,  859
 848, 0,  863
 849, 1,    0
 850, 1,    0
 851, 1,    0
 852, 1,    0
 853, 1,    0
 854, 0,    0
 855, 0,    0
 856, 1,    0
 857, 1,    0
 858, 0,    0
 859, 1,    0
 860, 0,    0
 861, 0,    0
 862, 0,    0
 863, 1,    0
 864, 1,  864
 865, 1,  865
 866, 0,  867
 867, 1,  868
 868, 1,  869
 869, 1,  871
 870, 0,  873
 871, 1,  874
 872, 0,  875
 873, 1,  877
 874, 1,  881
 875, 1,  882
 876, 0,  885
 877, 1,  887
 878, 0,  888
 879, 0,  890
 880, 0,  891
 881, 1,  893
 882, 1,  894
 883, 0,    0
 884, 0,    0
 885, 1,    0
 886, 0,    0
 887, 1,    0
 888, 1,    0
 889, 0,    0
 890, 1,    0
 891, 1,    0
 892, 0,    0
 893, 1,    0
 894, 1,    0
 895, 0,    0
 896, 0,  897
 897, 1,  898
 898, 1,  899
 899, 1,  902
 900, 0,  904
 901, 0,  907
 902, 1,  908
 903, 0,  910
 904, 1,  913
 905, 0,  919
 906, 0,  921
 907, 1,  922
 908, 1,  927
 909, 0,    0
 910, 1,    0
 911, 0,    0
 912, 0,    0
 913, 1,    0
 914, 0,    0
 915, 0,    0
 916, 0,    0
 917, 0,    0
 918, 0,    0
 919, 1,    0
 920, 0,    0
 921, 1,    0
 922, 1,    0
 923, 0,    0
 924, 0,    0
 925, 0,    0
 926, 0,    0
 927, 1,    0
 928, 1,  928
 929, 0,  931
 930, 0,  933
 931, 1,  935
 932, 0,  936
 933, 1,  938
 934, 0,  939
 935, 1,  942
 936, 1,  944
 937, 0,  945
 938, 1,  946
 939, 1,  948
 940, 0,  949
 941, 0,  950
 942, 1,  951
 943, 0,  954
 944, 1,  958
 945, 1,  959
 946, 1,    0
 947, 0,    0
 948, 1,    0
 949, 1,    0
 950, 1,    0
 951, 1,    0
 952, 0,    0
 953, 0,    0
 954, 1,    0
 955, 0,    0
 956, 0,    0
 957, 0,    0
 958, 1,    0
 959, 1,    0
 960, 0,  962
 961, 0,  964
 962, 1,  965
 963, 0,  966
 964, 1,  967
 965, 1,  968
 966, 1,  971
 967, 1,  972
 968, 1,  973
 969, 0,  977
 970, 0,  979
 971, 1,  985
 972, 1,  987
 973, 1,  988
 974, 0,  991
 975, 0,    0
 976, 0,    0
 977, 1,    0
 978, 0,    0
 979, 1,    0
 980, 0,    0
 981, 0,    0
 982, 0,    0
 983, 0,    0
 984, 0,    0
 985, 1,    0
 986, 0,    0
 987, 1,    0
 988, 1,    0
 989, 0,    0
 990, 0,    0
 991, 1,    0
 992, 0,  993
 993, 1,  994
 994, 1,  995
 995, 1,  997
 996, 0,  999
 997, 1, 1000
 998, 0, 1002
 999, 1, 1004
1000, 1, 1005
1001, 0, 1006
1002, 1, 1007
1003, 0, 1009
1004, 1, 1012
1005, 1, 1018
1006, 1, 1019
1007, 1, 1021
1008, 0, 1022
1009, 1,    0
1010, 0,    0
1011, 0,    0
1012, 1,    0
1013, 0,    0
1014, 0,    0
1015, 0,    0
1016, 0,    0
1017, 0,    0
1018, 1,    0
1019, 1,    0
1020, 0,    0
1021, 1,    0
1022, 1,    0
1023, 0,    0
========= ERROR SUMMARY: 0 errors
$

Note that all threads can proceed thru the "acquire lock" section without any inter-thread dependency (although only one thread at a time will "win"), and note that all threads can proceed thru the "critical section" without any inter-thread dependency.

(*) Note that this situation is mitigated in Volta



来源:https://stackoverflow.com/questions/45718781/thread-warp-local-lock-in-cuda

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!