What is the fastest way to transpose the bits in an 8x8 block of bits?

前端 未结 7 2077
余生分开走
余生分开走 2020-12-05 21:10

I'm not sure of the exact term for what I'm trying to do. I have an 8x8 block of bits stored in 8 bytes; each byte stores one row. When

7条回答
  •  广开言路
    2020-12-05 21:57

    Lisp prototype:

    ;; Global policy for all code compiled after this form: favour speed and
    ;; drop run-time safety checks.
    (declaim (optimize (speed 3) (safety 0)))
    ;; Transpose an 8x8 bit matrix stored one row per element.
    ;;   A      -- vector of 8 non-negative integers; only bits 0..7 are read.
    ;;   return -- fresh 8-element (unsigned-byte 8) vector B in which
    ;;             bit I of (aref B J) equals bit J of (aref A I).
    (defun bit-transpose (a)
      (declare (type (simple-array unsigned-byte 1) a))
      ;; :initial-element 0 is required: (setf ldb) reads the old element
      ;; before depositing the new bit, and reading an element of an array
      ;; created without initial contents is undefined.
      (let ((b (make-array 8 :element-type '(unsigned-byte 8)
                             :initial-element 0)))
        (dotimes (j 8)
          (dotimes (i 8)
            (setf (ldb (byte 1 i) (aref b j))
                  (ldb (byte 1 j) (aref a i)))))
        b))
    

    This is how you can run the code:

    ;; Example call. The `#+nil` prefix makes the reader skip the next form
    ;; (unless :NIL is in *FEATURES*), so it is not evaluated when the file is
    ;; loaded.
    #+nil
    (bit-transpose (make-array 8 :element-type 'unsigned-byte
                   :initial-contents '(1 2 3 4 5 6 7 8)))
    ;; => #(85 102 120 128 0 0 0 0)
    

    Occasionally I disassemble code to check that there are no unnecessary calls to safety functions.

    ;; Print the compiled code of BIT-TRANSPOSE for manual inspection.
    ;; Guarded by `#+nil`, so the reader skips it unless :NIL is in *FEATURES*.
    #+nil
    (disassemble #'bit-transpose)
    

    This is a benchmark. Run the function often enough to process a (binary) HDTV image.

    ;; Benchmark: time one BIT-TRANSPOSE call per 8x8 tile of a 1920x1080
    ;; bitmap, i.e. (1920/8) * (1080/8) = 32400 calls on the same input.
    ;; The previously unused second array was dropped so that nothing but the
    ;; input setup and the calls themselves is inside the timed region.
    #+nil
    (time
     (let ((a (make-array 8 :element-type 'unsigned-byte
                            :initial-contents '(1 2 3 4 5 6 7 8))))
       (dotimes (i (* (/ 1920 8) (/ 1080 8)))
         (bit-transpose a))))
    

    That took only 51 ms. Note that I'm consing quite a lot because the function allocates a new 8-byte array on every call. I'm sure an implementation in C can be tweaked a lot more.

    Evaluation took:
      0.051 seconds of real time
      0.052004 seconds of total run time (0.052004 user, 0.000000 system)
      101.96% CPU
      122,179,503 processor cycles
      1,048,576 bytes consed
    

    Here are some more test cases:

    ;; Generate 12 random test cases, each a list (INPUT-ROWS TRANSPOSED).
    ;; (random 256) is used so that every byte value 0..255 can occur;
    ;; (random 255) would never produce 255.
    #+nil
    (loop repeat 12 collect
      (let ((l (loop repeat 8 collect (random 256))))
        (list l (bit-transpose (make-array 8 :element-type 'unsigned-byte
                                             :initial-contents l)))))
    ;; => (((111 97 195 202 47 124 113 164) #(87 29 177 57 96 243 111 140))
    ;;     ((180 192 70 173 167 41 30 127) #(184 212 221 232 193 185 134 27))
    ;;     ((244 86 149 57 191 65 129 178) #(124 146 23 24 159 153 35 213))
    ;;     ((227 244 139 35 38 65 214 64) #(45 93 82 4 66 27 227 71))
    ;;     ((207 62 236 89 50 64 157 120) #(73 19 71 207 218 150 173 69))
    ;;     ((89 211 149 140 233 72 193 192) #(87 2 12 57 7 16 243 222))
    ;;     ((97 144 19 13 135 198 238 33) #(157 116 120 72 6 193 97 114))
    ;;     ((145 119 3 85 41 202 79 134) #(95 230 202 112 11 18 106 161))
    ;;     ((42 153 67 166 175 190 114 21) #(150 125 184 51 226 121 68 58))
    ;;     ((58 232 38 210 137 254 19 112) #(80 109 36 51 233 167 170 58))
    ;;     ((27 245 1 197 208 221 21 101) #(239 1 234 33 115 130 186 58))
    ;;     ((66 204 110 232 46 67 37 34) #(96 181 86 30 0 220 47 10)))
    

    Now I really want to see how my code compares to Andrejs Cainikovs' C solution (Edit: I think it's wrong):

    #include <string.h> /* memset */
    
    /* Input block: 8 rows of 8 bits, one row per byte. */
    unsigned char bytes_in[8]={1,2,3,4,5,6,7,8};
    /* Output block, overwritten by every call to bit_transpose(). */
    unsigned char bytes_out[8];
    
    /* please fill bytes_in[] here with some pixel-crap */
    
    /*
     * Transpose the 8x8 bit matrix in bytes_in[] into bytes_out[].
     *
     * Bit (7 - j) of bytes_out[i] receives bit (7 - i) of bytes_in[j]; that is,
     * both rows and columns are counted from the most significant bit.
     * Takes no arguments and returns nothing; works on the two globals above.
     */
    void bit_transpose(void){
      int i,j;
      memset(bytes_out, 0, sizeof bytes_out);
      for(i = 0; i < 8; i++) {
        for(j = 0; j < 8; j++) {
          /* Shift in bit (7 - i) of row j as the new lowest bit. */
          bytes_out[i] = (unsigned char)((bytes_out[i] << 1) | ((bytes_in[j] >> (7 - i)) & 0x01));
        }
      }
    }
    
    /* Benchmark driver: 100 passes, each calling bit_transpose() once per
       8x8 tile of a 1920x1080 bitmap. */
    int
    main()
    {
      const int tiles_per_frame = 1920/8*1080/8;
      int frame = 0;
      while (frame < 100) {
        int tile;
        for (tile = 0; tile < tiles_per_frame; ++tile) {
          bit_transpose();
        }
        ++frame;
      }
      return 0;
    }
    

    And benchmarking it:

    wg@hp:~/0803/so$ gcc -O3 trans.c
    wg@hp:~/0803/so$ time ./a.out 
    
    real    0m0.249s
    user    0m0.232s
    sys     0m0.000s
    

    Each loop over the HDTV image takes 2.5ms. That is quite a lot faster than my unoptimized Lisp.

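    Unfortunately the C code doesn't give the same results as my Lisp. It counts rows and bits from the most significant end, so its output is the Lisp result with the byte order and the bit order within each byte reversed: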

    #include <stdio.h> /* printf */
    /* Run one transpose and print the eight bytes of bytes_out[] in decimal,
       separated by spaces. */
    int
    main(void)
    {
      int i;
      bit_transpose();
      for(i=0;i<8;i++)
        printf("%d ",(int)bytes_out[i]);
      return 0;
    }
    wg@hp:~/0803/so$ ./a.out 
    0 0 0 0 1 30 102 170 
    

提交回复
热议问题