How do I find next bit to change in a Gray code in constant time?

后端未结

关注

 7  791

终归单人心 2020-12-25 08:59

I have a small 8-bit processor which has a N-to-M decoder on some output lines - eg, for the 5 to 32 bit case, I write 00101 and bit 5 changes state. The only interface to

7条回答

不思量自难忘° (楼主)

2020-12-25 09:43

/*
As previously posted this answer, https://stackoverflow.com/questions/4648716#42865348 using a Johnson counter Gray code, is very simple:

Number_of_Bit_To_Flip = ++Number_of_Bit_To_Flip % Total_Number_of_Counter_Bits

which is executed on every event occurrence.

Otherwise, using a Binary Reflected Gray Code and a 4 byte base 2 counter n, ...

Method 1 - using a table */

static const byte twos[ ] = { //  note pattern    V       V       V V V V
  0,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,  6,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,    7,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,  6,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,      8,
};

//  operation count worst case    3 logic   4 index     1 addition
//            for 7/8 of calls    2         3           1

byte bit_change(byte ctr[4]) {
 return  
  (byte[]){ 
    (byte[]){  16 + twos[ctr[2]]               ,
      (byte[]){24 + twos[ctr[3]] ,
               31                } [ !ctr[3] ] } [ !ctr[2] ] ,
    (byte[]){   0 + twos[ctr[0]]               ,
                8 + twos[ctr[1]]               } [ !ctr[0] ] }
                                                         [ctr[0] || ctr[1]];

// --------  NB. orphaned, included for pedagogic purposes  --------

  return  (byte[]){ 0 + twos[ctr[0]] ,   //    this IS totally time constant
                    8 + twos[ctr[1]] ,   // NB. 31 + time "constantator" 
                   16 + twos[ctr[2]] ,   // case ctr[i]==0 for all i
                   24 + twos[ctr[3]] ,
                   31 + twos[ctr[0]] } [ !ctr[0]*( 1+ 
                                         !ctr[1]*( 1+
                                         !ctr[2]*( 1+
                                         !ctr[3]       ) ) ) ];  
 }

/Method 2 - no tables */

byte bin2toN(byte b){  
   return 
      (byte []) {(byte []) {(byte []) {7,6} [b < 128 ] , 
                            (byte []) {5,4} [b <  32 ] } [b < 64 ] ,
                 (byte []) {(byte []) {3,2} [b <   8 ] , 
                            (byte []) {1,0} [b <   2 ] } [b <  4 ] } [b < 16 ] ;
} 

byte flip_bit(byte n[4]){  
return    
  (byte []){ 
    (byte []){   16 + bin2toN(  n[2] & -n[2] )            ,
      (byte []){ 24 + bin2toN(  n[3] & -n[3] ),
                 31                           } [ !n[3] ] } [ !n[2] ] ,
    (byte []){    0 + bin2toN(  n[0] & -n[0] )            ,
                  8 + bin2toN(  n[1] & -n[1] )            } [ !n[0] ] } 
                                                               [ n[0] || n[1] ] ;

 // - - - - - - - - - - - - ORPHANED, fails on zero - - - - - - - - - - - -

  return                             //  included for pedagogic purposes
    (byte []) {                         
      (byte []) { bin2toN(  n[2] & -n[2] ) + 16 ,
                  bin2toN(  n[3] & -n[3] ) + 24 } [ !n[2] ] ,
      (byte []) { bin2toN(  n[0] & -n[0] ) +  0 ,
                  bin2toN(  n[1] & -n[1] ) +  8 } [ !n[0] ] } [ n[0] || n[1] ] ;
}

/*
Bit Bunnies and Cargo Cult Coders have no need to read further.

The efficiency of execution of the above code depends on the fact that n[0], n[1], ... are computed at compile time as fixed addresses which is quite conventional. Also, using a call-by-need optimizing compiler will expedite the array contents so only one indexed value needs to be calculated. This compiler sophistication is likely missing but it is easy to manually assemble the raw machine code to do it (basically a switch, computed goto, etc. ).

Analysis of the above algorithms, using the orphaned code, shows that every function call will execute exactly the same instruction sequence, optimized or not.

In both methods, the non-orphaned returns require handling the case when there is counter roll over on 0, consequently using extra index and logical (!) operations. This extra happens for 1/2 of 1/2 of 1/2 or 1/8 of the total counts, and for one count in this 1/8 there is nothing to do but return 31.

The first method requires 2 logical operations (! ||), 1 addition and 3 index calculations for 7/8 of the total counts. On a single count of zero, 3 logical and 3 index operations are needed and the rest of the other 1/8 need 3 logical, 1 addition and 4 index operations.

The final code on execution of method 2 (compiled optimally), for 7/8 of the calculations, uses 7 logical operations (|| & ! < - the last is two's complement), 1 arithmetic (+) and 5 calculated index operations. The other 1/8, but one instance, need 8 logical, 1 addition and 6 calculated index operations.

Unfortunately, no flash of divine inspiration manifested any cargo code. This is an abridged tale of how this composition's authorship happened.

How this was done involved a crucial preliminary investigation as documented: https://stackoverflow.com/a/42846062. Then code was derived using a succesive refinement process commencing with an assesment of the algorithms in this post.
Specifically, this answer: https://stackoverflow.com/a/4657711 .

This algorithm's time variant execution from the loop overhead will be poignently and prominently accentuated by reducing the return calculation to a single addition and two index operations.

byte bit_change(byte ctr[4]) {
  static byte ones[256]; //  this sparse RA is precomputed once
    for (byte i = 255; --i; ones[i]=0) ;
    ones[ 0] =    ones[ 1] = 0; ones[ 3] = 1; ones[  7] = 2; 
    ones[15] = 3; ones[31] = 4; ones[63] = 5; ones[127] = 6; ones[255] = 7;

//   { ie. this very sparse array is completely adequate for original code
//    0,0, ,1, , , ,2, , , , , , , ,3, , , , , , , , , , , , , , , ,4,
//     , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,5,
//     , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
//     , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,6,
//     , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
//     , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
//     , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,
//     , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ,7,  }

//  1/2 of count uses 2 index 2 conditionals 0 increment 1 logic 2 +/-  1 x
//  1/4               3       4              1           1       2      1
//  1/8               4       6              2           1       2      1
//  1/16              5       8              3           1       2      1
//     average  14 = 3.5      5             1.5          1       2      1  

unsigned char i;  for (i = 0; i < 4; i++) {         //  original code
                    unsigned char x = counter[i];   //  "works" but
                         if (x) {                   //  still fails on 
                           x ^= x - 1;              //  count 0 rollover
                           return 8 * i + ones[x];
                   }     }

//  .............................  refinement .............................
byte x;             for (byte i = 0; i < 4; i++)         //
                         if (x = counter[i]) 
                              return  i<<3 + ones[x ^ x - 1];
}

/-------------------------------------------------------------- --------------------------------/

//              for (byte i = 255; --i; twos[i] == ones[i ^ i-1] ) ;
// ones[ ] uses only 9 of 1/4K inefficient,  twos[ ] uses all 1/4K

static const byte twos[ ] = {  
 0,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
   0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,  6,
   0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
   0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,    7,
   0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
   0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,  6,
   0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
   0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,      8,
};


//  fix count 0 rollover failure, make time absolutely constant as per OP

byte f0(byte ctr[4]) {
  byte ansr=31;
  for (byte i = 0; i < 4; i++) 
   if (ctr[i]) {
       ansr=(byte[]){0,8,16,24}[i] + twos[ctr[i]];    //  i<<3 faster?
       break;
    }
  for (; i < 4; i++) if (ctr[i]) ;
  return ansr;
}

//..................................................

// loop ops (average):  1.5 ++   2.5 []  5 if
// result calculation:   1  +     2  []       significant loop overhead

byte f1(byte counter[4]) {
  for (byte i = 0; i < 4; i++) 
    if (counter[i]) 
      return  (byte[]){  0 + twos[counter[0]], 
                         8 + twos[counter[1]],                           
                        16 + twos[counter[2]], 
                        24 + twos[counter[3]]  } [i];
  return 31;
}

//..................................................

//    5 +/++    6 []     10 if

byte f2(byte counter[4]){  
  byte i, ansr=31;
  for (i = 0; i < 4; i++) {      //  definite loop overhead
    if (counter[i]) {
       ansr= (byte[]){   0 + twos[counter[0]], 
                         8 + twos[counter[1]],                           
                        16 + twos[counter[2]], 
                        24 + twos[counter[3]]  } [i];
       break;
  } }
  for (; i < 4; i++)   if (counter[i]);  //   time "constantator"
  return ansr;
}

//..................................................

//   4 +    4 !    3 x    1 []    1 computed goto/switch

byte f3(byte counter[4]){          // default: time "constantator"
  switch (!counter[0]*( 1 +        //           counter[0]==0 !!
          !counter[1]*( 1 +
          !counter[2]*( 1 +
          !counter[3]        ) ) ) ){
              case 0:  return   0 +  twos[ counter[0] ] ;
              case 1:  return   8 +  twos[ counter[1] ] ;
              case 2:  return  16 +  twos[ counter[2] ] ;
              case 3:  return  24 +  twos[ counter[3] ] ;
              default: return  31 +  twos[ counter[0] ] ;
}         }

There is a comparable chronology for method 2.

This sequence has been radically attenuated and abbreviated to an intermediate example:

Inadvertently, the code posted in https://stackoverflow.com/a/42865348 was not the exclusively byte sized one as intended. The intended code is in this post. */

byte log2toN(byte b){ return    ( b & 0xF0 ? 4:0 )  +   //    4444....
                                ( b & 0xCC ? 2:0 )  +   //    22..22..
                                ( b & 0xAA ? 1:0 )  ;   //    1.1.1.1.
} 
// . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
byte BRGC(byte n){ return n ^ n>>1; }

byte bitNbyte(byte n){ return log2toN( BRGC(n) ^ BRGC(n+1) ); }

byte bit2flip(byte n[4]){  
   boolean n3=n[3]<255, n2=n[2]<255, n1=n[1]<255, n0=n[0]<255;
           return n0*( 0 + bitNbyte( n[0] ) ) + !n0*( 
                  n1*( 8 + bitNbyte( n[1] ) ) + !n1*( 
                  n2*(16 + bitNbyte( n[2] ) ) + !n2*( 
                  n3*(24 + bitNbyte( n[3] ) ) + !n3*( 0 ) ) ) );
}
// . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
byte bit_flip(byte n[4]){  
//switch(     ( !!n[0] << 3 ) | ( !!n[1] << 2 ) | ( !!n[2] << 1 )  |  !!n[3] )
  switch( 15 ^ ( !n[0] << 3 ) ^  ( !n[1] << 2 ) ^  ( !n[2] << 1 )  ^   !n[3] ) { 
      case 15: case 14: case 13: case 12:
      case 11: case 10: case  9: case  8:  return  0 + log2toN(  n[0] & -n[0]  );
      case  7: case  6: case  5: case  4:  return  8 + log2toN(  n[1] & -n[1]  );
                        case  3: case  2:  return 16 + log2toN(  n[2] & -n[2]  );
                                 case  1:  return 24 + log2toN(  n[3] & -n[3]  );
      default:                             return 31 + log2toN(  n[0] & -n[0]  );
}           }

/*
Rhetorically, the answer to How do I find ... can only be answered explicitly in the personal sense (see this answer: https://stackoverflow.com/a/42846062) as it is not possible to speak for other individuals' cognitive abilities.

The content of https://stackoverflow.com/a/42846062 is crucial for background information and reflects the very personal pedantic mechanism required for the mental gymnastics to solve this problem. Undoubtedly, the milieu and plethora of material is daunting but this is exactly the personal approach taken in garnering sufficient insight, repertoire, anecdotal precedents, etc. to extrapolate and interpolate an answer to answer specifically, what program meets the criteria, exactly. Further, it is this very "chase" that excites and motivates the (perhaps pathological) psyche to invest time and effort to satiate curiosity with an inquisitive quest. */

void setup() {    }

void loop() {     }

0 讨论(0)

查看其它7个回答