For example, I have a cstring "E8 48 D8 FF FF 8B 0D"
(including spaces) which needs to be converted into the equivalent unsigned char array {0xE8,0x48,0xD8,0xFF,0xFF,0x8B,0x0D}.
The old C way: do it by hand ;-) (there are many shorter ways, but I'm not golfing, I'm going for run-time speed).
enum { NBBYTES = 7 };
char res[NBBYTES+1];
const char * c = "E8 48 D8 FF FF 8B 0D";
const char * p = c;
int i = 0;
for (i = 0; i < NBBYTES; i++){
switch (*p){
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
res[i] = *p - '0';
break;
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
res[i] = *p - 'A' + 10;
break;
default:
// parse error, throw exception
;
}
p++;
switch (*p){
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
res[i] = res[i]*16 + *p - '0';
break;
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
res[i] = res[i]*16 + *p - 'A' + 10;
break;
default:
// parse error, throw exception
;
}
p++;
if (*p == 0) { continue; }
if (*p == ' ') { p++; continue; }
// parse error, throw exception
}
// let's show the result, C style IO, just cout if you want C++
for (i = 0 ; i < 7; i++){
printf("%2.2x ", 0xFF & res[i]);
}
printf("\n");
Now another one that allows any number of digits in each number and any number of spaces to separate them, including leading or trailing spaces (Ben's specs):
#include <stdio.h>
#include <stdlib.h>
/*
 * Parses a string of space-separated upper-case hex numbers into exactly
 * NBBYTES bytes and prints them. Any number of spaces may precede, separate or
 * follow the numbers. Prints an error and exits with status -1 on an invalid
 * character or when the number count is not NBBYTES.
 */
int main(){
    enum { NBBYTES = 7 };
    char res[NBBYTES];
    const char * c = "E8 48 D8 FF FF 8B 0D";
    const char * p = c;   /* always points at the character AFTER ch */
    int i = -1;           /* index of the number being built; -1 until the first one starts */
    char ch = ' ';        /* virtual leading space, so the first number is opened like the others */
    while (ch && i < NBBYTES){
        switch (ch){
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            /* Pre-adjust so the shared subtraction below nets out to ch - '0'. */
            ch -= '0' + 10 - 'A';
            /* fall through */
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
            ch -= 'A' - 10;
            res[i] = res[i]*16 + ch;
            break;
        case ' ':
            /* A space followed by a non-space, non-terminator character opens
               a new number. Checking for the terminator keeps trailing spaces
               (or an empty input) from opening a phantom extra number. */
            if (*p != ' ' && *p != 0) {
                if (i == NBBYTES-1){
                    printf("parse error, throw exception\n");
                    exit(-1);
                }
                res[++i] = 0;
            }
            break;
        default:
            printf("parse error, throw exception\n");
            exit(-1);
        }
        ch = *(p++);
    }
    /* Fewer than NBBYTES numbers were found. */
    if (i != NBBYTES-1){
        printf("parse error, throw exception\n");
        exit(-1);
    }
    for (i = 0 ; i < NBBYTES; i++){
        /* mask with 0xFF so a negative char is not sign-extended when printed */
        printf("%2.2x ", 0xFF & res[i]);
    }
    printf("\n");
    return 0;
}
No, it's not really obfuscated... but well, it looks like it is.
If you know the length of the string to be parsed beforehand (e.g. you are reading something from /proc) you can use sscanf with the 'hh' type modifier, which specifies that the next conversion is one of diouxX and the pointer to store it will be either signed char or unsigned char.
// example: ipv6 address as seen in /proc/net/if_inet6:
char myString[] = "fe80000000000000020c29fffe01bafb";
unsigned char addressBytes[16];
sscanf(myString, "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx
%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx", &addressBytes[0],
&addressBytes[1], &addressBytes[2], &addressBytes[3], &addressBytes[4],
&addressBytes[5], &addressBytes[6], &addressBytes[7], &addressBytes[8],
&addressBytes[9], &addressBytes[10], addressBytes[11],&addressBytes[12],
&addressBytes[13], &addressBytes[14], &addressBytes[15]);
int i;
for (i = 0; i < 16; i++){
printf("addressBytes[%d] = %02x\n", i, addressBytes[i]);
}
Output:
addressBytes[0] = fe
addressBytes[1] = 80
addressBytes[2] = 00
addressBytes[3] = 00
addressBytes[4] = 00
addressBytes[5] = 00
addressBytes[6] = 00
addressBytes[7] = 00
addressBytes[8] = 02
addressBytes[9] = 0c
addressBytes[10] = 29
addressBytes[11] = ff
addressBytes[12] = fe
addressBytes[13] = 01
addressBytes[14] = ba
addressBytes[15] = fb
This answers the original question, which asked for a C++ solution.
You can use an istringstream with the hex manipulator:
std::string hex_chars("E8 48 D8 FF FF 8B 0D");
std::istringstream hex_chars_stream(hex_chars);
std::vector<unsigned char> bytes;
unsigned int c;
while (hex_chars_stream >> std::hex >> c)
{
bytes.push_back(c);
}
Note that c must be an int (or long, or some other integer type), not a char; if it is a char (or unsigned char), the wrong >> overload will be called and individual characters will be extracted from the string, not hexadecimal integer strings.
Additional error checking to ensure that the extracted value fits within a char would be a good idea.
You'll never convince me that this operation is a performance bottleneck. The efficient way is to make good use of your time by using the standard C library:
/*
 * Parses one hexadecimal byte starting at s (leading whitespace is skipped by
 * strtoul). On success stores the position just past the number in *endptr
 * and returns the value. On failure (no hex digits, or a value that does not
 * fit in one byte) stores s itself in *endptr, so the caller can detect that
 * no progress was made, and returns 0.
 */
static unsigned char gethex(const char *s, char **endptr) {
  unsigned long value;
  char *stop;
  assert(s);
  assert(endptr);
  value = strtoul(s, &stop, 16);
  if (stop == s || value > 0xFF) {
    *endptr = (char *)s;
    return 0;
  }
  *endptr = stop;
  return (unsigned char)value;
}

/*
 * Converts a string of whitespace-separated hexadecimal bytes (for example
 * "E8 48 D8 FF FF 8B 0D") into a newly allocated byte array.
 *
 * s      - NUL-terminated input; leading, trailing and repeated whitespace
 *          is accepted.
 * length - receives the number of bytes written; set to 0 on failure.
 *
 * Returns a malloc'd buffer the caller must free(), or NULL if allocation
 * fails or the input contains anything that is not a hex byte.
 */
unsigned char *convert(const char *s, int *length) {
  unsigned char *answer;
  unsigned char *p;
  assert(s);
  assert(length);
  *length = 0;
  /* Every number after the first needs at least one separator character plus
     one digit, so strlen/2 + 1 bounds the count for any spacing and is never
     zero. */
  answer = (unsigned char *)malloc(strlen(s) / 2 + 1);
  if (!answer)
    return NULL;
  p = answer;
  for (;;) {
    char *next;
    /* Cast: passing a negative char to isspace is undefined behaviour. */
    while (isspace((unsigned char)*s)) s++;
    if (!*s)
      break;
    *p = gethex(s, &next);
    if (next == s) {
      /* No progress means invalid input; bail out instead of looping forever. */
      free(answer);
      return NULL;
    }
    p++;
    s = next;
  }
  *length = (int)(p - answer);
  return answer;
}
Compiled and tested. Works on your example.
Use the "old" sscanf() function:
string s_hex = "E8 48 D8 FF FF 8B 0D"; // source string: two hex digits + one separator per byte
// Output array with one element per 3-character group. Note the brackets:
// new char(n) would allocate a single char initialised to n, not n chars.
char *a_Char = new char[ s_hex.length()/3 + 1 ];
for( unsigned i = 0, uchr = 0 ; i < s_hex.length() ; i += 3 ) {
    // "%2x" reads at most two hex digits into an unsigned int
    if( sscanf( s_hex.c_str() + i, "%2x", &uchr ) != 1 ) {
        break; // group is not a hex number: stop instead of storing a stale value
    }
    a_Char[i/3] = static_cast<char>( uchr ); // save as char
}
delete[] a_Char; // array form must match new[]
(ch >= 'A')? (ch - 'A' + 10): (ch - '0')
.