1、浮点数和定点数存储
https://blog.csdn.net/niaolianjiulin/article/details/82764511
2、浮点转定点
本篇主要介绍另一种浮点转定点的方式,并结合 NEON 代码进行讲解(建议先阅读上面链接中的浮点/定点基础知识,已经熟悉的读者可以跳过)
/*
 * Return the raw bit pattern of a float as a uint32_t.
 *
 * The value is stored through one union member and read back through the
 * other, so the bits are reinterpreted rather than numerically converted.
 */
static inline uint32_t fp32_to_bits(float f)
{
    union {
        float as_value;
        uint32_t as_bits;
    } pun;

    pun.as_value = f;
    return pun.as_bits;
}
/*
 * Demo: scale four int32 values by a float factor using only integer NEON
 * arithmetic, then print the float reference and the fixed-point result.
 *
 * With e = biased exponent of scale and m = its significand in [1, 2),
 * scale = m * 2^(e - 127). The code uses
 *   multiplier = m * 2^30   (a Q31 value in [0.5, 1))
 *   shift      = 126 - e    (remaining right shift, 0..31)
 * so that value * scale ~= ((value * multiplier) >> 31) >> shift.
 *
 * Needs <arm_neon.h>, <assert.h>, <stdint.h> and <stdio.h>.
 */
int main(void)
{
    int32_t value[4] = {234, 536, 382, 430};
    int32x4_t vacc0x0123 = vld1q_s32(value);

    const float scale = 0.05434f;
    /* The decomposition below is only valid for scale in [2^-32, 1). */
    assert(scale >= 0x1.0p-32f);
    assert(scale < 1.0f);

    const uint32_t scale_bits = fp32_to_bits(scale);

    /*
     * Take the 23 stored mantissa bits, add the implicit leading 1, and move
     * the 24-bit significand up to bits 30..7.
     * Multiplier is in [0x40000000, 0x7FFFFF80] range.
     */
    const int32_t multiplier =
        (int32_t)(((scale_bits & UINT32_C(0x007FFFFF)) | UINT32_C(0x00800000)) << 7);
    assert(multiplier >= INT32_C(0x40000000));
    assert(multiplier <= INT32_C(0x7FFFFF80));

    /*
     * scale_bits >> 23 is the sign bit plus the biased exponent (0..511).
     * Cast it to a signed type before subtracting so that an out-of-range
     * scale yields a plain negative or too-large shift for the asserts,
     * instead of going through unsigned wrap-around.
     * Shift is in [0, 31] range.
     */
    const int32_t shift = 127 + 31 - 32 - (int32_t)(scale_bits >> 23);
    assert(shift >= 0);
    assert(shift < 32);
    const int32_t right_shift = -shift;

    /* Rounding doubling multiply, high half: (2 * acc * multiplier + 2^31) >> 32. */
    const int32x4_t vmultiplier = vld1q_dup_s32(&multiplier);
    vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);

    /*
     * When the shift is non-zero, add (acc >> 31), i.e. -1 for negative
     * lanes, so the rounding shift below rounds ties away from zero instead
     * of toward +infinity. The mask disables the adjustment for shift == 0.
     */
    const int32x4_t vright_shift = vld1q_dup_s32(&right_shift);
    const int32x4_t vzero_shift_mask =
        vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
    vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);

    /* A negative shift count makes vrshlq a rounding right shift. */
    vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);

    int32_t result[4];
    vst1q_s32(result, vacc0x0123);

    /* Float reference first, fixed-point result second. */
    printf("%f %f %f %f\n",
           (value[0] * scale), (value[1] * scale), (value[2] * scale), (value[3] * scale));
    printf("%d %d %d %d\n",
           (int)result[0], (int)result[1], (int)result[2], (int)result[3]);
    return 0;
}
1)首先看 fp32_to_bits 函数,它巧妙地利用联合体各成员共用同一块内存的特性,把 float 的二进制位原样读取为 uint32_t
效果上等价于 const uint32_t scale_bits = *(uint32_t*)(&scale)(但这种指针强转违反 C 的严格别名规则,用联合体或 memcpy 才是安全写法)
2)得到浮点数的二进制表示后,取出低 23 位尾数并补上隐含的最高位 1(即 | 0x00800000),再左移 7 位,得到范围在 [0x40000000, 0x7FFFFF80] 内的 int32_t 乘数 multiplier
3)接下来就是比较费解的一步,
就是 const int32_t shift = 127 + 31 - 32 - (fp32_to_bits(scale) >> 23)
(fp32_to_bits(scale) >> 23) 比较好理解,就是取出符号位与指数位;scale 为正数时符号位为 0,结果即偏置后的指数 127+m(m 为把浮点数写成二进制科学计数法 1.xxx×2^m 时的指数)

。。。。。。待更新
来源:https://blog.csdn.net/qq_20880415/article/details/99688542