This intrinsic is named __mul128.
typedef unsigned long long BIG;

/*
 * Compute (v * 2^by) mod n using only BIG-width arithmetic.
 *
 * Handles only the "hard" case, where the high bit of n is set. That
 * precondition guarantees v < 2n for every representable v, so a single
 * conditional subtraction is enough to bring v into [0, n).
 *
 * v  - value to shift; may be >= n on entry.
 * n  - modulus; must have its most significant bit set.
 * by - number of doublings to apply; values <= 0 perform no doubling.
 *
 * Returns a value in [0, n).
 */
BIG shl_mod( BIG v, BIG n, int by )
{
    /* ">=" (not ">") so that v == n reduces to 0 instead of staying at n. */
    if (v >= n) {
        v -= n;
    }

    /* Invariant: 0 <= v < n at the top of every iteration. */
    for (int i = 0; i < by; i++) {
        BIG gap = n - v;    /* cannot wrap because v < n */

        if (v >= gap) {
            /* 2v >= n, so 2v mod n == 2v - n == v - (n - v); this form
             * avoids the overflow that computing 2v directly could hit.
             * ">=" covers 2v == n, which must yield 0 rather than n. */
            v -= gap;
        } else {
            /* 2v < n, so the doubling neither overflows nor needs
             * reducing. */
            v <<= 1;
        }
    }
    return v;
}
Now you can use shl_mod(B, n, 64) to compute (B * 2^64) mod n.