/*
 * Invoke P_MUL_CROSS_ULOOP with the arguments 16 and a statement block that
 * assigns to pd the product of ps1 and ps2.
 *
 * Both operands are explicitly cast to uint32_t, so the multiplication is
 * carried out in unsigned 32-bit arithmetic. The casts matter if ps1/ps2 are
 * narrower than int (e.g. uint16_t): without them the operands would be
 * promoted to signed int and a large product (65535 * 65535) could overflow,
 * which is undefined behavior.
 *
 * NOTE(review): pd, ps1 and ps2 are not declared here; presumably they are
 * provided by the macro's expansion, and 16 presumably selects the 16-bit
 * unsigned variant -- confirm against the P_MUL_CROSS_ULOOP definition.
 */
P_MUL_CROSS_ULOOP(16, { pd = (uint32_t)ps1 * (uint32_t)ps2; })