/*
 * Invoke P_MUL_CROSS_ULOOP with 8 as its first argument and, as its second,
 * a statement block that assigns to pd the product of ps1 and ps2, each cast
 * to uint16_t before the multiplication.
 *
 * NOTE(review): the macro definition is not visible in this chunk.
 * Presumably 8 selects the element bit width and pd / ps1 / ps2 are the
 * per-element destination and source names the macro provides to the body;
 * confirm against the macro definition.
 *
 * NOTE(review): the casts look intended to widen the operands so the product
 * is not truncated to the source width. If the sources are 8-bit unsigned
 * values, the largest product (255 * 255 = 65025) fits in 16 bits; verify
 * the declared types of ps1, ps2 and pd.
 */
P_MUL_CROSS_ULOOP(8, { pd = (uint16_t)ps1 * (uint16_t)ps2; })