/*
 * Invoke P_MUL_CROSS_LOOP (defined outside this view) with 16 as its first
 * argument and a brace-enclosed statement block as its second.
 *
 * The block stores into pd the product of ps1 and ps2. Both operands are
 * explicitly cast to int32_t before the multiplication.
 *
 * NOTE(review): 16 is presumably the element width in bits, and pd/ps1/ps2
 * are presumably declared by the macro's expansion (one destination and two
 * source elements) -- confirm against the P_MUL_CROSS_LOOP definition.
 * NOTE(review): the int32_t casts look intended to keep the full product of
 * two 16-bit inputs without truncation -- confirm the types of ps1, ps2, pd.
 * NOTE(review): "CROSS" suggests the source elements are paired across
 * positions rather than index-for-index -- verify in the macro body.
 */
P_MUL_CROSS_LOOP(16, { pd = (int32_t)ps1 * (int32_t)ps2; })