/*
 * Instantiate P_MUL_CROSS_LOOP with 8 as its first argument and a statement
 * block that stores the product of ps1 and ps2 into pd.
 *
 * Each operand is cast to int16_t before the multiply. Under C's integer
 * promotions the multiplication itself is then performed in int, and the
 * result is converted to pd's type on assignment.
 *
 * NOTE(review): P_MUL_CROSS_LOOP, ps1, ps2 and pd are defined outside this
 * view. Presumably 8 is the element width in bits and the macro provides
 * ps1/ps2/pd for each "crossed" pair of source elements -- confirm against
 * the macro definition. Whether the product is effectively signed or
 * unsigned depends on the types the macro gives ps1/ps2 (the int16_t cast
 * preserves any 8-bit value, signed or unsigned) -- TODO confirm.
 */
P_MUL_CROSS_LOOP(8, { pd = (int16_t)ps1 * (int16_t)ps2; })