// Precondition check performed before the body runs. The macro is defined
// elsewhere; NOTE(review): confirm exactly which state it validates.
require_vector_vs;

// Multiply-accumulate reduction: the body adds the product of the current
// ps1/ps2 pair into the running result pd_res.
// NOTE(review): the arguments (32, 16, true, true) are presumably an outer
// width, an inner element width and two mode flags -- confirm their meaning
// against the P_REDUCTION_LOOP definition before relying on this comment.
P_REDUCTION_LOOP(32, 16, true, true, {
  pd_res = pd_res + (ps1 * ps2);
})