// Instruction-body fragment: evaluates the `require_vector_vs` statement
// (presumably a precondition check -- confirm against its definition), then
// expands P_REDUCTION_CROSS_LOOP with a multiply-accumulate step as its body.
//
// pd_res, ps1 and ps2 are not declared here; they are presumably introduced
// by the macro expansion (pd_res as the running result, ps1/ps2 as the
// current operand pair) -- TODO confirm against the macro definition.
//
// NOTE(review): the positional arguments look like (result width in bits,
// operand width in bits, seed-from-destination flag, saturate flag), and
// "CROSS" suggests the operands are paired crosswise. Verify against the
// macro before relying on any of this.
require_vector_vs;

P_REDUCTION_CROSS_LOOP(
  32, 16, false, true,
  {
    pd_res += ps1 * ps2;
  }
)