// NOTE(review): the header names were missing from both #include directives.
// <complex.h> is required for `complex`, `I` and `conj` used in this file;
// <stdio.h> is presumed to be the other header -- confirm against the
// upstream copy of this file.
#include <stdio.h>
#include <complex.h>

// Suffix appended to every kernel name; may be overridden before this point.
#ifndef PREF
#define PREF c
#endif

// MK(name, PREF) expands PREF first and then pastes, producing `name_<PREF>`
// (for example `fma0_c` with the default PREF).
#define FX(N,P) P ## _ ## N
#define MK(N,P) FX(P,N)

// Element count and scalar type shared by every kernel.
#define N 32
#define TYPE double

// Every kernel takes three restrict-qualified arrays declared with N complex
// elements: a and b are only read, c is updated in place for i = 0 .. N-1.
// noinline/noipa keep each kernel as a separate, un-inlined function.

// ------ FMA

// Complex FMA instructions rotating the result

// c[i] += a[i] * b[i]  (no rotation).
__attribute__((noinline,noipa))
void MK(fma0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                     TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * b[i];
    }
}

// c[i] += (a[i] * b[i]) * I  (product multiplied by I once).
__attribute__((noinline,noipa))
void MK(fma90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                      TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * b[i] * I;
    }
}

// c[i] += (a[i] * b[i]) * I * I  (product multiplied by I twice).
__attribute__((noinline,noipa))
void MK(fma180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * b[i] * I * I;
    }
}

// c[i] += (a[i] * b[i]) * I * I * I  (product multiplied by I three times).
__attribute__((noinline,noipa))
void MK(fma270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * b[i] * I * I * I;
    }
}

// Complex FMA instructions rotating the second parameter.

// c[i] += a[i] * b[i]  (b is not rotated; same expression as fma0).
__attribute__((noinline,noipa))
void MK(fma0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                         TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * b[i];
    }
}

// c[i] += a[i] * (b[i] * I)  (b multiplied by I before the product).
__attribute__((noinline,noipa))
void MK(fma90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                          TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * (b[i] * I);
    }
}

// c[i] += a[i] * (b[i] * I * I)  (b multiplied by I twice before the product).
__attribute__((noinline,noipa))
void MK(fma180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * (b[i] * I * I);
    }
}

// c[i] += a[i] * (b[i] * I * I * I)  (b multiplied by I three times first).
__attribute__((noinline,noipa))
void MK(fma270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * (b[i] * I * I * I);
    }
}

// Complex FMA instructions with conjugated values.
// The kernels below take three restrict-qualified arrays declared with N
// complex elements: a and b are only read, c is updated in place for
// i = 0 .. N-1.  MK, PREF, N and TYPE are the macros defined at the top of
// the file.

// c[i] += conj (a[i]) * b[i]  (first operand conjugated).
__attribute__((noinline,noipa))
void MK(fma_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                               TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += conj (a[i]) * b[i];
    }
}

// c[i] += a[i] * conj (b[i])  (second operand conjugated).
__attribute__((noinline,noipa))
void MK(fma_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                                TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += a[i] * conj (b[i]);
    }
}

// c[i] += conj (a[i]) * conj (b[i])  (both operands conjugated).
__attribute__((noinline,noipa))
void MK(fma_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                              TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] += conj (a[i]) * conj (b[i]);
    }
}

// ----- FMS

// Complex FMS instructions rotating the result

// c[i] -= a[i] * b[i]  (no rotation).
__attribute__((noinline,noipa))
void MK(fms0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                     TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * b[i];
    }
}

// c[i] -= (a[i] * b[i]) * I  (product multiplied by I once).
__attribute__((noinline,noipa))
void MK(fms90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                      TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * b[i] * I;
    }
}

// c[i] -= (a[i] * b[i]) * I * I  (product multiplied by I twice).
__attribute__((noinline,noipa))
void MK(fms180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * b[i] * I * I;
    }
}

// c[i] -= (a[i] * b[i]) * I * I * I  (product multiplied by I three times).
__attribute__((noinline,noipa))
void MK(fms270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * b[i] * I * I * I;
    }
}

// Complex FMS instructions rotating the second parameter.
// The kernels below take three restrict-qualified arrays declared with N
// complex elements: a and b are only read, c is updated in place for
// i = 0 .. N-1.  MK, PREF, N and TYPE are the macros defined at the top of
// the file.

// c[i] -= a[i] * b[i]  (b is not rotated).
__attribute__((noinline,noipa))
void MK(fms0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                         TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * b[i];
    }
}

// c[i] -= a[i] * (b[i] * I)  (b multiplied by I before the product).
__attribute__((noinline,noipa))
void MK(fms90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                          TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * (b[i] * I);
    }
}

// c[i] -= a[i] * (b[i] * I * I)  (b multiplied by I twice before the product).
__attribute__((noinline,noipa))
void MK(fms180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * (b[i] * I * I);
    }
}

// c[i] -= a[i] * (b[i] * I * I * I)  (b multiplied by I three times first).
__attribute__((noinline,noipa))
void MK(fms270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * (b[i] * I * I * I);
    }
}

// Complex FMS instructions with conjugated values.

// c[i] -= conj (a[i]) * b[i]  (first operand conjugated).
__attribute__((noinline,noipa))
void MK(fms_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                               TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= conj (a[i]) * b[i];
    }
}

// c[i] -= a[i] * conj (b[i])  (second operand conjugated).
__attribute__((noinline,noipa))
void MK(fms_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                                TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= a[i] * conj (b[i]);
    }
}

// c[i] -= conj (a[i]) * conj (b[i])  (both operands conjugated).
__attribute__((noinline,noipa))
void MK(fms_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                              TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] -= conj (a[i]) * conj (b[i]);
    }
}

// ----- MUL

// Complex MUL instructions rotating the result

// c[i] = a[i] * b[i]  (no rotation; c is overwritten, not accumulated).
__attribute__((noinline,noipa))
void MK(mul0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                     TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * b[i];
    }
}

// c[i] = (a[i] * b[i]) * I  (product multiplied by I once).
__attribute__((noinline,noipa))
void MK(mul90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                      TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * b[i] * I;
    }
}

// c[i] = (a[i] * b[i]) * I * I  (product multiplied by I twice).
__attribute__((noinline,noipa))
void MK(mul180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * b[i] * I * I;
    }
}

// c[i] = (a[i] * b[i]) * I * I * I  (product multiplied by I three times).
__attribute__((noinline,noipa))
void MK(mul270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * b[i] * I * I * I;
    }
}

// Complex MUL instructions rotating the second parameter.

// c[i] = a[i] * b[i]  (b is not rotated).
__attribute__((noinline,noipa))
void MK(mul0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                         TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * b[i];
    }
}

// c[i] = a[i] * (b[i] * I)  (b multiplied by I before the product).
__attribute__((noinline,noipa))
void MK(mul90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                          TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * (b[i] * I);
    }
}

// c[i] = a[i] * (b[i] * I * I)  (b multiplied by I twice before the product).
__attribute__((noinline,noipa))
void MK(mul180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * (b[i] * I * I);
    }
}

// c[i] = a[i] * (b[i] * I * I * I)  (b multiplied by I three times first).
__attribute__((noinline,noipa))
void MK(mul270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * (b[i] * I * I * I);
    }
}

// Complex MUL instructions with conjugated values.
// The kernels below take three restrict-qualified arrays declared with N
// complex elements: a and b are only read, c is overwritten element-wise for
// i = 0 .. N-1.  MK, PREF, N and TYPE are the macros defined at the top of
// the file.

// c[i] = conj (a[i]) * b[i]  (first operand conjugated).
__attribute__((noinline,noipa))
void MK(mul_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                               TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = conj (a[i]) * b[i];
    }
}

// c[i] = a[i] * conj (b[i])  (second operand conjugated).
__attribute__((noinline,noipa))
void MK(mul_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                                TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] * conj (b[i]);
    }
}

// c[i] = conj (a[i]) * conj (b[i])  (both operands conjugated).
__attribute__((noinline,noipa))
void MK(mul_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                              TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = conj (a[i]) * conj (b[i]);
    }
}

// ----- ADD

// Complex ADD instructions rotating the result

// c[i] = a[i] + b[i]  (no rotation).
__attribute__((noinline,noipa))
void MK(add0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                     TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] + b[i];
    }
}

// c[i] = (a[i] + b[i]) * I  (sum multiplied by I once).
__attribute__((noinline,noipa))
void MK(add90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                      TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = (a[i] + b[i]) * I;
    }
}

// c[i] = (a[i] + b[i]) * I * I  (sum multiplied by I twice).
__attribute__((noinline,noipa))
void MK(add180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = (a[i] + b[i]) * I * I;
    }
}

// c[i] = (a[i] + b[i]) * I * I * I  (sum multiplied by I three times).
__attribute__((noinline,noipa))
void MK(add270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                       TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = (a[i] + b[i]) * I * I * I;
    }
}

// Complex ADD instructions rotating the second parameter.
// The kernels below take three restrict-qualified arrays declared with N
// complex elements: a and b are only read, c is overwritten element-wise for
// i = 0 .. N-1.  MK, PREF, N and TYPE are the macros defined at the top of
// the file.

// c[i] = a[i] + b[i]  (b is not rotated).
__attribute__((noinline,noipa))
void MK(add0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                         TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] + b[i];
    }
}

// c[i] = a[i] + (b[i] * I)  (b multiplied by I before the addition).
__attribute__((noinline,noipa))
void MK(add90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                          TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] + (b[i] * I);
    }
}

// c[i] = a[i] + (b[i] * I * I)  (b multiplied by I twice before the addition).
__attribute__((noinline,noipa))
void MK(add180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] + (b[i] * I * I);
    }
}

// c[i] = a[i] + (b[i] * I * I * I)  (b multiplied by I three times first).
__attribute__((noinline,noipa))
void MK(add270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                           TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] + (b[i] * I * I * I);
    }
}

// Complex ADD instructions with conjugated values.

// c[i] = conj (a[i]) + b[i]  (first operand conjugated).
__attribute__((noinline,noipa))
void MK(add_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                               TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = conj (a[i]) + b[i];
    }
}

// c[i] = a[i] + conj (b[i])  (second operand conjugated).
__attribute__((noinline,noipa))
void MK(add_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                                TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = a[i] + conj (b[i]);
    }
}

// c[i] = conj (a[i]) + conj (b[i])  (both operands conjugated).
__attribute__((noinline,noipa))
void MK(add_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N],
                              TYPE complex c[restrict N])
{
  for (int i = 0; i < N; i++)
    {
      c[i] = conj (a[i]) + conj (b[i]);
    }
}