/* Test that signed and unsigned division and modulus use the correct
   vector routines and give the correct results.  */

/* Setting it this way ensures the run tests use the same flag as the
   compile tests.  */
#pragma GCC optimize("O2")

typedef signed char v2qi __attribute__ ((vector_size (2)));
typedef signed char v4qi __attribute__ ((vector_size (4)));
typedef signed char v8qi __attribute__ ((vector_size (8)));
typedef signed char v16qi __attribute__ ((vector_size (16)));
typedef signed char v32qi __attribute__ ((vector_size (32)));
typedef signed char v64qi __attribute__ ((vector_size (64)));
typedef unsigned char v2uqi __attribute__ ((vector_size (2)));
typedef unsigned char v4uqi __attribute__ ((vector_size (4)));
typedef unsigned char v8uqi __attribute__ ((vector_size (8)));
typedef unsigned char v16uqi __attribute__ ((vector_size (16)));
typedef unsigned char v32uqi __attribute__ ((vector_size (32)));
typedef unsigned char v64uqi __attribute__ ((vector_size (64)));
typedef short v2hi __attribute__ ((vector_size (4)));
typedef short v4hi __attribute__ ((vector_size (8)));
typedef short v8hi __attribute__ ((vector_size (16)));
typedef short v16hi __attribute__ ((vector_size (32)));
typedef short v32hi __attribute__ ((vector_size (64)));
typedef short v64hi __attribute__ ((vector_size (128)));
typedef unsigned short v2uhi __attribute__ ((vector_size (4)));
typedef unsigned short v4uhi __attribute__ ((vector_size (8)));
typedef unsigned short v8uhi __attribute__ ((vector_size (16)));
typedef unsigned short v16uhi __attribute__ ((vector_size (32)));
typedef unsigned short v32uhi __attribute__ ((vector_size (64)));
typedef unsigned short v64uhi __attribute__ ((vector_size (128)));
typedef int v2si __attribute__ ((vector_size (8)));
typedef int v4si __attribute__ ((vector_size (16)));
typedef int v8si __attribute__ ((vector_size (32)));
typedef int v16si __attribute__ ((vector_size (64)));
typedef int v32si __attribute__ ((vector_size (128)));
typedef int v64si __attribute__ ((vector_size (256)));
typedef unsigned int v2usi __attribute__ ((vector_size (8)));
typedef unsigned int v4usi __attribute__ ((vector_size (16)));
typedef unsigned int v8usi __attribute__ ((vector_size (32)));
typedef unsigned int v16usi __attribute__ ((vector_size (64)));
typedef unsigned int v32usi __attribute__ ((vector_size (128)));
typedef unsigned int v64usi __attribute__ ((vector_size (256)));
typedef long v2di __attribute__ ((vector_size (16)));
typedef long v4di __attribute__ ((vector_size (32)));
typedef long v8di __attribute__ ((vector_size (64)));
typedef long v16di __attribute__ ((vector_size (128)));
typedef long v32di __attribute__ ((vector_size (256)));
typedef long v64di __attribute__ ((vector_size (512)));
typedef unsigned long v2udi __attribute__ ((vector_size (16)));
typedef unsigned long v4udi __attribute__ ((vector_size (32)));
typedef unsigned long v8udi __attribute__ ((vector_size (64)));
typedef unsigned long v16udi __attribute__ ((vector_size (128)));
typedef unsigned long v32udi __attribute__ ((vector_size (256)));
typedef unsigned long v64udi __attribute__ ((vector_size (512)));

#ifndef STYPE
#define STYPE v64si
#define UTYPE v64usi
#endif
#ifndef N
#define N 64
#endif

STYPE a;
STYPE b;
UTYPE ua;
UTYPE ub;

int main()
{
  int i;
  STYPE squot, srem;
  UTYPE usquot, usrem;
  STYPE vquot, vrem;
  UTYPE uvquot, uvrem;
  STYPE vquot2, vrem2;
  UTYPE uvquot2, uvrem2;
  STYPE refquot, refrem;
  UTYPE urefquot, urefrem;

  for (i = 0; i < N; i++)
    {
      a[i] = i * (i >> 2) + (i >> 1);
      ua[i] = a[i];
      b[i] = i;
      ub[i] = i;
    }
  for (i = 0; i < N; i++)
    {
      /* Calculate reference values using regular scalar div and mod.  */
      refquot[i] = a[i] / b[i];
      __asm__ ("" ::: "memory");
      refrem[i] = a[i] % b[i];
      urefquot[i] = ua[i] / ub[i];
      __asm__ ("" ::: "memory");
      urefrem[i] = ua[i] % ub[i];
    }
  __asm__ ("" ::: "memory");

  /* Scalar with divmod.  */
  for (i = 0; i < N; i++)
    {
      squot[i] = a[i] / b[i];
      srem[i] = a[i] % b[i];
      usquot[i] = ua[i] / ub[i];
      usrem[i] = ua[i] % ub[i];
    }
  __asm__ ("" ::: "memory");

  /* Vectorized with divmod.  */
  vquot = a / b;
  vrem = a % b;
  uvquot = ua / ub;
  uvrem = ua % ub;
  __asm__ ("" ::: "memory");

  /* Vectorized with separate div and mod.  The memory clobbers force the
     operands to be reloaded so the division and modulus are not combined
     into a single divmod call.  */
  vquot2 = a / b;
  __asm__ ("" ::: "memory");
  vrem2 = a % b;
  uvquot2 = ua / ub;
  __asm__ ("" ::: "memory");
  uvrem2 = ua % ub;

#ifdef DEBUG
#define DUMP(VAR) \
  __builtin_printf ("%8s: ", #VAR); \
  for (i = 0; i < N; i++) \
    __builtin_printf ("%d ", (int)VAR[i]); \
  __builtin_printf ("\n");
  DUMP (refquot)
  DUMP (squot)
  DUMP (vquot)
  DUMP (vquot2)
  __builtin_printf ("\n");
  DUMP (urefquot)
  DUMP (usquot)
  DUMP (uvquot)
  DUMP (uvquot2)
  __builtin_printf ("\n");
  DUMP (refrem)
  DUMP (srem)
  DUMP (vrem)
  DUMP (vrem2)
  __builtin_printf ("\n");
  DUMP (urefrem)
  DUMP (usrem)
  DUMP (uvrem)
  DUMP (uvrem2)
  __builtin_printf ("\n");
#endif

  for (i = 0; i < N; i++)
    if (squot[i] != refquot[i]
        || vquot[i] != refquot[i]
        || vquot2[i] != refquot[i]
        || usquot[i] != urefquot[i]
        || uvquot[i] != urefquot[i]
        || uvquot2[i] != urefquot[i]
        || srem[i] != refrem[i]
        || vrem[i] != refrem[i]
        || vrem2[i] != refrem[i]
        || usrem[i] != urefrem[i]
        || uvrem[i] != urefrem[i]
        || uvrem2[i] != urefrem[i])
      __builtin_abort ();

  return 0;
}

/* { dg-final { scan-assembler-times {__divmodv64si4@rel32@lo} 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {__udivmodv64si4@rel32@lo} 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {__divv64si3@rel32@lo} 1 } } */
/* { dg-final { scan-assembler-times {__udivv64si3@rel32@lo} 1 } } */
/* { dg-final { scan-assembler-times {__modv64si3@rel32@lo} 1 } } */
/* { dg-final { scan-assembler-times {__umodv64si3@rel32@lo} 1 } } */
/* { dg-final { scan-assembler-times {__divsi3@rel32@lo} 1 } } */
/* { dg-final { scan-assembler-times {__udivsi3@rel32@lo} 1 } } */