//  -*- C -*-
// Media Instructions
// ------------------

// Ref: http://www.sgi.com/MIPS/arch/ISA5/MDMXspec.pdf

// Note: For OB instructions, the sel field is deduced by special
// handling of the "vt" operand.
//         If vt is:
//             of the form $vt[0],        then sel is 0000
//             of the form $vt[1],        then sel is 0001
//             of the form $vt[2],        then sel is 0010
//             of the form $vt[3],        then sel is 0011
//             of the form $vt[4],        then sel is 0100
//             of the form $vt[5],        then sel is 0101
//             of the form $vt[6],        then sel is 0110
//             of the form $vt[7],        then sel is 0111
//             Normal register specifier, then sel is 1011
//             Constant,                  then sel is 1111
//
// VecAcc is the Vector Accumulator.
//     This accumulator is organized as 8X24 bit (192 bit) register.
//     This accumulator holds only signed values.


// Verify that the instruction is valid for the current architecture.
// If valid, return the scale (log2 of the element size in bytes) of a
// vector element as determined by SEL.

:function:64,f::int:get_scale:int sel
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Decode the low three bits of SEL into an element scale:
     0 for byte elements, 1 for halfword elements.  The word and long
     encodings call semantic_illegal before their scale (2 or 3) is
     returned.  */
  int fmt_bits = sel & 0x7;
#if 0
  switch (my_index X STATE_ARCHITECTURE)
    {
    }
#endif
  if ((fmt_bits & 1) == 0)
    /* octal byte - ssss0 */
    return 0;
  if ((fmt_bits & 3) == 1)
    /* quad halfword - sss01 */
    return 1;
  if (fmt_bits == 3)
    {
      /* bi word - ss011 */
      semantic_illegal (CPU_, cia);
      return 2;
    }
  if (fmt_bits == 7)
    {
      /* long - ss111 */
      semantic_illegal (CPU_, cia);
      return 3;
    }
  /* Not reachable: every three-bit pattern is handled above.  */
  abort ();
  return -1;
}


// Fetch/Store VALUE in ELEMENT of vector register FPR.
// The size of the element is determined by SCALE.

:function:64,f::signed:value_vr:int scale, int fpr, int el
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Return element EL of vector register FPR, sign-extended.
     SCALE 0 reads a byte, SCALE 1 reads a halfword; any other scale
     aborts the simulator.  */

  /* An uninterpreted register is claimed as fmt_long; any format
     other than fmt_long/fmt_unknown is reported and then marked
     fmt_unknown.  */
  switch (FPR_STATE[fpr])
    {
    case fmt_uninterpreted:
      FPR_STATE[fpr] = fmt_long;
      break;
    case fmt_long:
    case fmt_unknown:
      break;
    default:
      sim_io_eprintf (SD, "Vector %d format invalid (PC = 0x%08lx)\n",
                      fpr, (long) CIA);
      FPR_STATE[fpr] = fmt_unknown;
    }
  switch (scale)
    {
    case 0:
      {
        signed8 value = *A1_8 (&FGR[fpr], 7 - el);
        return value;
      }
    case 1:
      {
        signed16 value = *A2_8 (&FGR[fpr], 3 - el);
        return value;
      }
    default:
      /* Was "abort;", an expression statement that never called
         abort, so unsupported scales silently returned 0.  */
      abort ();
    }
  return 0;
}

:function:64,f::void:store_vr:int scale, int fpr, int element, signed value
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Write VALUE into element ELEMENT of vector register FPR.
     SCALE 0 writes a byte, SCALE 1 writes a halfword; any other
     scale aborts the simulator.  */

  /* Claim an uninterpreted register as fmt_long; complain about (and
     reset to fmt_unknown) anything that is neither fmt_long nor
     fmt_unknown.  */
  if (FPR_STATE[fpr] == fmt_uninterpreted)
    FPR_STATE[fpr] = fmt_long;
  else if (FPR_STATE[fpr] != fmt_long
           && FPR_STATE[fpr] != fmt_unknown)
    {
      sim_io_eprintf (SD, "Vector %d format invalid (PC = 0x%08lx)\n",
                      fpr, (long) cia);
      FPR_STATE[fpr] = fmt_unknown;
    }

  if (scale == 0)
    *A1_8 (&FGR[fpr], 7 - element) = value;
  else if (scale == 1)
    *A2_8 (&FGR[fpr], 3 - element) = value;
  else
    abort ();
}


// Select a value from one of FGR[VT][ELEMENT], VT and FGR[VT][CONST]
// according to SEL

:function:64,f::unsigned:select_vr:int sel, int vt, int element
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Produce the second operand of a vector operation as chosen by
     SEL: a fixed element of register VT, element ELEMENT of register
     VT, or VT itself as an immediate.  Unrecognised SEL values
     yield 0.  */
  int scale;
  int picked;
  switch (sel)
    {
      /* element select - 0xxxx: the element number sits above the
         format bits */
    case 0x00: case 0x02: case 0x04: case 0x06:
    case 0x08: case 0x0a: case 0x0c: case 0x0e:
      /* 0 xxx 0 */
      scale = 0;
      picked = sel >> 1;
      break;
    case 0x01: case 0x05: case 0x09: case 0x0d:
      /* 0 xx 01 */
      scale = 1;
      picked = sel >> 2;
      break;
    case 0x03: case 0x0b:
      /* 0 x 011 */
      scale = 2;
      picked = sel >> 3;
      break;
    case 0x07: case 0x0f:
      /* 0 x 111 */
      scale = 3;
      picked = sel >> 4;
      break;

      /* select vector - 10xxx: use the caller's element number */
    case 0x16: /* 10 11 0 */
      scale = 0;
      picked = element;
      break;
    case 0x15: /* 10 1 01 */
      scale = 1;
      picked = element;
      break;
    case 0x13: /* 10  011 */
      scale = 2;
      picked = element;
      break;
    case 0x17: /* 10  111 */
      scale = 3;
      picked = element;
      break;

      /* select immediate - 11xxx */
    case 0x1e: /* 11 11 0 */
    case 0x1d: /* 11 1 01 */
    case 0x1b: /* 11  011 */
    case 0x1f: /* 11  111 */
      return vt;

    default:
      return 0;
    }
  return value_vr (SD_, scale, vt, picked);
}


// Saturate (clamp) the signed value to (8 << SCALE) bits.

:function:64,f::signed:Clamp:int scale, signed value
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Saturate VALUE to the signed range of an element and return the
     element's bit pattern (masked to 8 bits for SCALE 0, 16 bits for
     SCALE 1).  Any other scale aborts.  */
  if (scale == 0)
    {
      if (value > 127)
        return 0x7f;
      if (value < -128)
        return 0x80;
      return value & 0xff;
    }
  if (scale == 1)
    {
      if (value > 32767)
        return 0x7fff;
      if (value < -32768)
        return 0x8000;
      return value & 0xffff;
    }
  abort ();
  return 0;
}


// Access a single bit of the floating point CC register.

:function:64,f::void:store_cc:int i, int value
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Record VALUE as condition-code bit I via SETFCC.  */
  int bit = value;
  SETFCC (i, bit);
}

:function:64,f::int:value_cc:int i
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Fetch condition-code bit I via GETFCC.  */
  int bit = GETFCC (i);
  return bit;
}


//  Read/write the accumulator

:function:64,f::signed64:value_acc:int scale, int element
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Return accumulator element ELEMENT as a sign-extended value.
     SCALE 0 elements are three bytes of CPU->acc, SCALE 1 elements
     are six bytes; the most significant byte carries the sign.
     Other scales return 0.  */
  signed64 value = 0;
  switch (scale)
    {
    case 0:
      value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 0];
      value |= (unsigned64) (unsigned8) CPU->acc [element * 3 + 1] << 8;
      value |= (signed64) (signed8) CPU->acc [element * 3 + 2] << 16;
      break;
    case 1:
      /* Six bytes per element, so the stride is 6; a stride of 3
         made neighbouring halfword elements overlap.  */
      value |= (unsigned64) (unsigned8) CPU->acc [element * 6 + 0];
      value |= (unsigned64) (unsigned8) CPU->acc [element * 6 + 1] << 8;
      value |= (unsigned64) (unsigned8) CPU->acc [element * 6 + 2] << 16;
      value |= (unsigned64) (unsigned8) CPU->acc [element * 6 + 3] << 24;
      value |= (unsigned64) (unsigned8) CPU->acc [element * 6 + 4] << 32;
      value |= (signed64) (signed8) CPU->acc [element * 6 + 5] << 40;
      break;
    }
  return value;
}

:function:64,f::void:store_acc:int scale, int element, signed64 value
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Store the low 24 bits (SCALE 0) or 48 bits (SCALE 1) of VALUE
     into accumulator element ELEMENT, least significant byte first.
     Other scales store nothing.  */
  switch (scale)
    {
    case 0:
      CPU->acc [element * 3 + 0] = value >> 0;
      CPU->acc [element * 3 + 1] = value >> 8;
      CPU->acc [element * 3 + 2] = value >> 16;
      break;
    case 1:
      /* Stride of 6 bytes per element, matching value_acc.  */
      CPU->acc [element * 6 + 0] = value >> 0;
      CPU->acc [element * 6 + 1] = value >> 8;
      CPU->acc [element * 6 + 2] = value >> 16;
      CPU->acc [element * 6 + 3] = value >> 24;
      CPU->acc [element * 6 + 4] = value >> 32;
      CPU->acc [element * 6 + 5] = value >> 40;
      break;
    }
}


// Formatting

:%s:64,f:::VT:int sel, int vt
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Format the VT operand for tracing, decoding SEL the same way
     select_vr does: 0xxxx is an element select, 10xxx a whole vector,
     11xxx an immediate.  The previous tests (sel < 8, 0x13, 0x1f)
     did not match those encodings for the byte and halfword
     formats.  The result lives in a static buffer that the next call
     overwrites.  */
  static char buf[20];
  switch (sel)
    {
    case 0x16:
    case 0x15:
    case 0x13:
    case 0x17:
      sprintf (buf, "v%d", vt);
      break;
    case 0x1e:
    case 0x1d:
    case 0x1b:
    case 0x1f:
      sprintf (buf, "%d", vt);
      break;
    default:
      if (sel >= 0 && sel < 0x10)
        {
          /* The element number sits above the format bits.  */
          int el;
          if ((sel & 1) == 0)
            el = sel >> 1;
          else if ((sel & 3) == 1)
            el = sel >> 2;
          else if ((sel & 7) == 3)
            el = sel >> 3;
          else
            el = sel >> 4;
          sprintf (buf, "v%d[%d]", vt, el);
        }
      else
        sprintf (buf, "(invalid)");
      break;
    }
  return buf;
}

:%s:64,f:::SEL:int sel
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Name the element format encoded in the low three bits of SEL.  */
  int fmt_bits = sel & 7;
  if ((fmt_bits & 1) == 0)
    return "ob";
  if ((fmt_bits & 3) == 1)
    return "qh";
  if (fmt_bits == 3)
    return "bw";
  return "l";
}


// Vector Add.

010010,5.SEL,5.VT,5.VS,5.VD,001011::64,f::ADD.fmt
"add.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = Clamp (VS[el] + selected VT operand) for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      int sum = (value_vr (SD_, scale, VS, el)
                 + select_vr (SD_, SEL, VT, el));
      store_vr (SD_, scale, VD, el, Clamp (SD_, scale, sum));
    }
}


// Accumulate Vector Add

010010,5.SEL,5.VT,5.VS,0,0000,110111::64,f::ADDA.fmt
"adda.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
{
  /* acc[i] += VS[i] + selected VT operand.
     The accumulate form now has the bit above the four zero bits
     clear, as MULA, MULS and SUBA do; it previously shared the "load"
     setting.  The format string no longer names VD, which this
     encoding does not define.  */
  int i;
  int scale = get_scale (SD_, SEL);
  for (i = 0; i < (8 >> scale); i++)
    store_acc (SD_, scale, i,
               (value_acc (SD_, scale, i)
                + (signed64) value_vr (SD_, scale, VS, i)
                + (signed64) select_vr (SD_, SEL, VT, i)));
}


// Load Vector Add

010010,5.SEL,5.VT,5.VS,1,0000,110111::64,f::ADDL.fmt
"addl.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
{
  /* acc[i] = VS[i] + selected VT operand (previous accumulator
     contents are discarded).  Was misnamed ADDA.fmt and carried the
     "accumulate" bit setting.  */
  int i;
  int scale = get_scale (SD_, SEL);
  for (i = 0; i < (8 >> scale); i++)
    store_acc (SD_, scale, i,
               ((signed64) value_vr (SD_, scale, VS, i)
                + (signed64) select_vr (SD_, SEL, VT, i)));
}



// Vector align, Constant Alignment

:function:64,f::void:ByteAlign:int vd, int imm, int vs, int vt
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Store into VD a 64-bit window taken from the concatenation of VS
     and VT, offset by IMM bytes.  Only the low three bits of IMM are
     used, so that a byte count taken from a general register cannot
     produce an out-of-range bit position.  Which end the window is
     measured from depends on BigEndianCPU.  */
  int s = (imm & 7) * 8;
  unsigned64 rs = ValueFPR (vs, fmt_long);
  unsigned64 rt = ValueFPR (vt, fmt_long);
  unsigned64 rd;
  if (BigEndianCPU)
    {
      /* (vs || vt) [127 - S .. 64 - S] */
      if (s == 0)
        rd = rs;
      else
        rd = (MOVED64 (rs, 64 - s, 0, 63, s)
              | EXTRACTED64 (rt, 63, 64 - s));
    }
  else
    {
      /* (vs || vt) [63 + S .. S] */
      if (s == 0)
        rd = rt;
      else
        rd = (MOVED64 (rs, s, 0, 63, 64 - s)
              | EXTRACTED64 (rt, 63, s));
    }
  StoreFPR (vd, fmt_long, rd);
}

010010,00,3.IMM,5.VT,5.VS,5.VD,0110,X,0::64,f::ALNI.fmt
"alni.%s<FMT#X> v<VD>, v<VS>, v<VT>, <IMM>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Constant alignment: the three-bit IMM field is the byte count
     handed to ByteAlign.  */
  int nr_bytes = IMM;
  ByteAlign (SD_, VD, nr_bytes, VS, VT);
}



// Vector align, Variable Alignment

010010,5.RS,5.VT,5.VS,5.VD,0110,X,1::64,f::ALNV.fmt
"alnv.%s<FMT#X> v<VD>, v<VS>, v<VT>, r<RS>"
*mdmx:
{
  /* Variable alignment: the byte count is the low three bits of
     GPR[RS].  Masking here keeps the full-width register value from
     being truncated into ByteAlign's int parameter.  */
  ByteAlign (SD_, VD, GPR[RS] & 7, VS, VT);
}



// Vector And.

010010,5.SEL,5.VT,5.VS,5.VD,001100::64,f::AND.fmt
"and.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = VS[el] & selected VT operand, for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el = 0;
  while (el < nr_elements)
    {
      store_vr (SD_, scale, VD, el,
                (value_vr (SD_, scale, VS, el)
                 & select_vr (SD_, SEL, VT, el)));
      el++;
    }
}



// Vector Compare Equal.


010010,5.SEL,5.VT,5.VS,00000,000001::64,f::C.EQ.fmt
"c.eq.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Set condition-code bit i to (VS[i] == selected VT operand) for
     every element.  The mnemonic is now lower case, matching c.le
     and c.lt.  */
  int i;
  int scale = get_scale (SD_, SEL);
  for (i = 0; i < (8 >> scale); i++)
    store_cc (SD_, i,
              (value_vr (SD_, scale, VS, i)
               == select_vr (SD_, SEL, VT, i)));
}



// Vector Compare Less Than or Equal.

010010,5.SEL,5.VT,5.VS,00000,000101::64,f::C.LE.fmt
"c.le.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Set condition-code bit el to (VS[el] <= selected VT operand).
     NOTE(review): value_vr returns signed and select_vr unsigned, so
     C performs this comparison on unsigned operands - confirm that
     is intended for halfword elements.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      int is_le = (value_vr (SD_, scale, VS, el)
                   <= select_vr (SD_, SEL, VT, el));
      store_cc (SD_, el, is_le);
    }
}



// Vector Compare Less Than.

010010,5.SEL,5.VT,5.VS,00000,000100::64,f::C.LT.fmt
"c.lt.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Set condition-code bit el to (VS[el] < selected VT operand).
     NOTE(review): value_vr returns signed and select_vr unsigned, so
     C performs this comparison on unsigned operands - confirm that
     is intended for halfword elements.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      int is_lt = (value_vr (SD_, scale, VS, el)
                   < select_vr (SD_, SEL, VT, el));
      store_cc (SD_, el, is_lt);
    }
}



// Vector Maximum.

:function:64,f::signed:Maxi:int scale, signed l, signed r
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Larger of L and R; L when they are equal.  SCALE is not used.  */
  return (l < r) ? r : l;
}

010010,5.SEL,5.VT,5.VS,5.VD,000111::64,f::MAX.fmt
"max.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = Maxi (VS[el], selected VT operand) for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      int larger = Maxi (SD_, scale,
                         value_vr (SD_, scale, VS, el),
                         select_vr (SD_, SEL, VT, el));
      store_vr (SD_, scale, VD, el, larger);
    }
}



// Vector Minimum.

:function:64,f::signed:Mini:int scale, signed l, signed r
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Smaller of L and R; R when they are equal.  SCALE is not used.  */
  return (l < r) ? l : r;
}

010010,5.SEL,5.VT,5.VS,5.VD,000110::64,f::MIN.fmt
"min.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = Mini (VS[el], selected VT operand) for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      int smaller = Mini (SD_, scale,
                          value_vr (SD_, scale, VS, el),
                          select_vr (SD_, SEL, VT, el));
      store_vr (SD_, scale, VD, el, smaller);
    }
}



// Vector Sign.

:function:64,f::signed:Sign:int scale, signed l, signed r
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Return R when L is non-negative, otherwise R negated.  Negating
     the most negative element value saturates to the most positive
     one (0x7f for SCALE 0, 0x7fff for SCALE 1); the constants used to
     be 0x7ff and 0x7ffff, which do not fit the element.  Scales other
     than 0 and 1 abort on that path.  */
  if (l >= 0)
    return r;
  else if (r >= 0)
    return -r;
  else
    {
      /* watch for overflow of MIN_INT */
      switch (scale)
        {
        case 0:
          if ((r & 0xff) == 0x80)
            return 0x7f;
          else
            return -r;
        case 1:
          if ((r & 0xffff) == 0x8000)
            return 0x7fff;
          else
            return -r;
        default:
          abort ();
        }
      return -r;
    }
}

010010,5.SEL,5.VT,5.VS,5.VD,000000::64,f::MSGN.fmt
"msgn.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
{
  /* VD[i] = Sign (VS[i], selected VT operand) for every element.
     The function code was 000110, the same as MIN.fmt in this file;
     NOTE(review): 000000 is taken from the published MDMX opcode
     tables - confirm against the target's decode.  The illegal
     instruction call now passes CPU_ like get_scale does; "sd" is not
     a name used anywhere else in this file.  */
  int i;
  int scale = get_scale (SD_, SEL);
  if ((SEL & 1) != 1)
    /* only QH allowed */
    semantic_illegal (CPU_, cia);
  for (i = 0; i < (8 >> scale); i++)
    store_vr (SD_, scale, VD, i,
              Sign (SD_, scale,
                    value_vr (SD_, scale, VS, i),
                    select_vr (SD_, SEL, VT, i)));
}



// Vector Multiply.

010010,5.SEL,5.VT,5.VS,5.VD,110000::64,f::MUL.fmt
"mul.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = Clamp (VS[el] * selected VT operand) for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      int product = (value_vr (SD_, scale, VS, el)
                     * select_vr (SD_, SEL, VT, el));
      store_vr (SD_, scale, VD, el, Clamp (SD_, scale, product));
    }
}



// Accumulate Vector Multiply

010010,5.SEL,5.VT,5.VS,00000,110011::64,f::MULA.fmt
"mula.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* acc[el] += VS[el] * selected VT operand, in 64-bit arithmetic.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      signed64 product = ((signed64) value_vr (SD_, scale, VS, el)
                          * (signed64) select_vr (SD_, SEL, VT, el));
      store_acc (SD_, scale, el, value_acc (SD_, scale, el) + product);
    }
}



// Load Vector Multiply

010010,5.SEL,5.VT,5.VS,10000,110011::64,f::MULL.fmt
"mull.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* acc[el] = VS[el] * selected VT operand; the previous accumulator
     contents are not read.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      signed64 product = ((signed64) value_vr (SD_, scale, VS, el)
                          * (signed64) select_vr (SD_, SEL, VT, el));
      store_acc (SD_, scale, el, product);
    }
}



// Subtract Vector Multiply from Accumulator

010010,5.SEL,5.VT,5.VS,00000,110010::64,f::MULS.fmt
"muls.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* acc[el] -= VS[el] * selected VT operand, in 64-bit arithmetic.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      signed64 product = ((signed64) value_vr (SD_, scale, VS, el)
                          * (signed64) select_vr (SD_, SEL, VT, el));
      store_acc (SD_, scale, el, value_acc (SD_, scale, el) - product);
    }
}



// Load Negative Vector Multiply

010010,5.SEL,5.VT,5.VS,10000,110010::64,f::MULSL.fmt
"mulsl.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* acc[el] = -(VS[el] * selected VT operand); the previous
     accumulator contents are not read.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      signed64 product = ((signed64) value_vr (SD_, scale, VS, el)
                          * (signed64) select_vr (SD_, SEL, VT, el));
      store_acc (SD_, scale, el, - product);
    }
}



// Vector Nor.

010010,5.SEL,5.VT,5.VS,5.VD,001111::64,f::NOR.fmt
"nor.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = ~(VS[el] | selected VT operand), for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el = 0;
  while (el < nr_elements)
    {
      store_vr (SD_, scale, VD, el,
                ~(value_vr (SD_, scale, VS, el)
                  | select_vr (SD_, SEL, VT, el)));
      el++;
    }
}



// Vector Or.

010010,5.SEL,5.VT,5.VS,5.VD,001110::64,f::OR.fmt
"or.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = VS[el] | selected VT operand, for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el = 0;
  while (el < nr_elements)
    {
      store_vr (SD_, scale, VD, el,
                (value_vr (SD_, scale, VS, el)
                 | select_vr (SD_, SEL, VT, el)));
      el++;
    }
}



// Select Vector Elements - False

010010,5.SEL,5.VT,5.VS,5.VD,000010::64,f::PICKF.fmt
"pickf.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* For every element: take VS[el] when condition-code bit el is
     clear, otherwise the selected VT operand.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      if (value_cc (SD_, el) == 0)
        store_vr (SD_, scale, VD, el, value_vr (SD_, scale, VS, el));
      else
        store_vr (SD_, scale, VD, el, select_vr (SD_, SEL, VT, el));
    }
}



// Select Vector Elements - True

010010,5.SEL,5.VT,5.VS,5.VD,000011::64,f::PICKT.fmt
"pickt.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* For every element: take VS[el] when condition-code bit el is
     set, otherwise the selected VT operand.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      if (value_cc (SD_, el) != 0)
        store_vr (SD_, scale, VD, el, value_vr (SD_, scale, VS, el));
      else
        store_vr (SD_, scale, VD, el, select_vr (SD_, SEL, VT, el));
    }
}



// Scale, Round and Clamp Accumulator

:%s:64,f:::RND:int rnd
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Name the rounding/clamping mode RND.  The instruction format
     string supplies the leading "r" itself ("r%s<RND>"), so none of
     the names returned here carry it; case 4 used to return "rzs",
     which printed as "rrzs".  */
  switch (rnd)
    {
    case 0:
      return "zu";
    case 1:
      return "nau";
    case 2:
      return "neu";
    case 4:
      return "zs";
    case 5:
      return "nas";
    case 6:
      return "nes";
    default:
      return "(invalid)";
    }
}

:function:64,f::signed:ScaleRoundClamp:int scale, int rnd, signed val, signed shift
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Round VAL, shift it right by SHIFT bits and clamp it to an
     element of size SCALE.  The low two bits of RND choose the
     rounding (0 toward zero, 1 nearest/away from zero, 2
     nearest/even, 3 aborts); bit 2 of RND chooses signed (set) or
     unsigned (clear) clamping.  A negative SHIFT, or one as wide as
     the accumulator element (24 for SCALE 0, 48 for SCALE 1),
     yields 0.
     NOTE(review): VAL and HALFWAY are plain ints, so shift counts of
     31 or more (accepted for SCALE 1) exceed their width - confirm
     the intended operand widths.  */
  int halfway;
  /* must be positive */
  if (shift < 0)
    return 0;
  /* too much shift? */
  switch (scale)
    {
    case 0:
      if (shift >= 24)
        return 0;
      break;
    case 1:
      if (shift >= 48)
        return 0;
      break;
    default:
      abort ();
    }
  /* Half the weight of the last bit kept.  Computed only once SHIFT
     is known to be non-negative, and zero when nothing is shifted
     out, so that "1 << -1" is never evaluated.  */
  halfway = (shift > 0) ? (1 << (shift - 1)) : 0;
  /* round */
  switch (rnd & 3)
    {
    case 0: /* round towards zero */
      break;
    case 1: /* nearest, halfway rounds away from zero */
      if (val >= 0)
        val += halfway;
      else
        val -= halfway;
      break;
    case 2: /* nearest, halfway rounds to even! */
      if (halfway == 0)
        /* nothing is discarded, so there is nothing to round */
        break;
      if (val >= 0)
        {
          if (val & (halfway << 1))
            val += halfway;
          else
            val += (halfway - 1);
        }
      else
        {
          if (val & (halfway << 1))
            val -= halfway;
          else
            val -= (halfway - 1);
        }
      /* This break was missing, so round-to-even always fell into
         the abort below.  */
      break;
    default:
      abort ();
    }
  /* shift */
  val >>= shift;
  /* clamp */
  switch (rnd & 4)
    {
    case 0:
      /* unsigned clamp */
      if (val < 0)
        val = 0;
      else
        switch (scale)
          {
          case 0:
            if (val > 0xff)
              val = 0xff;
            break;
          case 1:
            if (val > 0xffff)
              val = 0xffff;
            break;
          }
      break;
    case 4:
      /* normal signed clamp; the label used to be 8, which
         "rnd & 4" can never produce */
      val = Clamp (SD_, scale, val);
      break;
    }
  return val;
}

010010,5.SEL,5.VT,00000,5.VD,100,3.RND::64,f::Rx.fmt
"r%s<RND>.%s<SEL> v<VD>, v<VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = ScaleRoundClamp (acc[el]) using mode RND, with the
     selected VT operand as the shift count.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      int rounded = ScaleRoundClamp (SD_, scale, RND,
                                     value_acc (SD_, scale, el),
                                     select_vr (SD_, SEL, VT, el));
      store_vr (SD_, scale, VD, el, rounded);
    }
}



// Vector Read  Accumulator Low.

010010,0000,1.SEL,00000,00000,5.VD,111111::64,f::RACL.fmt
"racl.%s<SEL> v<VD>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = the lowest element-sized field of acc[el].  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int top_bit = (8 << scale) - 1;
  int el;
  for (el = 0; el < nr_elements; el++)
    store_vr (SD_, scale, VD, el,
              EXTRACTED (value_acc (SD_, scale, el), top_bit, 0));
}



// Vector Read  Accumulator Middle.

010010,0100,1.SEL,00000,00000,5.VD,111111::64,f::RACM.fmt
"racm.%s<SEL> v<VD>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = the middle element-sized field of acc[el].  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int top_bit = (16 << scale) - 1;
  int bottom_bit = (8 << scale) - 0;
  int el;
  for (el = 0; el < nr_elements; el++)
    store_vr (SD_, scale, VD, el,
              EXTRACTED (value_acc (SD_, scale, el), top_bit, bottom_bit));
}



// Vector Read  Accumulator High.

010010,1000,1.SEL,00000,00000,5.VD,111111::64,f::RACH.fmt
"rach.%s<SEL> v<VD>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = the highest element-sized field of acc[el].  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int top_bit = (24 << scale) - 1;
  int bottom_bit = (16 << scale) - 0;
  int el;
  for (el = 0; el < nr_elements; el++)
    store_vr (SD_, scale, VD, el,
              EXTRACTED (value_acc (SD_, scale, el), top_bit, bottom_bit));
}



// Vector Element Shuffle.

010010,0000,0,5.VT,5.VS,5.VD,011111::64,f::SHFL.UPUH.fmt
"shfl.upuh.ob v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Zero-extend bytes 4..7 of VS into the four halfwords of VD.
     This encoding has no SEL field, so the body no longer calls
     get_scale (SD_, SEL) and the mnemonic spells the format out.
     NOTE(review): the bit pattern is identical to SHFL.UPSH.fmt in
     this file - confirm the intended encodings.  */
  int i;
  for (i = 0; i < 4; i++)
    {
      store_vr (SD_, 1, VD, i,
                value_vr (SD_, 0, VS, i + 4) & 0xff);
    }
}

010010,0001,0,5.VT,5.VS,5.VD,011111::64,f::SHFL.UPUL.fmt
"shfl.upul.ob v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Zero-extend bytes 0..3 of VS into the four halfwords of VD.
     All source bytes are read before anything is stored so that the
     result is also right when VD and VS are the same register.  This
     encoding has no SEL field, so the mnemonic spells the format
     out.
     NOTE(review): the bit pattern is identical to SHFL.UPSL.fmt in
     this file - confirm the intended encodings.  */
  int i;
  signed bytes[4];
  for (i = 0; i < 4; i++)
    bytes[i] = value_vr (SD_, 0, VS, i) & 0xff;
  for (i = 0; i < 4; i++)
    store_vr (SD_, 1, VD, i, bytes[i]);
}

010010,0000,0,5.VT,5.VS,5.VD,011111::64,f::SHFL.UPSH.fmt
"shfl.upsh.ob v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Sign-extend bytes 4..7 of VS into the four halfwords of VD.
     This encoding has no SEL field, so the body no longer calls
     get_scale (SD_, SEL) and the mnemonic spells the format out.
     NOTE(review): the bit pattern is identical to SHFL.UPUH.fmt in
     this file - confirm the intended encodings.  */
  int i;
  for (i = 0; i < 4; i++)
    {
      store_vr (SD_, 1, VD, i,
                value_vr (SD_, 0, VS, i + 4));
    }
}

010010,0001,0,5.VT,5.VS,5.VD,011111::64,f::SHFL.UPSL.fmt
"shfl.upsl.ob v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Sign-extend bytes 0..3 of VS into the four halfwords of VD.
     All source bytes are read before anything is stored so that the
     result is also right when VD and VS are the same register.  This
     encoding has no SEL field, so the mnemonic spells the format
     out.
     NOTE(review): the bit pattern is identical to SHFL.UPUL.fmt in
     this file - confirm the intended encodings.  */
  int i;
  signed bytes[4];
  for (i = 0; i < 4; i++)
    bytes[i] = value_vr (SD_, 0, VS, i);
  for (i = 0; i < 4; i++)
    store_vr (SD_, 1, VD, i, bytes[i]);
}

010010,0100,1.SEL,5.VT,5.VS,5.VD,011111::64,f::SHFL.PACH.fmt
"shfl.pach.%s<SEL> v<VD>, v<VS>, <VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Pack the odd-numbered elements: the low half of VD receives
     those of VT, the high half those of VS.  The high-half index was
     written "1 + (4 >> scale)", which stored every VS element into
     the same slot; it is now "i + half".  Sources are read in full
     before any store so that VD may be the same register as VS or
     VT.  */
  int i;
  int scale = get_scale (SD_, SEL);
  int half = 4 >> scale;
  signed from_vt[4];
  signed from_vs[4];
  for (i = 0; i < half; i++)
    {
      from_vt[i] = value_vr (SD_, scale, VT, i * 2 + 1);
      from_vs[i] = value_vr (SD_, scale, VS, i * 2 + 1);
    }
  for (i = 0; i < half; i++)
    {
      store_vr (SD_, scale, VD, i, from_vt[i]);
      store_vr (SD_, scale, VD, i + half, from_vs[i]);
    }
}

010010,0101,1.SEL,5.VT,5.VS,5.VD,011111::64,f::SHFL.PACL.fmt
"shfl.pacl.%s<SEL> v<VD>, v<VS>, <VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Pack the even-numbered elements: the low half of VD receives
     those of VT, the high half those of VS.  The high-half index was
     written "1 + (4 >> scale)", which stored every VS element into
     the same slot; it is now "i + half".  Sources are read in full
     before any store so that VD may be the same register as VS or
     VT.  */
  int i;
  int scale = get_scale (SD_, SEL);
  int half = 4 >> scale;
  signed from_vt[4];
  signed from_vs[4];
  for (i = 0; i < half; i++)
    {
      from_vt[i] = value_vr (SD_, scale, VT, i * 2);
      from_vs[i] = value_vr (SD_, scale, VS, i * 2);
    }
  for (i = 0; i < half; i++)
    {
      store_vr (SD_, scale, VD, i, from_vt[i]);
      store_vr (SD_, scale, VD, i + half, from_vs[i]);
    }
}

010010,0110,1.SEL,5.VT,5.VS,5.VD,011111::64,f::SHFL.MIXH.fmt
"shfl.mixh.%s<SEL> v<VD>, v<VS>, <VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Interleave the high halves of VT and VS into VD: even slots
     come from VT, odd slots from VS.  */
  int scale = get_scale (SD_, SEL);
  int half = 4 >> scale;
  int pair;
  for (pair = 0; pair < half; pair++)
    {
      int src = pair + half;
      int dst = pair * 2;
      store_vr (SD_, scale, VD, dst,
                value_vr (SD_, scale, VT, src));
      store_vr (SD_, scale, VD, dst + 1,
                value_vr (SD_, scale, VS, src));
    }
}

010010,0111,1.SEL,5.VT,5.VS,5.VD,011111::64,f::SHFL.MIXL.fmt
"shfl.mixl.%s<SEL> v<VD>, v<VS>, <VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Interleave the low halves of VT and VS into VD: even slots come
     from VT, odd slots from VS.  Sources are read in full before any
     store; otherwise, with VD equal to VS or VT, slot 1 would already
     be overwritten by the time it is read.  */
  int i;
  int scale = get_scale (SD_, SEL);
  int half = 4 >> scale;
  signed from_vt[4];
  signed from_vs[4];
  for (i = 0; i < half; i++)
    {
      from_vt[i] = value_vr (SD_, scale, VT, i);
      from_vs[i] = value_vr (SD_, scale, VS, i);
    }
  for (i = 0; i < half; i++)
    {
      store_vr (SD_, scale, VD, i * 2, from_vt[i]);
      store_vr (SD_, scale, VD, i * 2 + 1, from_vs[i]);
    }
}

010010,100,01,5.VT,5.VS,5.VD,011111::64,f::SHFL.BFLA.fmt
"shfl.bfla.qh v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Halfword shuffle: VD = { VT[1], VS[0], VT[3], VS[2] } (slots
     0..3).  All four sources are read before the first store so that
     VD may be the same register as VS or VT.  */
  signed vt1 = value_vr (SD_, 1, VT, 1);
  signed vs0 = value_vr (SD_, 1, VS, 0);
  signed vt3 = value_vr (SD_, 1, VT, 3);
  signed vs2 = value_vr (SD_, 1, VS, 2);
  store_vr (SD_, 1, VD, 0, vt1);
  store_vr (SD_, 1, VD, 1, vs0);
  store_vr (SD_, 1, VD, 2, vt3);
  store_vr (SD_, 1, VD, 3, vs2);
}

010010,101,01,5.VT,5.VS,5.VD,011111::64,f::SHFL.BFLB.fmt
"shfl.bflb.qh v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Halfword shuffle: VD = { VT[3], VS[2], VT[1], VS[0] } (slots
     0..3).  All four sources are read before the first store so that
     VD may be the same register as VS or VT.
     NOTE(review): SHFL.REPA.fmt and SHFL.REPB.fmt in this file use
     the same bit pattern - confirm the intended encodings.  */
  signed vt3 = value_vr (SD_, 1, VT, 3);
  signed vs2 = value_vr (SD_, 1, VS, 2);
  signed vt1 = value_vr (SD_, 1, VT, 1);
  signed vs0 = value_vr (SD_, 1, VS, 0);
  store_vr (SD_, 1, VD, 0, vt3);
  store_vr (SD_, 1, VD, 1, vs2);
  store_vr (SD_, 1, VD, 2, vt1);
  store_vr (SD_, 1, VD, 3, vs0);
}

010010,101,01,5.VT,5.VS,5.VD,011111::64,f::SHFL.REPA.fmt
"shfl.repa.qh v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Halfword shuffle: VD = { VT[2], VT[3], VS[2], VS[3] } (slots
     0..3).
     NOTE(review): SHFL.BFLB.fmt and SHFL.REPB.fmt in this file use
     the same bit pattern - confirm the intended encodings.  */
  int k;
  for (k = 0; k < 2; k++)
    store_vr (SD_, 1, VD, k,
              value_vr (SD_, 1, VT, k + 2));
  for (k = 2; k < 4; k++)
    store_vr (SD_, 1, VD, k,
              value_vr (SD_, 1, VS, k));
}

010010,101,01,5.VT,5.VS,5.VD,011111::64,f::SHFL.REPB.fmt
"shfl.repb.qh v<VD>, v<VS>, <VT>"
*mdmx:
{
  /* Halfword shuffle: VD = { VT[0], VT[1], VS[0], VS[1] } (slots
     0..3).  All four sources are read before the first store so that
     VD may be the same register as VS or VT.
     NOTE(review): SHFL.BFLB.fmt and SHFL.REPA.fmt in this file use
     the same bit pattern - confirm the intended encodings.  */
  signed vt0 = value_vr (SD_, 1, VT, 0);
  signed vt1 = value_vr (SD_, 1, VT, 1);
  signed vs0 = value_vr (SD_, 1, VS, 0);
  signed vs1 = value_vr (SD_, 1, VS, 1);
  store_vr (SD_, 1, VD, 0, vt0);
  store_vr (SD_, 1, VD, 1, vt1);
  store_vr (SD_, 1, VD, 2, vs0);
  store_vr (SD_, 1, VD, 3, vs1);
}



// Vector Shift Left Logical

010010,5.SEL,5.VT,5.VS,5.VD,010000::64,f::SLL.fmt
"sll.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[i] = VS[i] << (selected VT operand modulo the element width).
     The mask is (8 << scale) - 1, i.e. 7 for bytes and 15 for
     halfwords; (4 << scale) - 1 lost the top bit of the count.  */
  int i;
  int scale = get_scale (SD_, SEL);
  int mask = (8 << scale) - 1;
  for (i = 0; i < (8 >> scale); i++)
    store_vr (SD_, scale, VD, i,
              (value_vr (SD_, scale, VS, i)
               << (select_vr (SD_, SEL, VT, i) & mask)));
}



// Vector Shift Right Arithmetic

010010,5.SEL,5.VT,5.VS,5.VD,010011::64,f::SRA.fmt
"sra.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
{
  /* VD[i] = VS[i] >> (selected VT operand modulo the element width),
     shifting the sign-extended element value.  SCALE is now declared
     before MASK, which is computed from it; the mask is
     (8 << scale) - 1, i.e. 7 for bytes and 15 for halfwords.  */
  int i;
  int scale = get_scale (SD_, SEL);
  int mask = (8 << scale) - 1;
  for (i = 0; i < (8 >> scale); i++)
    store_vr (SD_, scale, VD, i,
              (value_vr (SD_, scale, VS, i)
               >> (select_vr (SD_, SEL, VT, i) & mask)));
}



// Vector Shift Right Logical.

010010,5.SEL,5.VT,5.VS,5.VD,010010::64,f::SRL.fmt
"srl.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[i] = VS[i] >> (selected VT operand modulo the element width),
     with the sign extension stripped first (ZEROS keeps only the
     element's own bits) so that zeros are shifted in.  The mask is
     (8 << scale) - 1, i.e. 7 for bytes and 15 for halfwords;
     (4 << scale) - 1 lost the top bit of the count.  */
  int i;
  int scale = get_scale (SD_, SEL);
  int mask = (8 << scale) - 1;
  int zeros = (1 << (8 << scale)) - 1;
  for (i = 0; i < (8 >> scale); i++)
    store_vr (SD_, scale, VD, i,
              ((value_vr (SD_, scale, VS, i) & zeros)
               >> (select_vr (SD_, SEL, VT, i) & mask)));
}



// Vector Subtract.

010010,5.SEL,5.VT,5.VS,5.VD,001010::64,f::SUB.fmt
"sub.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[i] = Clamp (VS[i] - selected VT operand) for every element.
     The difference is now saturated with Clamp, as ADD.fmt and
     MUL.fmt do with their results; before, an out-of-range
     difference simply wrapped when store_vr truncated it.  */
  int i;
  int scale = get_scale (SD_, SEL);
  for (i = 0; i < (8 >> scale); i++)
    store_vr (SD_, scale, VD, i,
              Clamp (SD_, scale,
                     (value_vr (SD_, scale, VS, i)
                      - select_vr (SD_, SEL, VT, i))));
}



// Accumulate Vector Difference

010010,5.SEL,5.VT,5.VS,0,0000,110110::64,f::SUBA.fmt
"suba.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
{
  /* acc[i] += VS[i] - selected VT operand.
     This encoding has no VD field: the stray VD argument to store_acc
     (which takes scale, element, value) and the v<VD> in the format
     string are gone, and value_acc is passed SD_ rather than SD.  */
  int i;
  int scale = get_scale (SD_, SEL);
  for (i = 0; i < (8 >> scale); i++)
    store_acc (SD_, scale, i,
               (value_acc (SD_, scale, i)
                + (signed64) value_vr (SD_, scale, VS, i)
                - (signed64) select_vr (SD_, SEL, VT, i)));
}



// Load Vector Difference

010010,5.SEL,5.VT,5.VS,1,0000,110110::64,f::SUBL.fmt
"subl.%s<SEL> v<VS>, %s<VT#SEL,VT>"
*mdmx:
{
  /* acc[i] = VS[i] - selected VT operand (previous accumulator
     contents are discarded).
     This encoding has no VD field: the stray VD argument to store_acc
     (which takes scale, element, value) and the v<VD> in the format
     string are gone.  */
  int i;
  int scale = get_scale (SD_, SEL);
  for (i = 0; i < (8 >> scale); i++)
    store_acc (SD_, scale, i,
               ((signed64) value_vr (SD_, scale, VS, i)
                - (signed64) select_vr (SD_, SEL, VT, i)));
}



// Write Accumulator High.

010010,1000,1.SEL,00000,5.VS,00000,111110::64,f::WACH.fmt
"wach.%s<SEL> v<VS>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* Replace the top element-sized field of acc[el] with VS[el],
     keeping the two lower fields.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int kept_bits = 16 << scale;
  int el;
  for (el = 0; el < nr_elements; el++)
    {
      signed64 high = (signed64) value_vr (SD_, scale, VS, el) << kept_bits;
      store_acc (SD_, scale, el,
                 (high
                  | MASKED (value_acc (SD_, scale, el), kept_bits - 1, 0)));
    }
}



// Vector Write Accumulator Low.

010010,0000,1.SEL,5.VT,5.VS,00000,111110::64,f::WACL.fmt
"wacl.%s<SEL> v<VS>, <VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* acc[i] = sign-extended VS[i] placed above the element-sized low
     field, which receives VT[i].  The shift and mask are one element
     wide (8 << scale); the previous (16 << scale), copied from
     WACH.fmt, left a sign-extended VT spread over two fields and
     pushed VS into the top field only.
     NOTE(review): layout taken from the MDMX description of WACL -
     confirm against the specification.  */
  int i;
  int scale = get_scale (SD_, SEL);
  for (i = 0; i < (8 >> scale); i++)
    store_acc (SD_, scale, i,
               (((signed64) value_vr (SD_, scale, VS, i) << (8 << scale))
                | MASKED (value_vr (SD_, scale, VT, i),
                          (8 << scale) - 1, 0)));
}



// Vector Xor.

010010,5.SEL,5.VT,5.VS,5.VD,001101::64,f::XOR.fmt
"xor.%s<SEL> v<VD>, v<VS>, %s<VT#SEL,VT>"
*mdmx:
// start-sanitize-vr5400
*vr5400:
// end-sanitize-vr5400
{
  /* VD[el] = VS[el] ^ selected VT operand, for every element.  */
  int scale = get_scale (SD_, SEL);
  int nr_elements = 8 >> scale;
  int el = 0;
  while (el < nr_elements)
    {
      store_vr (SD_, scale, VD, el,
                (value_vr (SD_, scale, VS, el)
                 ^ select_vr (SD_, SEL, VT, el)));
      el++;
    }
}