/* Integrated Register Allocator.  Changing code and generating moves.
   Copyright (C) 2006-2024 Free Software Foundation, Inc.
   Contributed by Vladimir Makarov <vmakarov@redhat.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* When we have more one region, we need to change the original RTL
   code after coloring.  Let us consider two allocnos representing the
   same pseudo-register outside and inside a region respectively.
   They can get different hard-registers.  The reload pass works on
   pseudo registers basis and there is no way to say the reload that
   pseudo could be in different registers and it is even more
   difficult to say in what places of the code the pseudo should have
   particular hard-registers.  So in this case IRA has to create and
   use a new pseudo-register inside the region and adds code to move
   allocno values on the region's borders.  This is done by the code
   in this file.

   The code makes top-down traversal of the regions and generate new
   pseudos and the move code on the region borders.  In some
   complicated cases IRA can create a new pseudo used temporarily to
   move allocno values when a swap of values stored in two
   hard-registers is needed (e.g. two allocnos representing different
   pseudos outside region got respectively hard registers 1 and 2 and
   the corresponding allocnos inside the region got respectively hard
   registers 2 and 1).  At this stage, the new pseudo is marked as
   spilled.

   IRA still creates the pseudo-register and the moves on the region
   borders even when the both corresponding allocnos were assigned to
   the same hard-register.  It is done because, if the reload pass for
   some reason spills a pseudo-register representing the original
   pseudo outside or inside the region, the effect will be smaller
   because another pseudo will still be in the hard-register.  In most
   cases, this is better then spilling the original pseudo in its
   whole live-range.  If reload does not change the allocation for the
   two pseudo-registers, the trivial move will be removed by
   post-reload optimizations.

   IRA does not generate a new pseudo and moves for the allocno values
   if the both allocnos representing an original pseudo inside and
   outside region assigned to the same hard register when the register
   pressure in the region for the corresponding pressure class is less
   than number of available hard registers for given pressure class.

   IRA also does some optimizations to remove redundant moves which is
   transformed into stores by the reload pass on CFG edges
   representing exits from the region.

   IRA tries to reduce duplication of code generated on CFG edges
   which are enters and exits to/from regions by moving some code to
   the edge sources or destinations when it is possible.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "df.h"
#include "insn-config.h"
#include "regs.h"
#include "memmodel.h"
#include "ira.h"
#include "ira-int.h"
#include "cfgrtl.h"
#include "cfgbuild.h"
#include "expr.h"
#include "reload.h"
#include "cfgloop.h"


/* Data used to emit live range split insns and to flattening IR.  */
ira_emit_data_t ira_allocno_emit_data;

/* Definitions for vectors of pointers.  */
typedef void *void_p;

/* Pointers to data allocated for allocnos being created during
   emitting.  Usually there are quite few such allocnos because they
   are created only for resolving loop in register shuffling.  */
static vec<void_p> new_allocno_emit_data_vec;

/* Allocate and initiate the emit data.  */
void
ira_initiate_emit_data (void)
{
  ira_allocno_t a;
  ira_allocno_iterator ai;

  ira_allocno_emit_data
    = (ira_emit_data_t) ira_allocate (ira_allocnos_num
				      * sizeof (struct ira_emit_data));
  memset (ira_allocno_emit_data, 0,
	  ira_allocnos_num * sizeof (struct ira_emit_data));
  FOR_EACH_ALLOCNO (a, ai)
    ALLOCNO_ADD_DATA (a) = ira_allocno_emit_data + ALLOCNO_NUM (a);
  new_allocno_emit_data_vec.create (50);

}

/* Free the emit data.  */
void
ira_finish_emit_data (void)
{
  void_p p;
  ira_allocno_t a;
  ira_allocno_iterator ai;

  ira_free (ira_allocno_emit_data);
  FOR_EACH_ALLOCNO (a, ai)
    ALLOCNO_ADD_DATA (a) = NULL;
  for (;new_allocno_emit_data_vec.length () != 0;)
    {
      p = new_allocno_emit_data_vec.pop ();
      ira_free (p);
    }
  new_allocno_emit_data_vec.release ();
}

/* Create and return a new allocno with given REGNO and
   LOOP_TREE_NODE.  Allocate emit data for it.  */
static ira_allocno_t
create_new_allocno (int regno, ira_loop_tree_node_t loop_tree_node)
{
  ira_allocno_t a;

  a = ira_create_allocno (regno, false, loop_tree_node);
  ALLOCNO_ADD_DATA (a) = ira_allocate (sizeof (struct ira_emit_data));
  memset (ALLOCNO_ADD_DATA (a), 0, sizeof (struct ira_emit_data));
  new_allocno_emit_data_vec.safe_push (ALLOCNO_ADD_DATA (a));
  return a;
}


/* See comments below.  */
typedef struct move *move_t;

/* The structure represents an allocno move.  Both allocnos have the
   same original regno but different allocation.  */
struct move
{
  /* The allocnos involved in the move.  */
  ira_allocno_t from, to;
  /* The next move in the move sequence.  */
  move_t next;
  /* Used for finding dependencies.  */
  bool visited_p;
  /* The size of the following array. */
  int deps_num;
  /* Moves on which given move depends on.  Dependency can be cyclic.
     It means we need a temporary to generates the moves.  Sequence
     A1->A2, B1->B2 where A1 and B2 are assigned to reg R1 and A2 and
     B1 are assigned to reg R2 is an example of the cyclic
     dependencies.  */
  move_t *deps;
  /* First insn generated for the move.  */
  rtx_insn *insn;
};

/* Array of moves (indexed by BB index) which should be put at the
   start/end of the corresponding basic blocks.  */
static move_t *at_bb_start, *at_bb_end;

/* Max regno before renaming some pseudo-registers.  For example, the
   same pseudo-register can be renamed in a loop if its allocation is
   different outside the loop.  */
static int max_regno_before_changing;

/* Return new move of allocnos TO and FROM.  */
static move_t
create_move (ira_allocno_t to, ira_allocno_t from)
{
  move_t move;

  move = (move_t) ira_allocate (sizeof (struct move));
  move->deps = NULL;
  move->deps_num = 0;
  move->to = to;
  move->from = from;
  move->next = NULL;
  move->insn = NULL;
  move->visited_p = false;
  return move;
}

/* Free memory for MOVE and its dependencies.  */
static void
free_move (move_t move)
{
  if (move->deps != NULL)
    ira_free (move->deps);
  ira_free (move);
}

/* Free memory for list of the moves given by its HEAD.  */
static void
free_move_list (move_t head)
{
  move_t next;

  for (; head != NULL; head = next)
    {
      next = head->next;
      free_move (head);
    }
}

/* Return TRUE if the move list LIST1 and LIST2 are equal (two
   moves are equal if they involve the same allocnos).  */
static bool
eq_move_lists_p (move_t list1, move_t list2)
{
  for (; list1 != NULL && list2 != NULL;
       list1 = list1->next, list2 = list2->next)
    if (list1->from != list2->from || list1->to != list2->to)
      return false;
  return list1 == list2;
}

/* Print move list LIST into file F.  */
static void
print_move_list (FILE *f, move_t list)
{
  for (; list != NULL; list = list->next)
    fprintf (f, " a%dr%d->a%dr%d",
	     ALLOCNO_NUM (list->from), ALLOCNO_REGNO (list->from),
	     ALLOCNO_NUM (list->to), ALLOCNO_REGNO (list->to));
  fprintf (f, "\n");
}

extern void ira_debug_move_list (move_t list);

/* Print move list LIST into stderr.  */
void
ira_debug_move_list (move_t list)
{
  print_move_list (stderr, list);
}

/* This recursive function changes pseudo-registers in *LOC if it is
   necessary.  The function returns TRUE if a change was done.  */
static bool
change_regs (rtx *loc)
{
  int i, regno, result = false;
  const char *fmt;
  enum rtx_code code;
  rtx reg;

  if (*loc == NULL_RTX)
    return false;
  code = GET_CODE (*loc);
  if (code == REG)
    {
      regno = REGNO (*loc);
      if (regno < FIRST_PSEUDO_REGISTER)
	return false;
      if (regno >= max_regno_before_changing)
	/* It is a shared register which was changed already.  */
	return false;
      if (ira_curr_regno_allocno_map[regno] == NULL)
	return false;
      reg = allocno_emit_reg (ira_curr_regno_allocno_map[regno]);
      if (reg == *loc)
	return false;
      *loc = reg;
      return true;
    }

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
	result = change_regs (&XEXP (*loc, i)) || result;
      else if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
	    result = change_regs (&XVECEXP (*loc, i, j)) || result;
	}
    }
  return result;
}

static bool
change_regs_in_insn (rtx_insn **insn_ptr)
{
  rtx rtx = *insn_ptr;
  bool result = change_regs (&rtx);
  *insn_ptr = as_a <rtx_insn *> (rtx);
  return result;
}

/* Attach MOVE to the edge E.  The move is attached to the head of the
   list if HEAD_P is TRUE.  */
static void
add_to_edge_list (edge e, move_t move, bool head_p)
{
  move_t last;

  if (head_p || e->aux == NULL)
    {
      move->next = (move_t) e->aux;
      e->aux = move;
    }
  else
    {
      for (last = (move_t) e->aux; last->next != NULL; last = last->next)
	;
      last->next = move;
      move->next = NULL;
    }
}

/* Create and return new pseudo-register with the same attributes as
   ORIGINAL_REG.  */
rtx
ira_create_new_reg (rtx original_reg)
{
  rtx new_reg;

  new_reg = gen_reg_rtx (GET_MODE (original_reg));
  ORIGINAL_REGNO (new_reg) = ORIGINAL_REGNO (original_reg);
  REG_USERVAR_P (new_reg) = REG_USERVAR_P (original_reg);
  REG_POINTER (new_reg) = REG_POINTER (original_reg);
  REG_ATTRS (new_reg) = REG_ATTRS (original_reg);
  if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
    fprintf (ira_dump_file, "      Creating newreg=%i from oldreg=%i\n",
	     REGNO (new_reg), REGNO (original_reg));
  ira_expand_reg_equiv ();
  return new_reg;
}

/* Return TRUE if loop given by SUBNODE inside the loop given by
   NODE.  */
static bool
subloop_tree_node_p (ira_loop_tree_node_t subnode, ira_loop_tree_node_t node)
{
  for (; subnode != NULL; subnode = subnode->parent)
    if (subnode == node)
      return true;
  return false;
}

/* Set up member `reg' to REG for allocnos which has the same regno as
   ALLOCNO and which are inside the loop corresponding to ALLOCNO. */
static void
set_allocno_reg (ira_allocno_t allocno, rtx reg)
{
  int regno;
  ira_allocno_t a;
  ira_loop_tree_node_t node;

  node = ALLOCNO_LOOP_TREE_NODE (allocno);
  for (a = ira_regno_allocno_map[ALLOCNO_REGNO (allocno)];
       a != NULL;
       a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
    if (subloop_tree_node_p (ALLOCNO_LOOP_TREE_NODE (a), node))
      ALLOCNO_EMIT_DATA (a)->reg = reg;
  for (a = ALLOCNO_CAP (allocno); a != NULL; a = ALLOCNO_CAP (a))
    ALLOCNO_EMIT_DATA (a)->reg = reg;
  regno = ALLOCNO_REGNO (allocno);
  for (a = allocno;;)
    {
      if (a == NULL || (a = ALLOCNO_CAP (a)) == NULL)
	{
	  node = node->parent;
	  if (node == NULL)
	    break;
	  a = node->regno_allocno_map[regno];
	}
      if (a == NULL)
	continue;
      if (ALLOCNO_EMIT_DATA (a)->child_renamed_p)
	break;
      ALLOCNO_EMIT_DATA (a)->child_renamed_p = true;
    }
}

/* Return true if there is an entry to given loop not from its parent
   (or grandparent) block.  For example, it is possible for two
   adjacent loops inside another loop.  */
static bool
entered_from_non_parent_p (ira_loop_tree_node_t loop_node)
{
  ira_loop_tree_node_t bb_node, src_loop_node, parent;
  edge e;
  edge_iterator ei;

  for (bb_node = loop_node->children;
       bb_node != NULL;
       bb_node = bb_node->next)
    if (bb_node->bb != NULL)
      {
	FOR_EACH_EDGE (e, ei, bb_node->bb->preds)
	  if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	      && (src_loop_node = IRA_BB_NODE (e->src)->parent) != loop_node)
	    {
	      for (parent = src_loop_node->parent;
		   parent != NULL;
		   parent = parent->parent)
		if (parent == loop_node)
		  break;
	      if (parent != NULL)
		/* That is an exit from a nested loop -- skip it.  */
		continue;
	      for (parent = loop_node->parent;
		   parent != NULL;
		   parent = parent->parent)
		if (src_loop_node == parent)
		  break;
	      if (parent == NULL)
		return true;
	    }
      }
  return false;
}

/* Set up ENTERED_FROM_NON_PARENT_P for each loop region.  */
static void
setup_entered_from_non_parent_p (void)
{
  unsigned int i;
  loop_p loop;

  ira_assert (current_loops != NULL);
  FOR_EACH_VEC_SAFE_ELT (get_loops (cfun), i, loop)
    if (ira_loop_nodes[i].regno_allocno_map != NULL)
      ira_loop_nodes[i].entered_from_non_parent_p
	= entered_from_non_parent_p (&ira_loop_nodes[i]);
}

/* Return TRUE if move of SRC_ALLOCNO (assigned to hard register) to
   DEST_ALLOCNO (assigned to memory) can be removed because it does
   not change value of the destination.  One possible reason for this
   is the situation when SRC_ALLOCNO is not modified in the
   corresponding loop.  */
static bool
store_can_be_removed_p (ira_allocno_t src_allocno, ira_allocno_t dest_allocno)
{
  int regno, orig_regno;
  ira_allocno_t a;
  ira_loop_tree_node_t node;

  ira_assert (ALLOCNO_CAP_MEMBER (src_allocno) == NULL
	      && ALLOCNO_CAP_MEMBER (dest_allocno) == NULL);
  orig_regno = ALLOCNO_REGNO (src_allocno);
  regno = REGNO (allocno_emit_reg (dest_allocno));
  for (node = ALLOCNO_LOOP_TREE_NODE (src_allocno);
       node != NULL;
       node = node->parent)
    {
      a = node->regno_allocno_map[orig_regno];
      ira_assert (a != NULL);
      if (REGNO (allocno_emit_reg (a)) == (unsigned) regno)
	/* We achieved the destination and everything is ok.  */
	return true;
      else if (bitmap_bit_p (node->modified_regnos, orig_regno))
	return false;
      else if (node->entered_from_non_parent_p)
	/* If there is a path from a destination loop block to the
	   source loop header containing basic blocks of non-parents
	   (grandparents) of the source loop, we should have checked
	   modifications of the pseudo on this path too to decide
	   about possibility to remove the store.  It could be done by
	   solving a data-flow problem.  Unfortunately such global
	   solution would complicate IR flattening.  Therefore we just
	   prohibit removal of the store in such complicated case.  */
	return false;
    }
  /* It is actually a loop entry -- do not remove the store.  */
  return false;
}

/* Generate and attach moves to the edge E.  This looks at the final
   regnos of allocnos living on the edge with the same original regno
   to figure out when moves should be generated.  */
static void
generate_edge_moves (edge e)
{
  ira_loop_tree_node_t src_loop_node, dest_loop_node;
  unsigned int regno;
  bitmap_iterator bi;
  ira_allocno_t src_allocno, dest_allocno, *src_map, *dest_map;
  move_t move;
  bitmap regs_live_in_dest, regs_live_out_src;

  src_loop_node = IRA_BB_NODE (e->src)->parent;
  dest_loop_node = IRA_BB_NODE (e->dest)->parent;
  e->aux = NULL;
  if (src_loop_node == dest_loop_node)
    return;
  src_map = src_loop_node->regno_allocno_map;
  dest_map = dest_loop_node->regno_allocno_map;
  regs_live_in_dest = df_get_live_in (e->dest);
  regs_live_out_src = df_get_live_out (e->src);
  EXECUTE_IF_SET_IN_REG_SET (regs_live_in_dest,
			     FIRST_PSEUDO_REGISTER, regno, bi)
    if (bitmap_bit_p (regs_live_out_src, regno))
      {
	src_allocno = src_map[regno];
	dest_allocno = dest_map[regno];
	if (REGNO (allocno_emit_reg (src_allocno))
	    == REGNO (allocno_emit_reg (dest_allocno)))
	  continue;
	/* Remove unnecessary stores at the region exit.  We should do
	   this for readonly memory for sure and this is guaranteed by
	   that we never generate moves on region borders (see
	   checking in function change_loop).  */
 	if (ALLOCNO_HARD_REGNO (dest_allocno) < 0
	    && ALLOCNO_HARD_REGNO (src_allocno) >= 0
	    && store_can_be_removed_p (src_allocno, dest_allocno))
	  {
	    ALLOCNO_EMIT_DATA (src_allocno)->mem_optimized_dest = dest_allocno;
	    ALLOCNO_EMIT_DATA (dest_allocno)->mem_optimized_dest_p = true;
	    if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	      fprintf (ira_dump_file, "      Remove r%d:a%d->a%d(mem)\n",
		       regno, ALLOCNO_NUM (src_allocno),
		       ALLOCNO_NUM (dest_allocno));
	    continue;
	  }
	move = create_move (dest_allocno, src_allocno);
	add_to_edge_list (e, move, true);
    }
}

/* Bitmap of allocnos local for the current loop.  */
static bitmap local_allocno_bitmap;

/* This bitmap is used to find that we need to generate and to use a
   new pseudo-register when processing allocnos with the same original
   regno.  */
static bitmap used_regno_bitmap;

/* This bitmap contains regnos of allocnos which were renamed locally
   because the allocnos correspond to disjoint live ranges in loops
   with a common parent.  */
static bitmap renamed_regno_bitmap;

/* Change (if necessary) pseudo-registers inside loop given by loop
   tree node NODE.  */
static void
change_loop (ira_loop_tree_node_t node)
{
  bitmap_iterator bi;
  unsigned int i;
  int regno;
  bool used_p;
  ira_allocno_t allocno, parent_allocno, *map;
  rtx_insn *insn;
  rtx original_reg;
  enum reg_class aclass, pclass;
  ira_loop_tree_node_t parent;

  if (node != ira_loop_tree_root)
    {
      ira_assert (current_loops != NULL);
      
      if (node->bb != NULL)
	{
	  FOR_BB_INSNS (node->bb, insn)
	    if (INSN_P (insn) && change_regs_in_insn (&insn))
	      {
		df_insn_rescan (insn);
		df_notes_rescan (insn);
	      }
	  return;
	}

      if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
	fprintf (ira_dump_file,
		 "      Changing RTL for loop %d (header bb%d)\n",
		 node->loop_num, node->loop->header->index);

      parent = ira_curr_loop_tree_node->parent;
      map = parent->regno_allocno_map;
      EXECUTE_IF_SET_IN_REG_SET (ira_curr_loop_tree_node->border_allocnos,
				 0, i, bi)
	{
	  allocno = ira_allocnos[i];
	  regno = ALLOCNO_REGNO (allocno);
	  aclass = ALLOCNO_CLASS (allocno);
	  pclass = ira_pressure_class_translate[aclass];
	  parent_allocno = map[regno];
	  ira_assert (regno < ira_reg_equiv_len);
	  /* We generate the same hard register move because the
	     reload pass can put an allocno into memory in this case
	     we will have live range splitting.  If it does not happen
	     such the same hard register moves will be removed.  The
	     worst case when the both allocnos are put into memory by
	     the reload is very rare.  */
	  if (parent_allocno != NULL
	      && (ALLOCNO_HARD_REGNO (allocno)
		  == ALLOCNO_HARD_REGNO (parent_allocno))
	      && (ALLOCNO_HARD_REGNO (allocno) < 0
		  || (parent->reg_pressure[pclass] + 1
		      <= ira_class_hard_regs_num[pclass])
		  || TEST_HARD_REG_BIT (ira_prohibited_mode_move_regs
					[ALLOCNO_MODE (allocno)],
					ALLOCNO_HARD_REGNO (allocno))
		  /* don't create copies because reload can spill an
		     allocno set by copy although the allocno will not
		     get memory slot.  */
		  || ira_equiv_no_lvalue_p (regno)
		  || (pic_offset_table_rtx != NULL
		      && (ALLOCNO_REGNO (allocno)
			  == (int) REGNO (pic_offset_table_rtx)))))
	    continue;
	  original_reg = allocno_emit_reg (allocno);
	  if (parent_allocno == NULL
	      || (REGNO (allocno_emit_reg (parent_allocno))
		  == REGNO (original_reg)))
	    {
	      if (internal_flag_ira_verbose > 3 && ira_dump_file)
		fprintf (ira_dump_file, "  %i vs parent %i:",
			 ALLOCNO_HARD_REGNO (allocno),
			 ALLOCNO_HARD_REGNO (parent_allocno));
	      set_allocno_reg (allocno, ira_create_new_reg (original_reg));
	    }
	}
    }
  /* Rename locals: Local allocnos with same regno in different loops
     might get the different hard register.  So we need to change
     ALLOCNO_REG.  */
  bitmap_and_compl (local_allocno_bitmap,
		    ira_curr_loop_tree_node->all_allocnos,
		    ira_curr_loop_tree_node->border_allocnos);
  EXECUTE_IF_SET_IN_REG_SET (local_allocno_bitmap, 0, i, bi)
    {
      allocno = ira_allocnos[i];
      regno = ALLOCNO_REGNO (allocno);
      if (ALLOCNO_CAP_MEMBER (allocno) != NULL)
	continue;
      used_p = !bitmap_set_bit (used_regno_bitmap, regno);
      ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
      if (! used_p)
	continue;
      bitmap_set_bit (renamed_regno_bitmap, regno);
      set_allocno_reg (allocno, ira_create_new_reg (allocno_emit_reg (allocno)));
    }
}

/* Process to set up flag somewhere_renamed_p.  */
static void
set_allocno_somewhere_renamed_p (void)
{
  unsigned int regno;
  ira_allocno_t allocno;
  ira_allocno_iterator ai;

  FOR_EACH_ALLOCNO (allocno, ai)
    {
      regno = ALLOCNO_REGNO (allocno);
      if (bitmap_bit_p (renamed_regno_bitmap, regno)
	  && REGNO (allocno_emit_reg (allocno)) == regno)
	ALLOCNO_EMIT_DATA (allocno)->somewhere_renamed_p = true;
    }
}

/* Return TRUE if move lists on all edges given in vector VEC are
   equal.  */
static bool
eq_edge_move_lists_p (vec<edge, va_gc> *vec)
{
  move_t list;
  int i;

  list = (move_t) EDGE_I (vec, 0)->aux;
  for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
    if (! eq_move_lists_p (list, (move_t) EDGE_I (vec, i)->aux))
      return false;
  return true;
}

/* Look at all entry edges (if START_P) or exit edges of basic block
   BB and put move lists at the BB start or end if it is possible.  In
   other words, this decreases code duplication of allocno moves.  */
static void
unify_moves (basic_block bb, bool start_p)
{
  int i;
  edge e;
  move_t list;
  vec<edge, va_gc> *vec;

  vec = (start_p ? bb->preds : bb->succs);
  if (EDGE_COUNT (vec) == 0 || ! eq_edge_move_lists_p (vec))
    return;
  e = EDGE_I (vec, 0);
  list = (move_t) e->aux;
  if (! start_p && control_flow_insn_p (BB_END (bb)))
    return;
  e->aux = NULL;
  for (i = EDGE_COUNT (vec) - 1; i > 0; i--)
    {
      e = EDGE_I (vec, i);
      free_move_list ((move_t) e->aux);
      e->aux = NULL;
    }
  if (start_p)
    at_bb_start[bb->index] = list;
  else
    at_bb_end[bb->index] = list;
}

/* Last move (in move sequence being processed) setting up the
   corresponding hard register.  */
static move_t hard_regno_last_set[FIRST_PSEUDO_REGISTER];

/* If the element value is equal to CURR_TICK then the corresponding
   element in `hard_regno_last_set' is defined and correct.  */
static int hard_regno_last_set_check[FIRST_PSEUDO_REGISTER];

/* Last move (in move sequence being processed) setting up the
   corresponding allocno.  */
static move_t *allocno_last_set;

/* If the element value is equal to CURR_TICK then the corresponding
   element in . `allocno_last_set' is defined and correct.  */
static int *allocno_last_set_check;

/* Definition of vector of moves.  */

/* This vec contains moves sorted topologically (depth-first) on their
   dependency graph.  */
static vec<move_t> move_vec;

/* The variable value is used to check correctness of values of
   elements of arrays `hard_regno_last_set' and
   `allocno_last_set_check'.  */
static int curr_tick;

/* This recursive function traverses dependencies of MOVE and produces
   topological sorting (in depth-first order).  */
static void
traverse_moves (move_t move)
{
  int i;

  if (move->visited_p)
    return;
  move->visited_p = true;
  for (i = move->deps_num - 1; i >= 0; i--)
    traverse_moves (move->deps[i]);
  move_vec.safe_push (move);
}

/* Remove unnecessary moves in the LIST, makes topological sorting,
   and removes cycles on hard reg dependencies by introducing new
   allocnos assigned to memory and additional moves.  It returns the
   result move list.  */
static move_t
modify_move_list (move_t list)
{
  int i, n, nregs, hard_regno;
  ira_allocno_t to, from;
  move_t move, new_move, set_move, first, last;

  if (list == NULL)
    return NULL;
  /* Create move deps.  */
  curr_tick++;
  for (move = list; move != NULL; move = move->next)
    {
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
	continue;
      nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (to));
      for (i = 0; i < nregs; i++)
	{
	  hard_regno_last_set[hard_regno + i] = move;
	  hard_regno_last_set_check[hard_regno + i] = curr_tick;
	}
    }
  for (move = list; move != NULL; move = move->next)
    {
      from = move->from;
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
	{
	  nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (from));
	  for (n = i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
		    != ALLOCNO_REGNO (from)))
	      n++;
	  move->deps = (move_t *) ira_allocate (n * sizeof (move_t));
	  for (n = i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& (ALLOCNO_REGNO (hard_regno_last_set[hard_regno + i]->to)
		    != ALLOCNO_REGNO (from)))
	      move->deps[n++] = hard_regno_last_set[hard_regno + i];
	  move->deps_num = n;
	}
    }
  /* Topological sorting:  */
  move_vec.truncate (0);
  for (move = list; move != NULL; move = move->next)
    traverse_moves (move);
  last = NULL;
  for (i = (int) move_vec.length () - 1; i >= 0; i--)
    {
      move = move_vec[i];
      move->next = NULL;
      if (last != NULL)
	last->next = move;
      last = move;
    }
  first = move_vec.last ();
  /* Removing cycles:  */
  curr_tick++;
  move_vec.truncate (0);
  for (move = first; move != NULL; move = move->next)
    {
      from = move->from;
      to = move->to;
      if ((hard_regno = ALLOCNO_HARD_REGNO (from)) >= 0)
	{
	  nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (from));
	  for (i = 0; i < nregs; i++)
	    if (hard_regno_last_set_check[hard_regno + i] == curr_tick
		&& ALLOCNO_HARD_REGNO
		   (hard_regno_last_set[hard_regno + i]->to) >= 0)
	      {
		int n, j;
		ira_allocno_t new_allocno;

		set_move = hard_regno_last_set[hard_regno + i];
		/* It does not matter what loop_tree_node (of TO or
		   FROM) to use for the new allocno because of
		   subsequent IRA internal representation
		   flattening.  */
		new_allocno
		  = create_new_allocno (ALLOCNO_REGNO (set_move->to),
					ALLOCNO_LOOP_TREE_NODE (set_move->to));
		ALLOCNO_MODE (new_allocno) = ALLOCNO_MODE (set_move->to);
		ira_set_allocno_class (new_allocno,
				       ALLOCNO_CLASS (set_move->to));
		ira_create_allocno_objects (new_allocno);
		ALLOCNO_ASSIGNED_P (new_allocno) = true;
		ALLOCNO_HARD_REGNO (new_allocno) = -1;
		ALLOCNO_EMIT_DATA (new_allocno)->reg
		  = ira_create_new_reg (allocno_emit_reg (set_move->to));

		/* Make it possibly conflicting with all earlier
		   created allocnos.  Cases where temporary allocnos
		   created to remove the cycles are quite rare.  */
		n = ALLOCNO_NUM_OBJECTS (new_allocno);
		gcc_assert (n == ALLOCNO_NUM_OBJECTS (set_move->to));
		for (j = 0; j < n; j++)
		  {
		    ira_object_t new_obj = ALLOCNO_OBJECT (new_allocno, j);

		    OBJECT_MIN (new_obj) = 0;
		    OBJECT_MAX (new_obj) = ira_objects_num - 1;
		  }

		new_move = create_move (set_move->to, new_allocno);
		set_move->to = new_allocno;
		move_vec.safe_push (new_move);
		ira_move_loops_num++;
		if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		  fprintf (ira_dump_file,
			   "    Creating temporary allocno a%dr%d\n",
			   ALLOCNO_NUM (new_allocno),
			   REGNO (allocno_emit_reg (new_allocno)));
	      }
	}
      if ((hard_regno = ALLOCNO_HARD_REGNO (to)) < 0)
	continue;
      nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (to));
      for (i = 0; i < nregs; i++)
	{
	  hard_regno_last_set[hard_regno + i] = move;
	  hard_regno_last_set_check[hard_regno + i] = curr_tick;
	}
    }
  for (i = (int) move_vec.length () - 1; i >= 0; i--)
    {
      move = move_vec[i];
      move->next = NULL;
      last->next = move;
      last = move;
    }
  return first;
}

/* Generate RTX move insns from the move list LIST.  This updates
   allocation cost using move execution frequency FREQ.  */
static rtx_insn *
emit_move_list (move_t list, int freq)
{
  rtx to, from, dest;
  int to_regno, from_regno, cost, regno;
  rtx_insn *result, *insn;
  rtx set;
  machine_mode mode;
  enum reg_class aclass;

  grow_reg_equivs ();
  start_sequence ();
  for (; list != NULL; list = list->next)
    {
      start_sequence ();
      to = allocno_emit_reg (list->to);
      to_regno = REGNO (to);
      from = allocno_emit_reg (list->from);
      from_regno = REGNO (from);
      emit_move_insn (to, from);
      list->insn = get_insns ();
      end_sequence ();
      for (insn = list->insn; insn != NULL_RTX; insn = NEXT_INSN (insn))
	{
	  /* The reload needs to have set up insn codes.  If the
	     reload sets up insn codes by itself, it may fail because
	     insns will have hard registers instead of pseudos and
	     there may be no machine insn with given hard
	     registers.  */
	  recog_memoized (insn);
	  /* Add insn to equiv init insn list if it is necessary.
	     Otherwise reload will not remove this insn if it decides
	     to use the equivalence.  */
	  if ((set = single_set (insn)) != NULL_RTX)
	    {
	      dest = SET_DEST (set);
	      if (GET_CODE (dest) == SUBREG)
		dest = SUBREG_REG (dest);
	      ira_assert (REG_P (dest));
	      regno = REGNO (dest);
	      if (regno >= ira_reg_equiv_len
		  || (ira_reg_equiv[regno].invariant == NULL_RTX
		      && ira_reg_equiv[regno].constant == NULL_RTX))
		continue; /* regno has no equivalence.  */
	      ira_assert ((int) reg_equivs->length () > regno);
	      reg_equiv_init (regno)
		= gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init (regno));
	    }
	}
      if (ira_use_lra_p)
	ira_update_equiv_info_by_shuffle_insn (to_regno, from_regno, list->insn);
      emit_insn (list->insn);
      mode = ALLOCNO_MODE (list->to);
      aclass = ALLOCNO_CLASS (list->to);
      cost = 0;
      if (ALLOCNO_HARD_REGNO (list->to) < 0)
	{
	  if (ALLOCNO_HARD_REGNO (list->from) >= 0)
	    {
	      cost = ira_memory_move_cost[mode][aclass][0] * freq;
	      ira_store_cost += cost;
	    }
	}
      else if (ALLOCNO_HARD_REGNO (list->from) < 0)
	{
	  if (ALLOCNO_HARD_REGNO (list->to) >= 0)
	    {
	      cost = ira_memory_move_cost[mode][aclass][0] * freq;
	      ira_load_cost += cost;
	    }
	}
      else
	{
	  ira_init_register_move_cost_if_necessary (mode);
	  cost = ira_register_move_cost[mode][aclass][aclass] * freq;
	  ira_shuffle_cost += cost;
	}
      ira_overall_cost += cost;
    }
  result = get_insns ();
  end_sequence ();
  return result;
}

/* Generate RTX move insns from move lists attached to basic blocks
   and edges.  */
static void
emit_moves (void)
{
  basic_block bb;
  edge_iterator ei;
  edge e;
  rtx_insn *insns, *tmp, *next;

  FOR_EACH_BB_FN (bb, cfun)
    {
      if (at_bb_start[bb->index] != NULL)
	{
	  at_bb_start[bb->index] = modify_move_list (at_bb_start[bb->index]);
	  insns
	    = emit_move_list (at_bb_start[bb->index], REG_FREQ_FROM_BB (bb));
	  tmp = BB_HEAD (bb);
	  if (LABEL_P (tmp))
	    tmp = NEXT_INSN (tmp);
	  if (NOTE_INSN_BASIC_BLOCK_P (tmp))
	    tmp = NEXT_INSN (tmp);
	  /* Make sure to put the location of TMP or a subsequent instruction
	     to avoid inheriting the location of the previous instruction.  */
	  next = tmp;
	  while (next && !NONDEBUG_INSN_P (next))
	    next = NEXT_INSN (next);
	  if (next)
	    set_insn_locations (insns, INSN_LOCATION (next));
	  if (tmp == BB_HEAD (bb))
	    emit_insn_before (insns, tmp);
	  else if (tmp)
	    emit_insn_after (insns, PREV_INSN (tmp));
	  else
	    emit_insn_after (insns, get_last_insn ());
	}

      if (at_bb_end[bb->index] != NULL)
	{
	  at_bb_end[bb->index] = modify_move_list (at_bb_end[bb->index]);
	  insns = emit_move_list (at_bb_end[bb->index], REG_FREQ_FROM_BB (bb));
	  ira_assert (! control_flow_insn_p (BB_END (bb)));
	  emit_insn_after (insns, BB_END (bb));
	}

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->aux == NULL)
	    continue;
	  ira_assert ((e->flags & EDGE_ABNORMAL) == 0
		      || ! EDGE_CRITICAL_P (e));
	  e->aux = modify_move_list ((move_t) e->aux);
	  insert_insn_on_edge
	    (emit_move_list ((move_t) e->aux,
			     REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e))),
	     e);
	  if (e->src->next_bb != e->dest)
	    ira_additional_jumps_num++;
	}
    }
}

/* Update costs of A and corresponding allocnos on upper levels on the
   loop tree from reading (if READ_P) or writing A on an execution
   path with FREQ.  */
static void
update_costs (ira_allocno_t a, bool read_p, int freq)
{
  ira_loop_tree_node_t parent;

  for (;;)
    {
      ALLOCNO_NREFS (a)++;
      ALLOCNO_FREQ (a) += freq;
      ALLOCNO_MEMORY_COST (a)
	+= (ira_memory_move_cost[ALLOCNO_MODE (a)][ALLOCNO_CLASS (a)]
	    [read_p ? 1 : 0] * freq);
      if (ALLOCNO_CAP (a) != NULL)
	a = ALLOCNO_CAP (a);
      else if ((parent = ALLOCNO_LOOP_TREE_NODE (a)->parent) == NULL
	       || (a = parent->regno_allocno_map[ALLOCNO_REGNO (a)]) == NULL)
	break;
    }
}

/* Process moves from LIST with execution FREQ to add ranges, copies,
   and modify costs for allocnos involved in the moves.  All regnos
   living through the list is in LIVE_THROUGH, and the loop tree node
   used to find corresponding allocnos is NODE.  */
static void
add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node,
				     bitmap live_through, int freq)
{
  int start, n;
  unsigned int regno;
  move_t move;
  ira_allocno_t a;
  ira_copy_t cp;
  live_range_t r;
  bitmap_iterator bi;
  HARD_REG_SET hard_regs_live;

  if (list == NULL)
    return;
  n = 0;
  EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
    n++;
  REG_SET_TO_HARD_REG_SET (hard_regs_live, live_through);
  /* This is a trick to guarantee that new ranges is not merged with
     the old ones.  */
  ira_max_point++;
  start = ira_max_point;
  for (move = list; move != NULL; move = move->next)
    {
      ira_allocno_t from = move->from;
      ira_allocno_t to = move->to;
      int nr, i;

      bitmap_clear_bit (live_through, ALLOCNO_REGNO (from));
      bitmap_clear_bit (live_through, ALLOCNO_REGNO (to));

      nr = ALLOCNO_NUM_OBJECTS (to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
	  if (OBJECT_CONFLICT_ARRAY (to_obj) == NULL)
	    {
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file, "    Allocate conflicts for a%dr%d\n",
			 ALLOCNO_NUM (to), REGNO (allocno_emit_reg (to)));
	      ira_allocate_object_conflicts (to_obj, n);
	    }
	}
      ior_hard_reg_conflicts (from, hard_regs_live);
      ior_hard_reg_conflicts (to, hard_regs_live);

      update_costs (from, true, freq);
      update_costs (to, false, freq);
      cp = ira_add_allocno_copy (from, to, freq, false, move->insn, NULL);
      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
	fprintf (ira_dump_file, "    Adding cp%d:a%dr%d-a%dr%d\n",
		 cp->num, ALLOCNO_NUM (cp->first),
		 REGNO (allocno_emit_reg (cp->first)),
		 ALLOCNO_NUM (cp->second),
		 REGNO (allocno_emit_reg (cp->second)));

      nr = ALLOCNO_NUM_OBJECTS (from);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t from_obj = ALLOCNO_OBJECT (from, i);
	  r = OBJECT_LIVE_RANGES (from_obj);
	  if (r == NULL || r->finish >= 0)
	    {
	      ira_add_live_range_to_object (from_obj, start, ira_max_point);
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 "    Adding range [%d..%d] to allocno a%dr%d\n",
			 start, ira_max_point, ALLOCNO_NUM (from),
			 REGNO (allocno_emit_reg (from)));
	    }
	  else
	    {
	      r->finish = ira_max_point;
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 "    Adding range [%d..%d] to allocno a%dr%d\n",
			 r->start, ira_max_point, ALLOCNO_NUM (from),
			 REGNO (allocno_emit_reg (from)));
	    }
	}
      ira_max_point++;
      nr = ALLOCNO_NUM_OBJECTS (to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (to, i);
	  ira_add_live_range_to_object (to_obj, ira_max_point, -1);
	}
      ira_max_point++;
    }
  for (move = list; move != NULL; move = move->next)
    {
      int nr, i;
      nr = ALLOCNO_NUM_OBJECTS (move->to);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t to_obj = ALLOCNO_OBJECT (move->to, i);
	  r = OBJECT_LIVE_RANGES (to_obj);
	  if (r->finish < 0)
	    {
	      r->finish = ira_max_point - 1;
	      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
		fprintf (ira_dump_file,
			 "    Adding range [%d..%d] to allocno a%dr%d\n",
			 r->start, r->finish, ALLOCNO_NUM (move->to),
			 REGNO (allocno_emit_reg (move->to)));
	    }
	}
    }
  EXECUTE_IF_SET_IN_BITMAP (live_through, FIRST_PSEUDO_REGISTER, regno, bi)
    {
      ira_allocno_t to;
      int nr, i;

      a = node->regno_allocno_map[regno];
      if ((to = ALLOCNO_EMIT_DATA (a)->mem_optimized_dest) != NULL)
	a = to;
      nr = ALLOCNO_NUM_OBJECTS (a);
      for (i = 0; i < nr; i++)
	{
	  ira_object_t obj = ALLOCNO_OBJECT (a, i);
	  ira_add_live_range_to_object (obj, start, ira_max_point - 1);
	}
      if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
	fprintf
	  (ira_dump_file,
	   "    Adding range [%d..%d] to live through %s allocno a%dr%d\n",
	   start, ira_max_point - 1,
	   to != NULL ? "upper level" : "",
	   ALLOCNO_NUM (a), REGNO (allocno_emit_reg (a)));
    }
}

/* Process all move list to add ranges, conflicts, copies, and modify
   costs for allocnos involved in the moves.  */
static void
add_ranges_and_copies (void)
{
  basic_block bb;
  edge_iterator ei;
  edge e;
  ira_loop_tree_node_t node;
  bitmap live_through;

  live_through = ira_allocate_bitmap ();
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* It does not matter what loop_tree_node (of source or
	 destination block) to use for searching allocnos by their
	 regnos because of subsequent IR flattening.  */
      node = IRA_BB_NODE (bb)->parent;
      bitmap_copy (live_through, df_get_live_in (bb));
      add_range_and_copies_from_move_list
	(at_bb_start[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
      bitmap_copy (live_through, df_get_live_out (bb));
      add_range_and_copies_from_move_list
	(at_bb_end[bb->index], node, live_through, REG_FREQ_FROM_BB (bb));
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  bitmap_and (live_through,
		      df_get_live_in (e->dest), df_get_live_out (bb));
	  add_range_and_copies_from_move_list
	    ((move_t) e->aux, node, live_through,
	     REG_FREQ_FROM_EDGE_FREQ (EDGE_FREQUENCY (e)));
	}
    }
  ira_free_bitmap (live_through);
}

/* The entry function changes code and generates shuffling allocnos on
   region borders for the regional (LOOPS_P is TRUE in this case)
   register allocation.  */
void
ira_emit (bool loops_p)
{
  basic_block bb;
  rtx_insn *insn;
  edge_iterator ei;
  edge e;
  ira_allocno_t a;
  ira_allocno_iterator ai;
  size_t sz;

  FOR_EACH_ALLOCNO (a, ai)
    ALLOCNO_EMIT_DATA (a)->reg = regno_reg_rtx[ALLOCNO_REGNO (a)];
  if (! loops_p)
    return;
  sz = sizeof (move_t) * last_basic_block_for_fn (cfun);
  at_bb_start = (move_t *) ira_allocate (sz);
  memset (at_bb_start, 0, sz);
  at_bb_end = (move_t *) ira_allocate (sz);
  memset (at_bb_end, 0, sz);
  local_allocno_bitmap = ira_allocate_bitmap ();
  used_regno_bitmap = ira_allocate_bitmap ();
  renamed_regno_bitmap = ira_allocate_bitmap ();
  max_regno_before_changing = max_reg_num ();
  ira_traverse_loop_tree (true, ira_loop_tree_root, change_loop, NULL);
  set_allocno_somewhere_renamed_p ();
  ira_free_bitmap (used_regno_bitmap);
  ira_free_bitmap (renamed_regno_bitmap);
  ira_free_bitmap (local_allocno_bitmap);
  setup_entered_from_non_parent_p ();
  FOR_EACH_BB_FN (bb, cfun)
    {
      at_bb_start[bb->index] = NULL;
      at_bb_end[bb->index] = NULL;
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	  generate_edge_moves (e);
    }
  allocno_last_set
    = (move_t *) ira_allocate (sizeof (move_t) * max_reg_num ());
  allocno_last_set_check
    = (int *) ira_allocate (sizeof (int) * max_reg_num ());
  memset (allocno_last_set_check, 0, sizeof (int) * max_reg_num ());
  memset (hard_regno_last_set_check, 0, sizeof (hard_regno_last_set_check));
  curr_tick = 0;
  FOR_EACH_BB_FN (bb, cfun)
    unify_moves (bb, true);
  FOR_EACH_BB_FN (bb, cfun)
    unify_moves (bb, false);
  move_vec.create (ira_allocnos_num);
  emit_moves ();
  add_ranges_and_copies ();
  /* Clean up: */
  FOR_EACH_BB_FN (bb, cfun)
    {
      free_move_list (at_bb_start[bb->index]);
      free_move_list (at_bb_end[bb->index]);
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  free_move_list ((move_t) e->aux);
	  e->aux = NULL;
	}
    }
  move_vec.release ();
  ira_free (allocno_last_set_check);
  ira_free (allocno_last_set);
  commit_edge_insertions ();
  /* Fix insn codes.  It is necessary to do it before reload because
     reload assumes initial insn codes defined.  The insn codes can be
     invalidated by CFG infrastructure for example in jump
     redirection.  */
  FOR_EACH_BB_FN (bb, cfun)
    FOR_BB_INSNS_REVERSE (bb, insn)
      if (INSN_P (insn))
	recog_memoized (insn);
  ira_free (at_bb_end);
  ira_free (at_bb_start);
}