// merge.h -- handle section merging for gold  -*- C++ -*-

// Copyright (C) 2006-2024 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <iant@google.com>.

// This file is part of gold.

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
// MA 02110-1301, USA.

#ifndef GOLD_MERGE_H
#define GOLD_MERGE_H

#include <climits>
#include <map>
#include <vector>

#include "stringpool.h"
#include "output.h"

namespace gold
{

// For each object with merge sections, we store an Object_merge_map.
// This is used to map locations in input sections to a merged output
// section.  The output section itself is not recorded here--it can be
// found in the output_sections_ field of the Object.

class Object_merge_map
{
 public:
  Object_merge_map()
    : section_merge_maps_()
  { }

  ~Object_merge_map();

  // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
  // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
  // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
  // discarded.  OUTPUT_OFFSET is relative to the start of the merged
  // data in the output section.
  void
  add_mapping(const Output_section_data*, unsigned int shndx,
              section_offset_type offset, section_size_type length,
              section_offset_type output_offset);

  // Get the output offset for an input address.  MERGE_MAP is the map
  // we are looking for, or NULL if we don't care.  The input address
  // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
  // to the offset in the output section; this will be -1 if the bytes
  // are not being copied to the output.  This returns true if the
  // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
  // the start of the merged data in the output section.
  bool
  get_output_offset(unsigned int shndx,
		    section_offset_type offset,
		    section_offset_type* output_offset);

  const Output_section_data*
  find_merge_section(unsigned int shndx) const;

  // Initialize an mapping from input offsets to output addresses for
  // section SHNDX.  STARTING_ADDRESS is the output address of the
  // merged section.
  template<int size>
  void
  initialize_input_to_output_map(
      unsigned int shndx,
      typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
      Unordered_map<section_offset_type,
		    typename elfcpp::Elf_types<size>::Elf_Addr>*);

  // Map input section offsets to a length and an output section
  // offset.  An output section offset of -1 means that this part of
  // the input section is being discarded.
  struct Input_merge_entry
  {
    // The offset in the input section.
    section_offset_type input_offset;
    // The length.
    section_size_type length;
    // The offset in the output section.
    section_offset_type output_offset;
  };

  // A list of entries for a particular input section.
  struct Input_merge_map
  {
    void add_mapping(section_offset_type input_offset, section_size_type length,
                     section_offset_type output_offset);

    typedef std::vector<Input_merge_entry> Entries;

    // We store these with the Relobj, and we look them up by input
    // section.  It is possible to have two different merge maps
    // associated with a single output section.  For example, this
    // happens routinely with .rodata, when merged string constants
    // and merged fixed size constants are both put into .rodata.  The
    // output offset that we store is not the offset from the start of
    // the output section; it is the offset from the start of the
    // merged data in the output section.  That means that the caller
    // is going to add the offset of the merged data within the output
    // section, which means that the caller needs to know which set of
    // merged data it found the entry in.  So it's not enough to find
    // this data based on the input section and the output section; we
    // also have to find it based on a set of merged data in the
    // output section.  In order to verify that we are looking at the
    // right data, we store a pointer to the Merge_map here, and we
    // pass in a pointer when looking at the data.  If we are asked to
    // look up information for a different Merge_map, we report that
    // we don't have it, rather than trying a lookup and returning an
    // answer which will receive the wrong offset.
    const Output_section_data* output_data;
    // The list of mappings.
    Entries entries;
    // Whether the ENTRIES field is sorted by input_offset.
    bool sorted;

    Input_merge_map()
      : output_data(NULL), entries(), sorted(true)
    { }
  };

  // Get or make the Input_merge_map to use for the section SHNDX
  // with MERGE_MAP.
  Input_merge_map*
  get_or_make_input_merge_map(const Output_section_data* merge_map,
                              unsigned int shndx);

  private:
  // A less-than comparison routine for Input_merge_entry.
  struct Input_merge_compare
  {
    bool
    operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
    { return i1.input_offset < i2.input_offset; }
  };

  // Map input section indices to merge maps.
  typedef std::vector<std::pair<unsigned int, Input_merge_map*> >
      Section_merge_maps;

  // Return a pointer to the Input_merge_map to use for the input
  // section SHNDX, or NULL.
  const Input_merge_map*
  get_input_merge_map(unsigned int shndx) const;

  Input_merge_map *
  get_input_merge_map(unsigned int shndx) {
    return const_cast<Input_merge_map *>(static_cast<const Object_merge_map *>(
                                             this)->get_input_merge_map(shndx));
  }

  Section_merge_maps section_merge_maps_;
};

// A general class for SHF_MERGE data, to hold functions shared by
// fixed-size constant data and string data.

class Output_merge_base : public Output_section_data
{
 public:
  Output_merge_base(uint64_t entsize, uint64_t addralign)
    : Output_section_data(addralign), entsize_(entsize),
      keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
      input_sections_()
  { }

  // Return the entry size.
  uint64_t
  entsize() const
  { return this->entsize_; }

  // Whether this is a merge string section.  This is only true of
  // Output_merge_string.
  bool
  is_string()
  { return this->do_is_string(); }

  // Whether this keeps input sections.
  bool
  keeps_input_sections() const
  { return this->keeps_input_sections_; }

  // Set the keeps-input-sections flag.  This is virtual so that sub-classes
  // can perform additional checks.
  void
  set_keeps_input_sections()
  { this->do_set_keeps_input_sections(); }

  // Return the object of the first merged input section.  This used
  // for script processing.  This is NULL if merge section is empty.
  Relobj*
  first_relobj() const
  { return this->first_relobj_; }

  // Return the section index of the first merged input section.  This
  // is used for script processing.  This is valid only if merge section
  // is not valid.
  unsigned int
  first_shndx() const
  { 
    gold_assert(this->first_relobj_ != NULL);
    return this->first_shndx_;
  }
 
  // Set of merged input sections.
  typedef Unordered_set<Section_id, Section_id_hash> Input_sections;

  // Beginning of merged input sections.
  Input_sections::const_iterator
  input_sections_begin() const
  {
    gold_assert(this->keeps_input_sections_);
    return this->input_sections_.begin();
  }

  // Beginning of merged input sections.
  Input_sections::const_iterator
  input_sections_end() const
  {
    gold_assert(this->keeps_input_sections_);
    return this->input_sections_.end();
  }
 
 protected:
  // Return the output offset for an input offset.
  bool
  do_output_offset(const Relobj* object, unsigned int shndx,
		   section_offset_type offset,
		   section_offset_type* poutput) const;

  // This may be overridden by the child class.
  virtual bool
  do_is_string()
  { return false; }

  // This may be overridden by the child class.
  virtual void
  do_set_keeps_input_sections()
  { this->keeps_input_sections_ = true; }

  // Record the merged input section for script processing.
  void
  record_input_section(Relobj* relobj, unsigned int shndx);

 private:
  // The entry size.  For fixed-size constants, this is the size of
  // the constants.  For strings, this is the size of a character.
  uint64_t entsize_;
  // Whether we keep input sections.
  bool keeps_input_sections_;
  // Object of the first merged input section.  We use this for script
  // processing.
  Relobj* first_relobj_;
  // Section index of the first merged input section. 
  unsigned int first_shndx_;
  // Input sections.  We only keep them is keeps_input_sections_ is true.
  Input_sections input_sections_;
};

// Handle SHF_MERGE sections with fixed-size constant data.

class Output_merge_data : public Output_merge_base
{
 public:
  Output_merge_data(uint64_t entsize, uint64_t addralign)
    : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
      input_count_(0),
      hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
  { }

 protected:
  // Add an input section.
  bool
  do_add_input_section(Relobj* object, unsigned int shndx);

  // Set the final data size.
  void
  set_final_data_size();

  // Write the data to the file.
  void
  do_write(Output_file*);

  // Write the data to a buffer.
  void
  do_write_to_buffer(unsigned char*);

  // Write to a map file.
  void
  do_print_to_mapfile(Mapfile* mapfile) const
  { mapfile->print_output_data(this, _("** merge constants")); }

  // Print merge stats to stderr.
  void
  do_print_merge_stats(const char* section_name);

  // Set keeps-input-sections flag.
  void
  do_set_keeps_input_sections()
  {
    gold_assert(this->input_count_ == 0);
    Output_merge_base::do_set_keeps_input_sections();
  }

 private:
  // We build a hash table of the fixed-size constants.  Each constant
  // is stored as a pointer into the section data we are accumulating.

  // A key in the hash table.  This is an offset in the section
  // contents we are building.
  typedef section_offset_type Merge_data_key;

  // Compute the hash code.  To do this we need a pointer back to the
  // object holding the data.
  class Merge_data_hash
  {
   public:
    Merge_data_hash(const Output_merge_data* pomd)
      : pomd_(pomd)
    { }

    size_t
    operator()(Merge_data_key) const;

   private:
    const Output_merge_data* pomd_;
  };

  friend class Merge_data_hash;

  // Compare two entries in the hash table for equality.  To do this
  // we need a pointer back to the object holding the data.  Note that
  // we now have a pointer to the object stored in two places in the
  // hash table.  Fixing this would require specializing the hash
  // table, which would be hard to do portably.
  class Merge_data_eq
  {
   public:
    Merge_data_eq(const Output_merge_data* pomd)
      : pomd_(pomd)
    { }

    bool
    operator()(Merge_data_key k1, Merge_data_key k2) const;

   private:
    const Output_merge_data* pomd_;
  };

  friend class Merge_data_eq;

  // The type of the hash table.
  typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
    Merge_data_hashtable;

  // Given a hash table key, which is just an offset into the section
  // data, return a pointer to the corresponding constant.
  const unsigned char*
  constant(Merge_data_key k) const
  {
    gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
    return this->p_ + k;
  }

  // Add a constant to the output.
  void
  add_constant(const unsigned char*);

  // The accumulated data.
  unsigned char* p_;
  // The length of the accumulated data.
  section_size_type len_;
  // The size of the allocated buffer.
  section_size_type alc_;
  // The number of entries seen in input files.
  size_t input_count_;
  // The hash table.
  Merge_data_hashtable hashtable_;
};

// Handle SHF_MERGE sections with string data.  This is a template
// based on the type of the characters in the string.

template<typename Char_type>
class Output_merge_string : public Output_merge_base
{
 public:
  Output_merge_string(uint64_t addralign)
    : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
      merged_strings_lists_(), input_count_(0), input_size_(0)
  {
    this->stringpool_.set_no_zero_null();
  }

 protected:
  // Add an input section.
  bool
  do_add_input_section(Relobj* object, unsigned int shndx);

  // Do all the final processing after the input sections are read in.
  // Returns the final data size.
  section_size_type
  finalize_merged_data();

  // Set the final data size.
  void
  set_final_data_size();

  // Write the data to the file.
  void
  do_write(Output_file*);

  // Write the data to a buffer.
  void
  do_write_to_buffer(unsigned char*);

  // Write to a map file.
  void
  do_print_to_mapfile(Mapfile* mapfile) const
  { mapfile->print_output_data(this, _("** merge strings")); }

  // Print merge stats to stderr.
  void
  do_print_merge_stats(const char* section_name);

  // Writes the stringpool to a buffer.
  void
  stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
  { this->stringpool_.write_to_buffer(buffer, buffer_size); }

  // Clears all the data in the stringpool, to save on memory.
  void
  clear_stringpool()
  { this->stringpool_.clear(); }

  // Whether this is a merge string section.
  virtual bool
  do_is_string()
  { return true; }

  // Set keeps-input-sections flag.
  void
  do_set_keeps_input_sections()
  {
    gold_assert(this->input_count_ == 0);
    Output_merge_base::do_set_keeps_input_sections();
  }

 private:
  // The name of the string type, for stats.
  const char*
  string_name();

  // As we see input sections, we build a mapping from object, section
  // index and offset to strings.
  struct Merged_string
  {
    // The offset in the input section.
    section_offset_type offset;
    // The key in the Stringpool.
    Stringpool::Key stringpool_key;

    Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
      : offset(offseta), stringpool_key(stringpool_keya)
    { }
  };

  typedef std::vector<Merged_string> Merged_strings;

  struct Merged_strings_list
  {
    // The input object where the strings were found.
    Relobj* object;
    // The input section in the input object.
    unsigned int shndx;
    // The list of merged strings.
    Merged_strings merged_strings;

    Merged_strings_list(Relobj* objecta, unsigned int shndxa)
      : object(objecta), shndx(shndxa), merged_strings()
    { }
  };

  typedef std::vector<Merged_strings_list*> Merged_strings_lists;

  // As we see the strings, we add them to a Stringpool.
  Stringpool_template<Char_type> stringpool_;
  // Map from a location in an input object to an entry in the
  // Stringpool.
  Merged_strings_lists merged_strings_lists_;
  // The number of entries seen in input files.
  size_t input_count_;
  // The total size of input sections.
  size_t input_size_;
};

} // End namespace gold.

#endif // !defined(GOLD_MERGE_H)