| author | Dave Brolley <brolley@redhat.com> | 2003-08-29 16:35:47 +0000 |
|---|---|---|
| committer | Dave Brolley <brolley@redhat.com> | 2003-08-29 16:35:47 +0000 |
| commit | b34f6357d032f4b39f9c7adb1995956d04339461 (patch) | |
| tree | a77b3c9a47df5e56dc9fc85d3a2792b12cb436e8 /sim/frv/profile.c | |
| parent | 60fac5b81a94dedf1997344af7a998e9ea611e55 (diff) | |
New simulator for Fujitsu frv contributed by Red Hat.
Diffstat (limited to 'sim/frv/profile.c')
| -rw-r--r-- | sim/frv/profile.c | 1807 |
1 file changed, 1807 insertions(+), 0 deletions(-)
diff --git a/sim/frv/profile.c b/sim/frv/profile.c new file mode 100644 index 0000000..1a59e4a --- /dev/null +++ b/sim/frv/profile.c @@ -0,0 +1,1807 @@ +/* frv simulator machine independent profiling code. + + Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. + Contributed by Red Hat + +This file is part of the GNU simulators. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +*/ +#define WANT_CPU +#define WANT_CPU_FRVBF + +#include "sim-main.h" +#include "bfd.h" + +#if WITH_PROFILE_MODEL_P + +#include "profile.h" +#include "profile-fr400.h" +#include "profile-fr500.h" + +static void +reset_gr_flags (SIM_CPU *cpu, INT gr) +{ + SIM_DESC sd = CPU_STATE (cpu); + if (STATE_ARCHITECTURE (sd)->mach == bfd_mach_fr400) + fr400_reset_gr_flags (cpu, gr); + /* Other machines have no gr flags right now. */ +} + +static void +reset_fr_flags (SIM_CPU *cpu, INT fr) +{ + SIM_DESC sd = CPU_STATE (cpu); + if (STATE_ARCHITECTURE (sd)->mach == bfd_mach_fr400) + fr400_reset_fr_flags (cpu, fr); + else if (STATE_ARCHITECTURE (sd)->mach == bfd_mach_fr500) + fr500_reset_fr_flags (cpu, fr); +} + +static void +reset_acc_flags (SIM_CPU *cpu, INT acc) +{ + SIM_DESC sd = CPU_STATE (cpu); + if (STATE_ARCHITECTURE (sd)->mach == bfd_mach_fr400) + fr400_reset_acc_flags (cpu, acc); + /* Other machines have no acc flags right now. */ +} + +static void +reset_cc_flags (SIM_CPU *cpu, INT cc) +{ + SIM_DESC sd = CPU_STATE (cpu); + if (STATE_ARCHITECTURE (sd)->mach == bfd_mach_fr500) + fr500_reset_cc_flags (cpu, cc); + /* Other machines have no cc flags. */ +} + +void +set_use_is_gr_complex (SIM_CPU *cpu, INT gr) +{ + if (gr != -1) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + reset_gr_flags (cpu, gr); + ps->cur_gr_complex |= (((DI)1) << gr); + } +} + +void +set_use_not_gr_complex (SIM_CPU *cpu, INT gr) +{ + if (gr != -1) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + ps->cur_gr_complex &= ~(((DI)1) << gr); + } +} + +int +use_is_gr_complex (SIM_CPU *cpu, INT gr) +{ + if (gr != -1) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + return ps->cur_gr_complex & (((DI)1) << gr); + } + return 0; +} + +/* Globals flag indicates whether this insn is being modeled. */ +enum FRV_INSN_MODELING model_insn = FRV_INSN_NO_MODELING; + +/* static buffer for the name of the currently most restrictive hazard. */ +static char hazard_name[100] = ""; + +/* Print information about the wait applied to an entire VLIW insn. */ +FRV_INSN_FETCH_BUFFER frv_insn_fetch_buffer[] += { + {1, NO_REQNO}, {1, NO_REQNO} /* init with impossible address. */ +}; + +enum cache_request +{ + cache_load, + cache_invalidate, + cache_flush, + cache_preload, + cache_unlock +}; + +/* A queue of load requests from the data cache. Use to keep track of loads + which are still pending. */ +/* TODO -- some of these are mutually exclusive and can use a union. 
*/ +typedef struct +{ + FRV_CACHE *cache; + unsigned reqno; + SI address; + int length; + int is_signed; + int regnum; + int cycles; + int regtype; + int lock; + int all; + int slot; + int active; + enum cache_request request; +} CACHE_QUEUE_ELEMENT; + +#define CACHE_QUEUE_SIZE 64 /* TODO -- make queue dynamic */ +struct +{ + unsigned reqno; + int ix; + CACHE_QUEUE_ELEMENT q[CACHE_QUEUE_SIZE]; +} cache_queue = {0, 0}; + +/* Queue a request for a load from the cache. The load will be queued as + 'inactive' and will be requested after the given number + of cycles have passed from the point the load is activated. */ +void +request_cache_load (SIM_CPU *cpu, INT regnum, int regtype, int cycles) +{ + CACHE_QUEUE_ELEMENT *q; + FRV_VLIW *vliw; + int slot; + + /* For a conditional load which was not executed, CPU_LOAD_LENGTH will be + zero. */ + if (CPU_LOAD_LENGTH (cpu) == 0) + return; + + if (cache_queue.ix >= CACHE_QUEUE_SIZE) + abort (); /* TODO: Make the queue dynamic */ + + q = & cache_queue.q[cache_queue.ix]; + ++cache_queue.ix; + + q->reqno = cache_queue.reqno++; + q->request = cache_load; + q->cache = CPU_DATA_CACHE (cpu); + q->address = CPU_LOAD_ADDRESS (cpu); + q->length = CPU_LOAD_LENGTH (cpu); + q->is_signed = CPU_LOAD_SIGNED (cpu); + q->regnum = regnum; + q->regtype = regtype; + q->cycles = cycles; + q->active = 0; + + vliw = CPU_VLIW (cpu); + slot = vliw->next_slot - 1; + q->slot = (*vliw->current_vliw)[slot]; + + CPU_LOAD_LENGTH (cpu) = 0; +} + +/* Queue a request to flush the cache. The request will be queued as + 'inactive' and will be requested after the given number + of cycles have passed from the point the request is activated. */ +void +request_cache_flush (SIM_CPU *cpu, FRV_CACHE *cache, int cycles) +{ + CACHE_QUEUE_ELEMENT *q; + FRV_VLIW *vliw; + int slot; + + if (cache_queue.ix >= CACHE_QUEUE_SIZE) + abort (); /* TODO: Make the queue dynamic */ + + q = & cache_queue.q[cache_queue.ix]; + ++cache_queue.ix; + + q->reqno = cache_queue.reqno++; + q->request = cache_flush; + q->cache = cache; + q->address = CPU_LOAD_ADDRESS (cpu); + q->all = CPU_PROFILE_STATE (cpu)->all_cache_entries; + q->cycles = cycles; + q->active = 0; + + vliw = CPU_VLIW (cpu); + slot = vliw->next_slot - 1; + q->slot = (*vliw->current_vliw)[slot]; +} + +/* Queue a request to invalidate the cache. The request will be queued as + 'inactive' and will be requested after the given number + of cycles have passed from the point the request is activated. */ +void +request_cache_invalidate (SIM_CPU *cpu, FRV_CACHE *cache, int cycles) +{ + CACHE_QUEUE_ELEMENT *q; + FRV_VLIW *vliw; + int slot; + + if (cache_queue.ix >= CACHE_QUEUE_SIZE) + abort (); /* TODO: Make the queue dynamic */ + + q = & cache_queue.q[cache_queue.ix]; + ++cache_queue.ix; + + q->reqno = cache_queue.reqno++; + q->request = cache_invalidate; + q->cache = cache; + q->address = CPU_LOAD_ADDRESS (cpu); + q->all = CPU_PROFILE_STATE (cpu)->all_cache_entries; + q->cycles = cycles; + q->active = 0; + + vliw = CPU_VLIW (cpu); + slot = vliw->next_slot - 1; + q->slot = (*vliw->current_vliw)[slot]; +} + +/* Queue a request to preload the cache. The request will be queued as + 'inactive' and will be requested after the given number + of cycles have passed from the point the request is activated. 
*/ +void +request_cache_preload (SIM_CPU *cpu, FRV_CACHE *cache, int cycles) +{ + CACHE_QUEUE_ELEMENT *q; + FRV_VLIW *vliw; + int slot; + + if (cache_queue.ix >= CACHE_QUEUE_SIZE) + abort (); /* TODO: Make the queue dynamic */ + + q = & cache_queue.q[cache_queue.ix]; + ++cache_queue.ix; + + q->reqno = cache_queue.reqno++; + q->request = cache_preload; + q->cache = cache; + q->address = CPU_LOAD_ADDRESS (cpu); + q->length = CPU_LOAD_LENGTH (cpu); + q->lock = CPU_LOAD_LOCK (cpu); + q->cycles = cycles; + q->active = 0; + + vliw = CPU_VLIW (cpu); + slot = vliw->next_slot - 1; + q->slot = (*vliw->current_vliw)[slot]; + + CPU_LOAD_LENGTH (cpu) = 0; +} + +/* Queue a request to unlock the cache. The request will be queued as + 'inactive' and will be requested after the given number + of cycles have passed from the point the request is activated. */ +void +request_cache_unlock (SIM_CPU *cpu, FRV_CACHE *cache, int cycles) +{ + CACHE_QUEUE_ELEMENT *q; + FRV_VLIW *vliw; + int slot; + + if (cache_queue.ix >= CACHE_QUEUE_SIZE) + abort (); /* TODO: Make the queue dynamic */ + + q = & cache_queue.q[cache_queue.ix]; + ++cache_queue.ix; + + q->reqno = cache_queue.reqno++; + q->request = cache_unlock; + q->cache = cache; + q->address = CPU_LOAD_ADDRESS (cpu); + q->cycles = cycles; + q->active = 0; + + vliw = CPU_VLIW (cpu); + slot = vliw->next_slot - 1; + q->slot = (*vliw->current_vliw)[slot]; +} + +static void +submit_cache_request (CACHE_QUEUE_ELEMENT *q) +{ + switch (q->request) + { + case cache_load: + frv_cache_request_load (q->cache, q->reqno, q->address, q->slot); + break; + case cache_flush: + frv_cache_request_invalidate (q->cache, q->reqno, q->address, q->slot, + q->all, 1/*flush*/); + break; + case cache_invalidate: + frv_cache_request_invalidate (q->cache, q->reqno, q->address, q->slot, + q->all, 0/*flush*/); + break; + case cache_preload: + frv_cache_request_preload (q->cache, q->address, q->slot, + q->length, q->lock); + break; + case cache_unlock: + frv_cache_request_unlock (q->cache, q->address, q->slot); + break; + default: + abort (); + } +} + +/* Activate all inactive load requests. */ +static void +activate_cache_requests (SIM_CPU *cpu) +{ + int i; + for (i = 0; i < cache_queue.ix; ++i) + { + CACHE_QUEUE_ELEMENT *q = & cache_queue.q[i]; + if (! q->active) + { + q->active = 1; + /* Submit the request now if the cycle count is zero. */ + if (q->cycles == 0) + submit_cache_request (q); + } + } +} + +/* Check to see if a load is pending which affects the given register(s). + */ +int +load_pending_for_register (SIM_CPU *cpu, int regnum, int words, int regtype) +{ + int i; + for (i = 0; i < cache_queue.ix; ++i) + { + CACHE_QUEUE_ELEMENT *q = & cache_queue.q[i]; + + /* Must be the same kind of register. */ + if (! q->active || q->request != cache_load || q->regtype != regtype) + continue; + + /* If the registers numbers are equal, then we have a match. */ + if (q->regnum == regnum) + return 1; /* load pending */ + + /* Check for overlap of a load with a multi-word register. */ + if (regnum < q->regnum) + { + if (regnum + words > q->regnum) + return 1; + } + /* Check for overlap of a multi-word load with the register. */ + else + { + int data_words = (q->length + sizeof (SI) - 1) / sizeof (SI); + if (q->regnum + data_words > regnum) + return 1; + } + } + + return 0; /* no load pending */ +} + +/* Check to see if a cache flush pending which affects the given address. 
*/ +static int +flush_pending_for_address (SIM_CPU *cpu, SI address) +{ + int line_mask = ~(CPU_DATA_CACHE (cpu)->line_size - 1); + int i; + for (i = 0; i < cache_queue.ix; ++i) + { + CACHE_QUEUE_ELEMENT *q = & cache_queue.q[i]; + + /* Must be the same kind of request and active. */ + if (! q->active || q->request != cache_flush) + continue; + + /* If the addresses are equal, then we have a match. */ + if ((q->address & line_mask) == (address & line_mask)) + return 1; /* flush pending */ + } + + return 0; /* no flush pending */ +} + +static void +remove_cache_queue_element (SIM_CPU *cpu, int i) +{ + /* If we are removing the load of a FR register, then remember which one(s). + */ + CACHE_QUEUE_ELEMENT q = cache_queue.q[i]; + + for (--cache_queue.ix; i < cache_queue.ix; ++i) + cache_queue.q[i] = cache_queue.q[i + 1]; + + /* If we removed a load of a FR register, check to see if any other loads + of that register is still queued. If not, then apply the queued post + processing time of that register to its latency. Also apply + 1 extra cycle of latency to the register since it was a floating point + load. */ + if (q.request == cache_load && q.regtype != REGTYPE_NONE) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int data_words = (q.length + sizeof (SI) - 1) / sizeof (SI); + int j; + for (j = 0; j < data_words; ++j) + { + int regnum = q.regnum + j; + if (! load_pending_for_register (cpu, regnum, 1, q.regtype)) + { + if (q.regtype == REGTYPE_FR) + { + int *fr = ps->fr_busy; + fr[regnum] += 1 + ps->fr_ptime[regnum]; + ps->fr_ptime[regnum] = 0; + } + } + } + } +} + +/* Copy data from the cache buffer to the target register(s). */ +static void +copy_load_data (SIM_CPU *current_cpu, FRV_CACHE *cache, int slot, + CACHE_QUEUE_ELEMENT *q) +{ + switch (q->length) + { + case 1: + if (q->regtype == REGTYPE_FR) + { + if (q->is_signed) + { + QI value = CACHE_RETURN_DATA (cache, slot, q->address, QI, 1); + SET_H_FR (q->regnum, value); + } + else + { + UQI value = CACHE_RETURN_DATA (cache, slot, q->address, UQI, 1); + SET_H_FR (q->regnum, value); + } + } + else + { + if (q->is_signed) + { + QI value = CACHE_RETURN_DATA (cache, slot, q->address, QI, 1); + SET_H_GR (q->regnum, value); + } + else + { + UQI value = CACHE_RETURN_DATA (cache, slot, q->address, UQI, 1); + SET_H_GR (q->regnum, value); + } + } + break; + case 2: + if (q->regtype == REGTYPE_FR) + { + if (q->is_signed) + { + HI value = CACHE_RETURN_DATA (cache, slot, q->address, HI, 2); + SET_H_FR (q->regnum, value); + } + else + { + UHI value = CACHE_RETURN_DATA (cache, slot, q->address, UHI, 2); + SET_H_FR (q->regnum, value); + } + } + else + { + if (q->is_signed) + { + HI value = CACHE_RETURN_DATA (cache, slot, q->address, HI, 2); + SET_H_GR (q->regnum, value); + } + else + { + UHI value = CACHE_RETURN_DATA (cache, slot, q->address, UHI, 2); + SET_H_GR (q->regnum, value); + } + } + break; + case 4: + if (q->regtype == REGTYPE_FR) + { + SET_H_FR (q->regnum, + CACHE_RETURN_DATA (cache, slot, q->address, SF, 4)); + } + else + { + SET_H_GR (q->regnum, + CACHE_RETURN_DATA (cache, slot, q->address, SI, 4)); + } + break; + case 8: + if (q->regtype == REGTYPE_FR) + { + SET_H_FR_DOUBLE (q->regnum, + CACHE_RETURN_DATA (cache, slot, q->address, DF, 8)); + } + else + { + SET_H_GR_DOUBLE (q->regnum, + CACHE_RETURN_DATA (cache, slot, q->address, DI, 8)); + } + break; + case 16: + if (q->regtype == REGTYPE_FR) + frvbf_h_fr_quad_set_handler (current_cpu, q->regnum, + CACHE_RETURN_DATA_ADDRESS (cache, slot, + q->address, + 16)); + else + 
frvbf_h_gr_quad_set_handler (current_cpu, q->regnum, + CACHE_RETURN_DATA_ADDRESS (cache, slot, + q->address, + 16)); + break; + default: + abort (); + } +} + +static int +request_complete (SIM_CPU *cpu, CACHE_QUEUE_ELEMENT *q) +{ + FRV_CACHE* cache; + if (! q->active || q->cycles > 0) + return 0; + + cache = CPU_DATA_CACHE (cpu); + switch (q->request) + { + case cache_load: + /* For loads, we must wait until the data is returned from the cache. */ + if (frv_cache_data_in_buffer (cache, 0, q->address, q->reqno)) + { + copy_load_data (cpu, cache, 0, q); + return 1; + } + if (frv_cache_data_in_buffer (cache, 1, q->address, q->reqno)) + { + copy_load_data (cpu, cache, 1, q); + return 1; + } + break; + + case cache_flush: + /* We must wait until the data is flushed. */ + if (frv_cache_data_flushed (cache, 0, q->address, q->reqno)) + return 1; + if (frv_cache_data_flushed (cache, 1, q->address, q->reqno)) + return 1; + break; + + default: + /* All other requests are complete once they've been made. */ + return 1; + } + + return 0; +} + +/* Run the insn and data caches through the given number of cycles, taking + note of load requests which are fullfilled as a result. */ +static void +run_caches (SIM_CPU *cpu, int cycles) +{ + FRV_CACHE* data_cache = CPU_DATA_CACHE (cpu); + FRV_CACHE* insn_cache = CPU_INSN_CACHE (cpu); + int i; + /* For each cycle, run the caches, noting which requests have been fullfilled + and submitting new requests on their designated cycles. */ + for (i = 0; i < cycles; ++i) + { + int j; + /* Run the caches through 1 cycle. */ + frv_cache_run (data_cache, 1); + frv_cache_run (insn_cache, 1); + + /* Note whether prefetched insn data has been loaded yet. */ + for (j = LS; j < FRV_CACHE_PIPELINES; ++j) + { + if (frv_insn_fetch_buffer[j].reqno != NO_REQNO + && frv_cache_data_in_buffer (insn_cache, j, + frv_insn_fetch_buffer[j].address, + frv_insn_fetch_buffer[j].reqno)) + frv_insn_fetch_buffer[j].reqno = NO_REQNO; + } + + /* Check to see which requests have been satisfied and which should + be submitted now. */ + for (j = 0; j < cache_queue.ix; ++j) + { + CACHE_QUEUE_ELEMENT *q = & cache_queue.q[j]; + if (! q->active) + continue; + + /* If a load has been satisfied, complete the operation and remove it + from the queue. */ + if (request_complete (cpu, q)) + { + remove_cache_queue_element (cpu, j); + --j; + continue; + } + + /* Decrease the cycle count of each queued request. + Submit a request for each queued request whose cycle count has + become zero. */ + --q->cycles; + if (q->cycles == 0) + submit_cache_request (q); + } + } +} + +static void +apply_latency_adjustments (SIM_CPU *cpu) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int i; + /* update the latencies of the registers. */ + int *fr = ps->fr_busy; + int *acc = ps->acc_busy; + for (i = 0; i < 64; ++i) + { + if (ps->fr_busy_adjust[i] > 0) + *fr -= ps->fr_busy_adjust[i]; /* OK if it goes negative. */ + if (ps->acc_busy_adjust[i] > 0) + *acc -= ps->acc_busy_adjust[i]; /* OK if it goes negative. */ + ++fr; + ++acc; + } +} + +/* Account for the number of cycles which have just passed in the latency of + various system elements. Works for negative cycles too so that latency + can be extended in the case of insn fetch latency. + If negative or zero, then no adjustment is necessary. */ +static void +update_latencies (SIM_CPU *cpu, int cycles) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int i; + /* update the latencies of the registers. 
*/ + int *fdiv; + int *fsqrt; + int *idiv; + int *ccr; + int *gr = ps->gr_busy; + int *fr = ps->fr_busy; + int *acc = ps->acc_busy; + /* This loop handles GR, FR and ACC registers. */ + for (i = 0; i < 64; ++i) + { + if (*gr <= cycles) + { + *gr = 0; + reset_gr_flags (cpu, i); + } + else + *gr -= cycles; + /* If the busy drops to 0, then mark the register as + "not in use". */ + if (*fr <= cycles) + { + int *fr_lat = ps->fr_latency + i; + *fr = 0; + ps->fr_busy_adjust[i] = 0; + /* Only clear flags if this register has no target latency. */ + if (*fr_lat == 0) + reset_fr_flags (cpu, i); + } + else + *fr -= cycles; + /* If the busy drops to 0, then mark the register as + "not in use". */ + if (*acc <= cycles) + { + int *acc_lat = ps->acc_latency + i; + *acc = 0; + ps->acc_busy_adjust[i] = 0; + /* Only clear flags if this register has no target latency. */ + if (*acc_lat == 0) + reset_acc_flags (cpu, i); + } + else + *acc -= cycles; + ++gr; + ++fr; + ++acc; + } + /* This loop handles CCR registers. */ + ccr = ps->ccr_busy; + for (i = 0; i < 8; ++i) + { + if (*ccr <= cycles) + { + *ccr = 0; + reset_cc_flags (cpu, i); + } + else + *ccr -= cycles; + ++ccr; + } + /* This loop handles resources. */ + idiv = ps->idiv_busy; + fdiv = ps->fdiv_busy; + fsqrt = ps->fsqrt_busy; + for (i = 0; i < 2; ++i) + { + *idiv = (*idiv <= cycles) ? 0 : (*idiv - cycles); + *fdiv = (*fdiv <= cycles) ? 0 : (*fdiv - cycles); + *fsqrt = (*fsqrt <= cycles) ? 0 : (*fsqrt - cycles); + ++idiv; + ++fdiv; + ++fsqrt; + } +} + +/* Print information about the wait for the given number of cycles. */ +void +frv_model_trace_wait_cycles (SIM_CPU *cpu, int cycles, const char *hazard_name) +{ + if (TRACE_INSN_P (cpu) && cycles > 0) + { + SIM_DESC sd = CPU_STATE (cpu); + trace_printf (sd, cpu, "**** %s wait %d cycles ***\n", + hazard_name, cycles); + } +} + +void +trace_vliw_wait_cycles (SIM_CPU *cpu) +{ + if (TRACE_INSN_P (cpu)) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + frv_model_trace_wait_cycles (cpu, ps->vliw_wait, hazard_name); + } +} + +/* Wait for the given number of cycles. */ +void +frv_model_advance_cycles (SIM_CPU *cpu, int cycles) +{ + PROFILE_DATA *p = CPU_PROFILE_DATA (cpu); + update_latencies (cpu, cycles); + run_caches (cpu, cycles); + PROFILE_MODEL_TOTAL_CYCLES (p) += cycles; +} + +void +handle_resource_wait (SIM_CPU *cpu) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + if (ps->vliw_wait != 0) + frv_model_advance_cycles (cpu, ps->vliw_wait); + if (ps->vliw_load_stall > ps->vliw_wait) + ps->vliw_load_stall -= ps->vliw_wait; + else + ps->vliw_load_stall = 0; +} + +/* Account for the number of cycles until these resources will be available + again. */ +static void +update_target_latencies (SIM_CPU *cpu) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int i; + /* update the latencies of the registers. */ + int *ccr_lat; + int *gr_lat = ps->gr_latency; + int *fr_lat = ps->fr_latency; + int *acc_lat = ps->acc_latency; + int *ccr; + int *gr = ps->gr_busy; + int *fr = ps->fr_busy; + int *acc = ps->acc_busy; + /* This loop handles GR, FR and ACC registers. */ + for (i = 0; i < 64; ++i) + { + if (*gr_lat) + { + *gr = *gr_lat; + *gr_lat = 0; + } + if (*fr_lat) + { + *fr = *fr_lat; + *fr_lat = 0; + } + if (*acc_lat) + { + *acc = *acc_lat; + *acc_lat = 0; + } + ++gr; ++gr_lat; + ++fr; ++fr_lat; + ++acc; ++acc_lat; + } + /* This loop handles CCR registers. 
*/ + ccr = ps->ccr_busy; + ccr_lat = ps->ccr_latency; + for (i = 0; i < 8; ++i) + { + if (*ccr_lat) + { + *ccr = *ccr_lat; + *ccr_lat = 0; + } + ++ccr; ++ccr_lat; + } +} + +/* Run the caches until all pending cache flushes are complete. */ +static void +wait_for_flush (SIM_CPU *cpu) +{ + SI address = CPU_LOAD_ADDRESS (cpu); + int wait = 0; + while (flush_pending_for_address (cpu, address)) + { + frv_model_advance_cycles (cpu, 1); + ++wait; + } + if (TRACE_INSN_P (cpu) && wait) + { + sprintf (hazard_name, "Data cache flush address %p:", address); + frv_model_trace_wait_cycles (cpu, wait, hazard_name); + } +} + +/* Initialize cycle counting for an insn. + FIRST_P is non-zero if this is the first insn in a set of parallel + insns. */ +void +frvbf_model_insn_before (SIM_CPU *cpu, int first_p) +{ + SIM_DESC sd = CPU_STATE (cpu); + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + + ps->vliw_wait = 0; + ps->post_wait = 0; + memset (ps->fr_busy_adjust, 0, sizeof (ps->fr_busy_adjust)); + memset (ps->acc_busy_adjust, 0, sizeof (ps->acc_busy_adjust)); + + if (first_p) + { + ps->vliw_insns++; + ps->vliw_cycles = 0; + ps->vliw_branch_taken = 0; + ps->vliw_load_stall = 0; + } + + switch (STATE_ARCHITECTURE (sd)->mach) + { + case bfd_mach_fr400: + fr400_model_insn_before (cpu, first_p); + break; + case bfd_mach_fr500: + fr500_model_insn_before (cpu, first_p); + break; + default: + break; + } + + if (first_p) + wait_for_flush (cpu); +} + +/* Record the cycles computed for an insn. + LAST_P is non-zero if this is the last insn in a set of parallel insns, + and we update the total cycle count. + CYCLES is the cycle count of the insn. */ + +void +frvbf_model_insn_after (SIM_CPU *cpu, int last_p, int cycles) +{ + PROFILE_DATA *p = CPU_PROFILE_DATA (cpu); + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + SIM_DESC sd = CPU_STATE (cpu); + + PROFILE_MODEL_CUR_INSN_CYCLES (p) = cycles; + + /* The number of cycles for a VLIW insn is the maximum number of cycles + used by any individual insn within it. */ + if (cycles > ps->vliw_cycles) + ps->vliw_cycles = cycles; + + if (last_p) + { + /* This is the last insn in a VLIW insn. */ + struct frv_interrupt_timer *timer = & frv_interrupt_state.timer; + + activate_cache_requests (cpu); /* before advancing cycles. */ + apply_latency_adjustments (cpu); /* must go first. */ + update_target_latencies (cpu); /* must go next. */ + frv_model_advance_cycles (cpu, ps->vliw_cycles); + + PROFILE_MODEL_LOAD_STALL_CYCLES (p) += ps->vliw_load_stall; + + /* Check the interrupt timer. cycles contains the total cycle count. */ + if (timer->enabled) + { + cycles = PROFILE_MODEL_TOTAL_CYCLES (p); + if (timer->current % timer->value + + (cycles - timer->current) >= timer->value) + frv_queue_external_interrupt (cpu, timer->interrupt); + timer->current = cycles; + } + + ps->past_first_p = 0; /* Next one will be the first in a new VLIW. */ + ps->branch_address = -1; + } + else + ps->past_first_p = 1; + + switch (STATE_ARCHITECTURE (sd)->mach) + { + case bfd_mach_fr400: + fr400_model_insn_after (cpu, last_p, cycles); + break; + case bfd_mach_fr500: + fr500_model_insn_after (cpu, last_p, cycles); + break; + default: + break; + } +} + +USI +frvbf_model_branch (SIM_CPU *current_cpu, PCADDR target, int hint) +{ + /* Record the hint and branch address for use in profiling. */ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (current_cpu); + ps->branch_hint = hint; + ps->branch_address = target; +} + +/* Top up the latency of the given GR by the given number of cycles. 
*/ +void +update_GR_latency (SIM_CPU *cpu, INT out_GR, int cycles) +{ + if (out_GR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *gr = ps->gr_latency; + if (gr[out_GR] < cycles) + gr[out_GR] = cycles; + } +} + +void +decrease_GR_busy (SIM_CPU *cpu, INT in_GR, int cycles) +{ + if (in_GR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *gr = ps->gr_busy; + gr[in_GR] -= cycles; + } +} + +/* Top up the latency of the given double GR by the number of cycles. */ +void +update_GRdouble_latency (SIM_CPU *cpu, INT out_GR, int cycles) +{ + if (out_GR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *gr = ps->gr_latency; + if (gr[out_GR] < cycles) + gr[out_GR] = cycles; + if (out_GR < 63 && gr[out_GR + 1] < cycles) + gr[out_GR + 1] = cycles; + } +} + +void +update_GR_latency_for_load (SIM_CPU *cpu, INT out_GR, int cycles) +{ + if (out_GR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *gr = ps->gr_latency; + + /* The latency of the GR will be at least the number of cycles used + by the insn. */ + if (gr[out_GR] < cycles) + gr[out_GR] = cycles; + + /* The latency will also depend on how long it takes to retrieve the + data from the cache or memory. Assume that the load is issued + after the last cycle of the insn. */ + request_cache_load (cpu, out_GR, REGTYPE_NONE, cycles); + } +} + +void +update_GRdouble_latency_for_load (SIM_CPU *cpu, INT out_GR, int cycles) +{ + if (out_GR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *gr = ps->gr_latency; + + /* The latency of the GR will be at least the number of cycles used + by the insn. */ + if (gr[out_GR] < cycles) + gr[out_GR] = cycles; + if (out_GR < 63 && gr[out_GR + 1] < cycles) + gr[out_GR + 1] = cycles; + + /* The latency will also depend on how long it takes to retrieve the + data from the cache or memory. Assume that the load is issued + after the last cycle of the insn. */ + request_cache_load (cpu, out_GR, REGTYPE_NONE, cycles); + } +} + +void +update_GR_latency_for_swap (SIM_CPU *cpu, INT out_GR, int cycles) +{ + update_GR_latency_for_load (cpu, out_GR, cycles); +} + +/* Top up the latency of the given FR by the given number of cycles. */ +void +update_FR_latency (SIM_CPU *cpu, INT out_FR, int cycles) +{ + if (out_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_latency; + if (fr[out_FR] < cycles) + fr[out_FR] = cycles; + } +} + +/* Top up the latency of the given double FR by the number of cycles. */ +void +update_FRdouble_latency (SIM_CPU *cpu, INT out_FR, int cycles) +{ + if (out_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_latency; + if (fr[out_FR] < cycles) + fr[out_FR] = cycles; + if (out_FR < 63 && fr[out_FR + 1] < cycles) + fr[out_FR + 1] = cycles; + } +} + +void +update_FR_latency_for_load (SIM_CPU *cpu, INT out_FR, int cycles) +{ + if (out_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_latency; + + /* The latency of the FR will be at least the number of cycles used + by the insn. */ + if (fr[out_FR] < cycles) + fr[out_FR] = cycles; + + /* The latency will also depend on how long it takes to retrieve the + data from the cache or memory. Assume that the load is issued + after the last cycle of the insn. 
*/ + request_cache_load (cpu, out_FR, REGTYPE_FR, cycles); + } +} + +void +update_FRdouble_latency_for_load (SIM_CPU *cpu, INT out_FR, int cycles) +{ + if (out_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_latency; + + /* The latency of the FR will be at least the number of cycles used + by the insn. */ + if (fr[out_FR] < cycles) + fr[out_FR] = cycles; + if (out_FR < 63 && fr[out_FR + 1] < cycles) + fr[out_FR + 1] = cycles; + + /* The latency will also depend on how long it takes to retrieve the + data from the cache or memory. Assume that the load is issued + after the last cycle of the insn. */ + request_cache_load (cpu, out_FR, REGTYPE_FR, cycles); + } +} + +/* Top up the post-processing time of the given FR by the given number of + cycles. */ +void +update_ACC_ptime (SIM_CPU *cpu, INT out_ACC, int cycles) +{ + if (out_ACC >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + /* No load can be pending on this register. Apply the cycles + directly to the latency of the register. */ + int *acc = ps->acc_latency; + acc[out_ACC] += cycles; + } +} + +void +decrease_ACC_busy (SIM_CPU *cpu, INT out_ACC, int cycles) +{ + if (out_ACC >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *acc = ps->acc_busy; + acc[out_ACC] -= cycles; + if (ps->acc_busy_adjust[out_ACC] >= 0 + && cycles > ps->acc_busy_adjust[out_ACC]) + ps->acc_busy_adjust[out_ACC] = cycles; + } +} + +void +decrease_FR_busy (SIM_CPU *cpu, INT out_FR, int cycles) +{ + if (out_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_busy; + fr[out_FR] -= cycles; + if (ps->fr_busy_adjust[out_FR] >= 0 + && cycles > ps->fr_busy_adjust[out_FR]) + ps->fr_busy_adjust[out_FR] = cycles; + } +} + +void +increase_FR_busy (SIM_CPU *cpu, INT out_FR, int cycles) +{ + if (out_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_busy; + fr[out_FR] += cycles; + } +} + +/* Top up the latency of the given ACC by the given number of cycles. */ +void +update_ACC_latency (SIM_CPU *cpu, INT out_ACC, int cycles) +{ + if (out_ACC >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *acc = ps->acc_latency; + if (acc[out_ACC] < cycles) + acc[out_ACC] = cycles; + } +} + +/* Top up the latency of the given CCR by the given number of cycles. */ +void +update_CCR_latency (SIM_CPU *cpu, INT out_CCR, int cycles) +{ + if (out_CCR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *ccr = ps->ccr_latency; + if (ccr[out_CCR] < cycles) + ccr[out_CCR] = cycles; + } +} + +/* Top up the latency of the given integer division resource by the given + number of cycles. */ +void +update_idiv_resource_latency (SIM_CPU *cpu, INT in_resource, int cycles) +{ + /* operate directly on the busy cycles since each resource can only + be used once in a VLIW insn. */ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *r = ps->idiv_busy; + r[in_resource] = cycles; +} + +/* Set the latency of the given resource to the given number of cycles. */ +void +update_fdiv_resource_latency (SIM_CPU *cpu, INT in_resource, int cycles) +{ + /* operate directly on the busy cycles since each resource can only + be used once in a VLIW insn. */ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *r = ps->fdiv_busy; + r[in_resource] = cycles; +} + +/* Set the latency of the given resource to the given number of cycles. 
*/ +void +update_fsqrt_resource_latency (SIM_CPU *cpu, INT in_resource, int cycles) +{ + /* operate directly on the busy cycles since each resource can only + be used once in a VLIW insn. */ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *r = ps->fsqrt_busy; + r[in_resource] = cycles; +} + +/* Set the branch penalty to the given number of cycles. */ +void +update_branch_penalty (SIM_CPU *cpu, int cycles) +{ + /* operate directly on the busy cycles since only one branch can occur + in a VLIW insn. */ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + ps->branch_penalty = cycles; +} + +/* Check the availability of the given GR register and update the number + of cycles the current VLIW insn must wait until it is available. */ +void +vliw_wait_for_GR (SIM_CPU *cpu, INT in_GR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *gr = ps->gr_busy; + /* If the latency of the register is greater than the current wait + then update the current wait. */ + if (in_GR >= 0 && gr[in_GR] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for gr%d:", in_GR); + ps->vliw_wait = gr[in_GR]; + } +} + +/* Check the availability of the given GR register and update the number + of cycles the current VLIW insn must wait until it is available. */ +void +vliw_wait_for_GRdouble (SIM_CPU *cpu, INT in_GR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *gr = ps->gr_busy; + /* If the latency of the register is greater than the current wait + then update the current wait. */ + if (in_GR >= 0) + { + if (gr[in_GR] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for gr%d:", in_GR); + ps->vliw_wait = gr[in_GR]; + } + if (in_GR < 63 && gr[in_GR + 1] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for gr%d:", in_GR + 1); + ps->vliw_wait = gr[in_GR + 1]; + } + } +} + +/* Check the availability of the given FR register and update the number + of cycles the current VLIW insn must wait until it is available. */ +void +vliw_wait_for_FR (SIM_CPU *cpu, INT in_FR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_busy; + /* If the latency of the register is greater than the current wait + then update the current wait. */ + if (in_FR >= 0 && fr[in_FR] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR); + ps->vliw_wait = fr[in_FR]; + } +} + +/* Check the availability of the given GR register and update the number + of cycles the current VLIW insn must wait until it is available. */ +void +vliw_wait_for_FRdouble (SIM_CPU *cpu, INT in_FR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_busy; + /* If the latency of the register is greater than the current wait + then update the current wait. */ + if (in_FR >= 0) + { + if (fr[in_FR] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR); + ps->vliw_wait = fr[in_FR]; + } + if (in_FR < 63 && fr[in_FR + 1] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR + 1); + ps->vliw_wait = fr[in_FR + 1]; + } + } +} + +/* Check the availability of the given CCR register and update the number + of cycles the current VLIW insn must wait until it is available. 
*/ +void +vliw_wait_for_CCR (SIM_CPU *cpu, INT in_CCR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *ccr = ps->ccr_busy; + /* If the latency of the register is greater than the current wait + then update the current wait. */ + if (in_CCR >= 0 && ccr[in_CCR] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + { + if (in_CCR > 3) + sprintf (hazard_name, "Data hazard for icc%d:", in_CCR-4); + else + sprintf (hazard_name, "Data hazard for fcc%d:", in_CCR); + } + ps->vliw_wait = ccr[in_CCR]; + } +} + +/* Check the availability of the given ACC register and update the number + of cycles the current VLIW insn must wait until it is available. */ +void +vliw_wait_for_ACC (SIM_CPU *cpu, INT in_ACC) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *acc = ps->acc_busy; + /* If the latency of the register is greater than the current wait + then update the current wait. */ + if (in_ACC >= 0 && acc[in_ACC] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for acc%d:", in_ACC); + ps->vliw_wait = acc[in_ACC]; + } +} + +/* Check the availability of the given integer division resource and update + the number of cycles the current VLIW insn must wait until it is available. +*/ +void +vliw_wait_for_idiv_resource (SIM_CPU *cpu, INT in_resource) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *r = ps->idiv_busy; + /* If the latency of the resource is greater than the current wait + then update the current wait. */ + if (r[in_resource] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + { + sprintf (hazard_name, "Resource hazard for integer division in slot I%d:", in_resource); + } + ps->vliw_wait = r[in_resource]; + } +} + +/* Check the availability of the given float division resource and update + the number of cycles the current VLIW insn must wait until it is available. +*/ +void +vliw_wait_for_fdiv_resource (SIM_CPU *cpu, INT in_resource) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *r = ps->fdiv_busy; + /* If the latency of the resource is greater than the current wait + then update the current wait. */ + if (r[in_resource] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + { + sprintf (hazard_name, "Resource hazard for integer division in slot I%d:", in_resource); + } + ps->vliw_wait = r[in_resource]; + } +} + +/* Check the availability of the given float square root resource and update + the number of cycles the current VLIW insn must wait until it is available. +*/ +void +vliw_wait_for_fsqrt_resource (SIM_CPU *cpu, INT in_resource) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *r = ps->fsqrt_busy; + /* If the latency of the resource is greater than the current wait + then update the current wait. */ + if (r[in_resource] > ps->vliw_wait) + { + if (TRACE_INSN_P (cpu)) + { + sprintf (hazard_name, "Resource hazard for integer division in slot I%d:", in_resource); + } + ps->vliw_wait = r[in_resource]; + } +} + +/* Run the caches until all requests for the given register(s) are satisfied. 
*/ +void +load_wait_for_GR (SIM_CPU *cpu, INT in_GR) +{ + if (in_GR >= 0) + { + int wait = 0; + while (load_pending_for_register (cpu, in_GR, 1/*words*/, REGTYPE_NONE)) + { + frv_model_advance_cycles (cpu, 1); + ++wait; + } + if (wait) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + ps->vliw_wait += wait; + ps->vliw_load_stall += wait; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for gr%d:", in_GR); + } + } +} + +void +load_wait_for_FR (SIM_CPU *cpu, INT in_FR) +{ + if (in_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr; + int wait = 0; + while (load_pending_for_register (cpu, in_FR, 1/*words*/, REGTYPE_FR)) + { + frv_model_advance_cycles (cpu, 1); + ++wait; + } + /* Post processing time may have been added to the register's + latency after the loads were processed. Account for that too. + */ + fr = ps->fr_busy; + if (fr[in_FR]) + { + wait += fr[in_FR]; + frv_model_advance_cycles (cpu, fr[in_FR]); + } + /* Update the vliw_wait with the number of cycles we waited for the + load and any post-processing. */ + if (wait) + { + ps->vliw_wait += wait; + ps->vliw_load_stall += wait; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR); + } + } +} + +void +load_wait_for_GRdouble (SIM_CPU *cpu, INT in_GR) +{ + if (in_GR >= 0) + { + int wait = 0; + while (load_pending_for_register (cpu, in_GR, 2/*words*/, REGTYPE_NONE)) + { + frv_model_advance_cycles (cpu, 1); + ++wait; + } + if (wait) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + ps->vliw_wait += wait; + ps->vliw_load_stall += wait; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for gr%d:", in_GR); + } + } +} + +void +load_wait_for_FRdouble (SIM_CPU *cpu, INT in_FR) +{ + if (in_FR >= 0) + { + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr; + int wait = 0; + while (load_pending_for_register (cpu, in_FR, 2/*words*/, REGTYPE_FR)) + { + frv_model_advance_cycles (cpu, 1); + ++wait; + } + /* Post processing time may have been added to the registers' + latencies after the loads were processed. Account for that too. + */ + fr = ps->fr_busy; + if (fr[in_FR]) + { + wait += fr[in_FR]; + frv_model_advance_cycles (cpu, fr[in_FR]); + } + if (in_FR < 63) + { + if (fr[in_FR + 1]) + { + wait += fr[in_FR + 1]; + frv_model_advance_cycles (cpu, fr[in_FR + 1]); + } + } + /* Update the vliw_wait with the number of cycles we waited for the + load and any post-processing. */ + if (wait) + { + ps->vliw_wait += wait; + ps->vliw_load_stall += wait; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR); + } + } +} + +void +enforce_full_fr_latency (SIM_CPU *cpu, INT in_FR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + ps->fr_busy_adjust [in_FR] = -1; +} + +/* Calculate how long the post processing for a floating point insn must + wait for resources to become available. */ +int +post_wait_for_FR (SIM_CPU *cpu, INT in_FR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_busy; + + if (in_FR >= 0 && fr[in_FR] > ps->post_wait) + { + ps->post_wait = fr[in_FR]; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR); + } +} + +/* Calculate how long the post processing for a floating point insn must + wait for resources to become available. 
*/ +int +post_wait_for_FRdouble (SIM_CPU *cpu, INT in_FR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fr = ps->fr_busy; + + if (in_FR >= 0) + { + if (fr[in_FR] > ps->post_wait) + { + ps->post_wait = fr[in_FR]; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR); + } + if (in_FR < 63 && fr[in_FR + 1] > ps->post_wait) + { + ps->post_wait = fr[in_FR + 1]; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for fr%d:", in_FR + 1); + } + } +} + +int +post_wait_for_ACC (SIM_CPU *cpu, INT in_ACC) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *acc = ps->acc_busy; + + if (in_ACC >= 0 && acc[in_ACC] > ps->post_wait) + { + ps->post_wait = acc[in_ACC]; + if (TRACE_INSN_P (cpu)) + sprintf (hazard_name, "Data hazard for acc%d:", in_ACC); + } +} + +int +post_wait_for_CCR (SIM_CPU *cpu, INT in_CCR) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *ccr = ps->ccr_busy; + + if (in_CCR >= 0 && ccr[in_CCR] > ps->post_wait) + { + ps->post_wait = ccr[in_CCR]; + if (TRACE_INSN_P (cpu)) + { + if (in_CCR > 3) + sprintf (hazard_name, "Data hazard for icc%d:", in_CCR - 4); + else + sprintf (hazard_name, "Data hazard for fcc%d:", in_CCR); + } + } +} + +int +post_wait_for_fdiv (SIM_CPU *cpu, INT slot) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fdiv = ps->fdiv_busy; + + /* Multiple floating point divisions in the same slot need only wait 1 + extra cycle. */ + if (fdiv[slot] > 0 && 1 > ps->post_wait) + { + ps->post_wait = 1; + if (TRACE_INSN_P (cpu)) + { + sprintf (hazard_name, "Resource hazard for floating point division in slot F%d:", slot); + } + } +} + +int +post_wait_for_fsqrt (SIM_CPU *cpu, INT slot) +{ + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + int *fsqrt = ps->fsqrt_busy; + + /* Multiple floating point square roots in the same slot need only wait 1 + extra cycle. */ + if (fsqrt[slot] > 0 && 1 > ps->post_wait) + { + ps->post_wait = 1; + if (TRACE_INSN_P (cpu)) + { + sprintf (hazard_name, "Resource hazard for square root in slot F%d:", slot); + } + } +} + +/* Print cpu-specific profile information. 
*/ +#define COMMAS(n) sim_add_commas (comma_buf, sizeof (comma_buf), (n)) + +static void +print_cache (SIM_CPU *cpu, FRV_CACHE *cache, const char *cache_name) +{ + SIM_DESC sd = CPU_STATE (cpu); + + if (cache != NULL) + { + char comma_buf[20]; + unsigned accesses; + + sim_io_printf (sd, " %s Cache\n\n", cache_name); + accesses = cache->statistics.accesses; + sim_io_printf (sd, " Total accesses: %s\n", COMMAS (accesses)); + if (accesses != 0) + { + float rate; + unsigned hits = cache->statistics.hits; + sim_io_printf (sd, " Hits: %s\n", COMMAS (hits)); + rate = (float)hits / accesses; + sim_io_printf (sd, " Hit rate: %.2f%%\n", rate * 100); + } + } + else + sim_io_printf (sd, " Model %s has no %s cache\n", + MODEL_NAME (CPU_MODEL (cpu)), cache_name); + + sim_io_printf (sd, "\n"); +} + +static char * +slot_names[] = +{ + "none", + "I0", "I1", "I01", + "FM1", "FM1", "FM01", + "B0", "B1", "B01", + "C" +}; + +static void +print_parallel (SIM_CPU *cpu, int verbose) +{ + SIM_DESC sd = CPU_STATE (cpu); + PROFILE_DATA *p = CPU_PROFILE_DATA (cpu); + FRV_PROFILE_STATE *ps = CPU_PROFILE_STATE (cpu); + unsigned total, vliw; + char comma_buf[20]; + float average; + + sim_io_printf (sd, "Model %s Parallelization\n\n", + MODEL_NAME (CPU_MODEL (cpu))); + + total = PROFILE_TOTAL_INSN_COUNT (p); + sim_io_printf (sd, " Total instructions: %s\n", COMMAS (total)); + vliw = ps->vliw_insns; + sim_io_printf (sd, " VLIW instructions: %s\n", COMMAS (vliw)); + average = (float)total / vliw; + sim_io_printf (sd, " Average VLIW length: %.2f\n", average); + average = (float)PROFILE_MODEL_TOTAL_CYCLES (p) / vliw; + sim_io_printf (sd, " Cycles per VLIW instruction: %.2f\n", average); + average = (float)total / PROFILE_MODEL_TOTAL_CYCLES (p); + sim_io_printf (sd, " Instructions per cycle: %.2f\n", average); + + if (verbose) + { + int i; + int max_val = 0; + int max_name_len = 0; + for (i = UNIT_NIL + 1; i < UNIT_NUM_UNITS; ++i) + { + int len; + if (INSNS_IN_SLOT (i) > max_val) + max_val = INSNS_IN_SLOT (i); + len = strlen (slot_names[i]); + if (len > max_name_len) + max_name_len = len; + } + if (max_val > 0) + { + sim_io_printf (sd, "\n"); + sim_io_printf (sd, " Instructions per slot:\n"); + sim_io_printf (sd, "\n"); + for (i = UNIT_NIL + 1; i < UNIT_NUM_UNITS; ++i) + { + if (INSNS_IN_SLOT (i) != 0) + { + sim_io_printf (sd, " %*s: %*s: ", + max_name_len, slot_names[i], + max_val < 10000 ? 5 : 10, + COMMAS (INSNS_IN_SLOT (i))); + sim_profile_print_bar (sd, PROFILE_HISTOGRAM_WIDTH, + INSNS_IN_SLOT (i), + max_val); + sim_io_printf (sd, "\n"); + } + } + } /* details to print */ + } /* verbose */ + + sim_io_printf (sd, "\n"); +} + +void +frv_profile_info (SIM_CPU *cpu, int verbose) +{ + /* FIXME: Need to add smp support. */ + PROFILE_DATA *p = CPU_PROFILE_DATA (cpu); + +#if WITH_PROFILE_PARALLEL_P + if (PROFILE_FLAGS (p) [PROFILE_PARALLEL_IDX]) + print_parallel (cpu, verbose); +#endif + +#if WITH_PROFILE_CACHE_P + if (PROFILE_FLAGS (p) [PROFILE_CACHE_IDX]) + { + SIM_DESC sd = CPU_STATE (cpu); + sim_io_printf (sd, "Model %s Cache Statistics\n\n", + MODEL_NAME (CPU_MODEL (cpu))); + print_cache (cpu, CPU_INSN_CACHE (cpu), "Instruction"); + print_cache (cpu, CPU_DATA_CACHE (cpu), "Data"); + } +#endif /* WITH_PROFILE_CACHE_P */ +} + +/* A hack to get registers referenced for profiling. */ +SI frv_ref_SI (SI ref) {return ref;} +#endif /* WITH_PROFILE_MODEL_P */ |
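The core idea running through the profiling code above is a per-register "busy" counter: when an insn targets a register, a latency is charged against it (update_GR_latency, update_FR_latency, etc.); as simulated cycles pass, update_latencies drains the counters; and a later VLIW insn that reads a still-busy register stalls for the remaining cycles (vliw_wait_for_GR and friends). The sketch below is a minimal, self-contained illustration of that bookkeeping only; the names busy, charge_latency, advance_cycles, and stall_for are invented for this example and are not part of the simulator's API.

```c
/* Minimal sketch of the busy/latency bookkeeping used by the profiler:
   writing a register charges a latency, advancing cycles drains it, and
   reading a still-busy register stalls for the remaining cycles.
   All names here are illustrative only, not the simulator's real API.  */
#include <stdio.h>

#define NUM_REGS 64

static int busy[NUM_REGS];          /* cycles until each register is ready */

static void charge_latency (int regno, int cycles)
{
  if (busy[regno] < cycles)
    busy[regno] = cycles;           /* keep the larger outstanding latency */
}

static void advance_cycles (int cycles)
{
  int i;
  for (i = 0; i < NUM_REGS; ++i)
    busy[i] = (busy[i] <= cycles) ? 0 : busy[i] - cycles;
}

static int stall_for (int regno)
{
  return busy[regno];               /* cycles a reader must wait */
}

int main (void)
{
  charge_latency (8, 3);            /* e.g. a load targets gr8, ready in 3 cycles */
  advance_cycles (1);               /* one VLIW insn issues in the meantime */
  printf ("stall reading gr8: %d cycles\n", stall_for (8));  /* prints 2 */
  return 0;
}
```

In the actual profiler the same pattern is applied per register class (GR, FR, ACC, CCR) and per hardware resource (integer divide, float divide, square root), and the counters are drained in update_latencies while the instruction and data caches are run for the same number of cycles in run_caches.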