From 8e20a3ac820c9517fc798703a38b3dc3072bfab2 Mon Sep 17 00:00:00 2001 From: Michael Meissner Date: Thu, 12 Oct 1995 15:48:22 +0000 Subject: Inline most things except semantics which causes GCC to balloon, and device{s,_tree} which causes a bug --- sim/ppc/ChangeLog | 103 +++++++ sim/ppc/Makefile.in | 50 ++-- sim/ppc/devices.c | 6 +- sim/ppc/devices.h | 6 +- sim/ppc/std-config.h | 772 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 909 insertions(+), 28 deletions(-) create mode 100644 sim/ppc/std-config.h (limited to 'sim/ppc') diff --git a/sim/ppc/ChangeLog b/sim/ppc/ChangeLog index f4afa11..5e657ed 100644 --- a/sim/ppc/ChangeLog +++ b/sim/ppc/ChangeLog @@ -1,3 +1,106 @@ +Thu Oct 12 11:35:53 1995 Michael Meissner + + * Makefile.in (INLINE_CFLAGS): Add -DDEFAULT_INLINE=2 to add + default inline support. Pass INLINE_CFLAGS when compiling. + + * devices.{h,c} (unimp_device_ioctl): Use STATIC_DEVICES, not + INLINE_DEVICES since GCC doesn't like inline functions that + accept variable arguments. + (stack_ioctl_callback): Make function just static because GCC + doesn't like inline functions that accept variable arguments. + + * devices.h (STATIC_DEVICES): Define as empty if not defined. + + * inline.c: Correct pathnames of included C files to match current + implementation. + + * inline.h (STATIC_DEVICES): If DEVICES_INLINE is defined to be + non-zero, define STATIC_DEVICES to be static. + + * std-config.h (INLINE): If GNU C and optimizing, define this as + __inline__. + (DEFAULT_INLINE): If not defined, define as 0. + (ENDIAN_INLINE): If not defined, define as DEFAULT_INLINE. + ({CORE,VM,CPU,EVENTS,REGISTERS,INTERRUPTS}_INLINE): Ditto. + ({SPREG,IDECODE}_INLINE): Ditto. + +Wed Oct 11 17:13:15 1995 Andrew Cagney + + * ppc-instructions: Initial cut of floating point suport added. + Of note include - use of host IEEE floating point instructions, + use of PowerPC manual pseudo code to handle the FPSCR. It is not + currently a pretty sight. 
+ + * memory_map.h, memory_map.c, memory_map_n.h, core.h, core.c: + merge into core.h, core.c, core_n.h. The type memory_map replaced + with core_map. This removes a level of pointer indirection when + translating an address. + + * memory_map.h, memory_map.c, memory_map_n.h: delete. + + * Makefile.in et.al (sorry): tweek to use new core, core_map and + core.h. + +Wed Oct 11 12:10:26 1995 Andrew Cagney + + * sim_calls.c, main.c: Add -g (trace_gdb) option, add tracing to + most of the other functions in sim_calls.c. + + * basics.h (CONCAT3), memory_map.c, memory_map_n.h, Makefile.in: + Add macros to better cover up `generic' code. Makes it possible + to step through the generic code! + + * vm.c, vm_n.h, Makefile.in: ditto + +Tue Oct 10 15:42:59 1995 Andrew Cagney + + * devices.h, devices.c, memory_map.h, memory_map.c: Changed + callback interface so that there is a read/write buffer but no + read/write_word. VEA default memory read/write handler sometimes + couldn't resolve an access and of those some were for a memory + fault and some were because gdb was making a bogus request. + + * devices.h, devices.c, memory_map.h, memory_map.c, vm.h, vm.c: + eliminate transfer_mode (raw or cooked) parameter from read/write + buffer. + +Fri Oct 6 20:23:56 1995 Andrew Cagney + + * ppc-instructions (fmul, fmuls): correct instruction format - had + FRB instead of FRC. + +Wed Oct 4 17:31:12 1995 Andrew Cagney + + * psim.c, device_tree.h, device_tree.c, devices.c (printd_*, + scand_*): new functions to parse/print fields in device names + while hiding any machine dependency. + + * devices.c, psim.c: Change the stack init code so that it is + handled by a device. Arguments passed across using a device ioctl + (hack). + + * devices.h, devices.c: device ioctl callback changed to allow a + variable number of arguments. This gives greater flexability and + greater chance of bugs. 
+ +Tue Oct 3 22:01:56 1995 Andrew Cagney - aka Noid + + * main.c (printf_filtered, error): Missing va_end() to close off + variable argument use. + + * Makefile.in (tmp-gencode): comment out hack to get around some + versions of make not handling files being created as side-effects. + + * gen.c (lf_open): Add -n (real_file_name) option. Specifies an + alternative file name to use in output files for things like #line + macros. + + Makefile.in (tmp-gencode): Use gen -n so that debug info is + correct. + + * Makefile.in (TARGETLIB): Use this instead of libsim.a in the + Makefile. + Sat Oct 7 22:40:59 1995 Michael Meissner * sim_calls.c (sim_set_callbacks): Define new function. diff --git a/sim/ppc/Makefile.in b/sim/ppc/Makefile.in index 4999db1..c9b02e5 100644 --- a/sim/ppc/Makefile.in +++ b/sim/ppc/Makefile.in @@ -18,6 +18,8 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # +default: all + VPATH = @srcdir@ srcdir = @srcdir@ srcroot = $(srcdir)/../.. @@ -77,15 +79,18 @@ INCLUDES = -I. 
-I$(srcdir) $(LIB_INCLUDES) $(BFD_INCLUDES) $(GDB_INCLUDES) CONFIG_FILE = std-config.h +# See inline.h for appropriate flags to set +INLINE_CFLAGS = -DDEFAULT_INLINE=2 + LIBIBERTY_LIB = ../../libiberty/libiberty.a BFD_LIB = ../../bfd/libbfd.a TARGETLIB = libsim.a -all: run libsim.a $(GDB_OBJ) +all: run $(TARGETLIB) $(GDB_OBJ) .c.o: - $(CC) -c $(CFLAGS) $(HDEFINES) $(TDEFINES) $(INCLUDES) $< + $(CC) -c $(CFLAGS) $(INLINE_CFLAGS) $(HDEFINES) $(TDEFINES) $(INCLUDES) $< @@ -104,7 +109,6 @@ PSIM_H = \ IDECODE_H = \ idecode.h \ - idecode_insn.h \ idecode_expression.h \ idecode_branch.h \ idecode_fields.h \ @@ -119,7 +123,6 @@ CPU_H = \ $(BASICS_H) \ $(REGISTERS_H) \ device_tree.h \ - memory_map.h \ core.h \ vm.h \ events.h \ @@ -145,7 +148,6 @@ LIB_SRC = \ bits.c \ ppc-endian.c \ debug.c \ - memory_map.c \ vm.c \ core.c \ events.c \ @@ -167,7 +169,6 @@ LIB_OBJ = \ ppc-endian.o \ system.o \ registers.o \ - memory_map.o \ vm.o \ core.o \ spreg.o \ @@ -184,14 +185,14 @@ LIB_OBJ = \ GDB_OBJ = sim_calls.o -psim: libsim.a main.o $(LIBIBERTY_LIB) $(BFD_LIB) $(LIBS) - $(CC) $(CFLAGS) $(LDFLAGS) -o psim main.o libsim.a $(BFD_LIB) $(LIBIBERTY_LIB) $(LIBS) +psim: $(TARGETLIB) main.o $(LIBIBERTY_LIB) $(BFD_LIB) $(LIBS) + $(CC) $(CFLAGS) $(LDFLAGS) -o psim main.o $(TARGETLIB) $(BFD_LIB) $(LIBIBERTY_LIB) $(LIBS) run: psim rm -f run ln psim run -libsim.a: $(BUILT_SRC) $(LIB_OBJ) $(GDB_OBJ) +$(TARGETLIB): tmp-gencode $(LIB_OBJ) $(GDB_OBJ) rm -f $(TARGETLIB) $(AR) $(AR_FLAGS) $(TARGETLIB) $(LIB_OBJ) $(GDB_OBJ) $(RANLIB) $(TARGETLIB) @@ -215,15 +216,12 @@ interrupts.o: interrupts.c $(CPU_H) $(IDECODE_H) system.h idecode.o: idecode.c $(CPU_H) $(IDECODE_H) semantics.h -memory_map.o: memory_map.c memory_map.h $(BASICS_H) device_tree.h interrupts.h - # double.o: double.c dp-bit.c -vm.o: vm.c vm.h $(BASICS_H) $(REGISTERS_H) \ - device_tree.h memory_map.h core.h interrupts.h +vm.o: vm.c vm.h vm_n.h $(BASICS_H) $(REGISTERS_H) \ + device_tree.h core.h interrupts.h -core.o: core.c core.h 
$(BASICS_H) \ - device_tree.h memory_map.h +core.o: core.c core.h $(BASICS_H) device_tree.h events.o: events.c events.h $(BASICS_H) @@ -251,14 +249,14 @@ ppc-config.h: $(CONFIG_FILE) tmp-gencode: gen ppc-instructions ppc-spr-table $(srcdir)/../../move-if-change ./gen -r $(srcdir)/ppc-spr-table \ - -P tmp-spreg.h \ - -p tmp-spreg.c \ -i $(srcdir)/ppc-instructions \ - -C tmp-icache.h \ - -S tmp-semantics.h \ - -s tmp-semantics.c \ - -D tmp-idecode.h \ - -d tmp-idecode.c + -n spreg.h -P tmp-spreg.h \ + -n spreg.c -p tmp-spreg.c \ + -n icache.h -C tmp-icache.h \ + -n semantics.h -S tmp-semantics.h \ + -n semantics.c -s tmp-semantics.c \ + -n idecode.h -D tmp-idecode.h \ + -n idecode.c -d tmp-idecode.c $(srcdir)/../../move-if-change tmp-icache.h icache.h $(srcdir)/../../move-if-change tmp-idecode.h idecode.h $(srcdir)/../../move-if-change tmp-idecode.c idecode.c @@ -268,7 +266,11 @@ tmp-gencode: gen ppc-instructions ppc-spr-table $(srcdir)/../../move-if-change $(srcdir)/../../move-if-change tmp-spreg.c spreg.c touch tmp-gencode -icache.h idecode.h idecode.c semantics.h semantics.c spreg.h spreg.c: tmp-gencode +# NOTE: Some versions of make don't handle files created as side-effects +# uncomment the below if that is the case. 
+# +# $(TARGETLIB): tmp-gencode +# icache.h idecode.h idecode.c semantics.h semantics.c spreg.h spreg.c: tmp-gencode gen.o: gen.c config.h ppc-config.h $(CC_FOR_BUILD) -c $(CFLAGS) $(HDEFINES) $(TDEFINES) $(INCLUDES) $(srcdir)/gen.c @@ -286,7 +288,7 @@ TAGS: tmp-gencode config.h ppc-config.h clean mostlyclean: rm -f tmp-* *.[oas] core psim run gen config.log -distclean realclean: clean +distclean maintainer-clean realclean: clean rm -f TAGS $(BUILT_SRC) Makefile config.cache config.status config.h stamp-h Makefile: Makefile.in config.status diff --git a/sim/ppc/devices.c b/sim/ppc/devices.c index 983b693..6b4d940 100644 --- a/sim/ppc/devices.c +++ b/sim/ppc/devices.c @@ -189,7 +189,7 @@ unimp_device_interrupt_ack(const device *me, error("device_interrupt_ack_callback for %s not implemented\n", me->name); } -INLINE_DEVICES void +STATIC_DEVICES void unimp_device_ioctl(const device *me, psim *system, cpu *processor, @@ -834,7 +834,7 @@ vm_io_write_buffer_callback(const device *me, } -STATIC_INLINE_DEVICES void +static void vm_ioctl_callback(const device *me, psim *system, cpu *processor, @@ -1485,7 +1485,7 @@ create_aix_stack_frame(psim *system, -STATIC_INLINE_DEVICES void +static void stack_ioctl_callback(const device *me, psim *system, cpu *processor, diff --git a/sim/ppc/devices.h b/sim/ppc/devices.h index 681992e..df02288 100644 --- a/sim/ppc/devices.h +++ b/sim/ppc/devices.h @@ -26,6 +26,10 @@ #define INLINE_DEVICES #endif +#ifndef STATIC_DEVICES +#define STATIC_DEVICES +#endif + /* forward declaration of types */ /* typedef struct _device device; -- in devices.h */ @@ -313,7 +317,7 @@ INLINE_DEVICES device_config_interrupt_callback unimp_device_attach_interrupt; INLINE_DEVICES device_config_interrupt_callback unimp_device_detach_interrupt; INLINE_DEVICES device_interrupt_callback unimp_device_interrupt; INLINE_DEVICES device_interrupt_ack_callback unimp_device_interrupt_ack; -INLINE_DEVICES device_ioctl_callback unimp_device_ioctl; +STATIC_DEVICES 
device_ioctl_callback unimp_device_ioctl; /* Pass through and ignore callback functions. A call going towards the root device are passed on up, local calls are ignored and call diff --git a/sim/ppc/std-config.h b/sim/ppc/std-config.h new file mode 100644 index 0000000..07018bf --- /dev/null +++ b/sim/ppc/std-config.h @@ -0,0 +1,772 @@ +/* This file is part of the program psim. + + Copyright (C) 1994-1995, Andrew Cagney + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + */ + + +#ifndef _CONFIG_H_ +#define _CONFIG_H_ + + +/* endianness of the host/target: + + If the build process is aware (at compile time) of the endianness + of the host/target it is able to eliminate slower generic endian + handling code. + + If ENDIAN_OK is true then no byte swapping is required. If it is + false, copy-in / copy-out functions assume that data should be byte + reversed as part of the copy. */ + +#define WITH_HOST_BYTE_ORDER 0 /*unknown*/ +#define WITH_TARGET_BYTE_ORDER 0 /*unknown*/ + +extern int current_host_byte_order; +extern int current_target_byte_order; +#define CURRENT_HOST_BYTE_ORDER (WITH_HOST_BYTE_ORDER \ + ? WITH_HOST_BYTE_ORDER \ + : current_host_byte_order) +#define CURRENT_TARGET_BYTE_ORDER (WITH_TARGET_BYTE_ORDER \ + ? 
WITH_TARGET_BYTE_ORDER \ + : current_target_byte_order) + + +/* SMP support: + + Sets a limit on the number of processors that can be simulated. If + WITH_SMP is set to zero (0), the simulator is restricted to + supporting only one processor (and as a consequence leaves the SMP + code out of the build process). */ + +#ifndef WITH_SMP +#define WITH_SMP 0 +#endif + + +/* Word size of host/target: + + Set these according to your host and target requirements. At this + point in time, I've only compiled (not run) for a 64bit target and never + built for a 64bit host. This will always remain a compile time + option */ + +#ifndef WITH_TARGET_WORD_BITSIZE +#define WITH_TARGET_WORD_BITSIZE 32 /* compiled only */ +#endif +#ifndef WITH_HOST_WORD_BITSIZE +#define WITH_HOST_WORD_BITSIZE 32 /* 64bit ready? */ +#endif + + +/* Program environment: + + Two environments are available. VEA (or virtual environment + architecture) and OEA (or operating environment architecture). The + former is the environment that a user program would see while the + latter is the environment as seen by an operating system. By + setting these to specific values, the build process is able to + eliminate non-relevant environment code. + + CURRENT_ENVIRONMENT specifies which of vea or oea is required for + the current runtime. */ + +#define WITH_ENVIRONMENT 0 +#define VIRTUAL_ENVIRONMENT 1 +#define OPERATING_ENVIRONMENT 2 + +extern int current_environment; +#define CURRENT_ENVIRONMENT (WITH_ENVIRONMENT \ + ? WITH_ENVIRONMENT \ + : current_environment) + + +/* Optional VEA/OEA code: + + The below, required for the OEA model may also be included in the + VEA model however, as far as I can tell only make things + slower... */ + + +/* Events. Devices modeling real H/W need to be able to efficiently + schedule things to do at known times in the future. The event + queue implements this. Unfortunately this adds the need to check + for any events once each full instruction cycle. 
*/ + +#define WITH_EVENTS (WITH_ENVIRONMENT != VIRTUAL_ENVIRONMENT) + + +/* Time base: + + The PowerPC architecture includes the addition of both a time base + register and a decrement timer. Like events, this adds to the overhead + of some instruction cycles. */ + +#ifndef WITH_TIME_BASE +#define WITH_TIME_BASE 1 +#endif + + +/* Callback/Default Memory. + + Core includes a builtin memory type (raw_memory) that is + implemented using an array. raw_memory does not require any + additional functions etc. + + Callback memory is where the core calls a core device for the data + it requires. + + Default memory is an extension of this where for addresses that do + not map into either a callback or core memory range a default map + can be used. + + The OEA model uses callback memory for devices and default memory + for buses. + + The VEA model uses callback memory to capture `page faults'. + + While it may be possible to eliminate callback/default memory (and + hence also eliminate an additional test per memory fetch) it + probably is not worth the effort. + + BTW, while raw_memory could have been implemented as a callback, + profiling has shown that there is a bigger win (at least for the + x86) in eliminating a function call for the most common + (raw_memory) case. */ + +#define WITH_CALLBACK_MEMORY 1 + + +/* Alignment: + + The PowerPC may or may not handle misaligned transfers. An + implementation normally handles misaligned transfers in big + endian mode but generates an exception in little endian mode. + + This model instead allows both little and big endian modes to + either take exceptions or handle misaligned transfers. + + If 0 is specified then for big-endian mode misaligned accesses + are permitted (NONSTRICT_ALIGNMENT) while in little-endian mode the + processor will fault on them (STRICT_ALIGNMENT). 
*/ + +#define NONSTRICT_ALIGNMENT 1 +#define STRICT_ALIGNMENT 2 + +#ifndef WITH_ALIGNMENT +#define WITH_ALIGNMENT 0 +#endif +extern int current_alignment; +#define CURRENT_ALIGNMENT (WITH_ALIGNMENT \ + ? WITH_ALIGNMENT \ + : current_alignment) + + +/* Floating point support: + + Still under development. */ + +#define SOFT_FLOATING_POINT 1 +#define HARD_FLOATING_POINT 2 + +#ifndef WITH_FLOATING_POINT +#define WITH_FLOATING_POINT HARD_FLOATING_POINT +#endif +extern int current_floating_point; +#define CURRENT_FLOATING_POINT (WITH_FLOATING_POINT \ + ? WITH_FLOATING_POINT \ + : current_floating_point) + + +/* Debugging: + + Control the inclusion of debugging code. */ + +/* Include the tracing code. Disabling this eliminates all tracing + code */ + +#ifndef WITH_TRACE +#define WITH_TRACE 1 +#endif + +/* include code that checks assertions scattered throughout the + program */ + +#ifndef WITH_ASSERT +#define WITH_ASSERT 1 +#endif + +/* include profiling code that doesn't yet exist */ + +#ifndef WITH_PROFILE +#define WITH_PROFILE 1 +#endif + + +/* INSTRUCTION TABLE CODE GENERATION: + + The program gen takes the files ppc-instructions and ppc-spr-table and + creates from them code that provides: + + o instruction decode and issue + o spr information + + The program gen does this according to the configuration + information that follows. */ + + +/* Line numbering of generated code: + + When generating the semantic and idecode files, gen can also output + line number information (w.r.t. ppc-instructions). It may be + useful to disable this if you suspect that gen.c is incorrectly + generating intermediate code files. */ + +#ifndef WITH_LINE_NUMBERS +#define WITH_LINE_NUMBERS 1 +#endif + + +/* Instruction cache: + + Instead of the idecode routine calling the semantic function + directly, idecode can instead return a descriptor of the + instruction (cache entry). + + With level one caching, idecode just returns the address of the + semantic function. 
With level two caching, in addition to this, + the idecode routine decodes key fields within the instruction and + also enters them into the cache. The table WITH_IDECODE_CACHE_RULES + controls what goes into the cache.*/ + +#ifndef WITH_IDECODE_CACHE +#define WITH_IDECODE_CACHE 0 +#endif +#ifndef IDECODE_CACHE_SIZE +#define IDECODE_CACHE_SIZE 1024 +#endif + + +/* Semantic code expansion: + + For a given instruction there is the potential to improve + performance by creating copies of the instruction's code for one or + more of its possible variations. E.g. branch being relative. This + macro determines if semantic functions should be expanded. How + well they are expanded is determined by the table + WITH_IDECODE_OPCODE_RULES. */ + +#ifndef WITH_IDECODE_EXPAND_SEMANTICS +#define WITH_IDECODE_EXPAND_SEMANTICS 0 +#endif + + +/* SPR database: + + The attributes of the SPR's are kept in a `lookup table'. This + table can be implemented as either a true table or a switch + statement. + + A switch statement may be a performance advantage if the SPR's are + known at compile time. The compiler is then able to eliminate the + switch. */ + +#ifndef WITH_SPREG_LOOKUP_TABLE +#define WITH_SPREG_LOOKUP_TABLE 1 +#endif + + +/* Instruction decode: + + The table that follows is used by gen to construct a decision tree + that can identify each possible instruction. Gen then outputs this + decision tree as (according to config) a table or switch statement + as the function idecode. + + In parallel to this, as mentioned above, WITH_IDECODE_EXPAND_SEMANTICS + determines if the semantic functions themselves should be expanded + in a similar way. + + The table contains the following entries: + + + + Must be 1 for the entry to be considered. The last entry must be + zero. + + + + + Range of bits (within the instruction) that should be searched for + an instruction field. 
Within such ranges, gen looks for opcodes + (constants), registers (strings) and reserved bits (slash) and + according to the rules that follow includes or excludes them from + a possible instruction field. + + + + + If an instruction field was found, enlarge the field size so that + it is forced to at least include bits starting from + (). To stop this occurring, use = + + 1 and = - 1. + + + + Treat `/' fields as a constant instead of variable when looking for + an instruction field. + + + + Treat any contained register (string) fields as constant when + determining the instruction field. For the instruction decode (and + controlled by WITH_IDECODE_EXPAND_SEMANTICS) this forces the expansion of + what would otherwise be non-constant bits of an instruction. + + + + Should this table be expanded using a switch statement (val 1) and + if so, should it be padded with entries so as to force the compiler + to generate a jump table (val 2). + + + + + + Special rule to fine tune how specific (or groups) of instructions + are expanded. The applicability of the rule is determined by + + != 0 && (instruction> & ) == + + Where is obtained by looking only at constant fields + within an instruction's spec. When determining an expansion, the + rule is only considered when a node contains a single instruction. + can be any of: + + 0: for this instruction, expand by earlier rules + 1: expand bits .. only + 2: boolean expansion of only zero/non-zero cases + + Ok? */ + + +#define WITH_IDECODE_OPCODE_RULES { \ + { 1, 0, 5, 0, 5, 0, 0, 1, 0x00000000, 0x00000000, 0 }, \ + { 1, 21, 31, 32, -1, 0, 0, 1, 0x00000000, 0x00000000, 0 }, \ + { 0 } \ +} + + +/* Instruction unpacking: + + Once the instruction has been decoded, the register (and other) + fields within the instruction need to be extracted. + + The table that follows determines how each field should be treated. + Importantly it considers the case where the extracted field is to + be used immediately or stored in an instruction cache. 
+ + + + Zero marks the end of the table. More importantly 1. indicates + that the entry is valid and can be cached. 2. indicates that + the entry is valid but can not be cached. + + + + The field name as given in the instruction spec. + + + + A name for the field once it has been extracted from the + instruction (and possibly stored in the instruction cache). + + + + String specifying the storage type for the new name (the extracted + field). + + + + Specifies how to get the new name from the old name. If null, old and + new name had better be the same. */ + +#define WITH_IDECODE_CACHE_RULES { \ + { 1, "RA", "RA", 0, 0 }, \ + { 1, "RA", "rA", "signed_word *", \ + "(cpu_registers(processor)->gpr + RA)" }, \ + { 1, "RT", "RT", 0, 0 }, \ + { 1, "RT", "rT", "signed_word *", \ + "(cpu_registers(processor)->gpr + RT)" }, \ + { 2, "RS", "RS", 0, 0 }, \ + { 1, "RS", "rS", "signed_word *", \ + "(cpu_registers(processor)->gpr + RS)" }, \ + { 2, "RB", "RB", 0, 0 }, \ + { 1, "RB", "rB", "signed_word *", \ + "(cpu_registers(processor)->gpr + RB)" }, \ + { 2, "FRA", "FRA", 0, 0 }, \ + { 1, "FRA", "frA", "unsigned64 *", \ + "(cpu_registers(processor)->fpr + FRA)" }, \ + { 2, "FRB", "FRB", 0, 0 }, \ + { 1, "FRB", "frB", "unsigned64 *", \ + "(cpu_registers(processor)->fpr + FRB)" }, \ + { 2, "FRC", "FRC", 0, 0 }, \ + { 1, "FRC", "frC", "unsigned64 *", \ + "(cpu_registers(processor)->fpr + FRC)" }, \ + { 2, "FRS", "FRS", 0, 0 }, \ + { 1, "FRS", "frS", "unsigned64 *", \ + "(cpu_registers(processor)->fpr + FRS)" }, \ + { 2, "FRT", "FRT", 0, 0 }, \ + { 1, "FRT", "frT", "unsigned64 *", \ + "(cpu_registers(processor)->fpr + FRT)" }, \ + { 1, "SI", "EXTS_SI", "unsigned_word", \ + "((signed_word)(signed16)instruction)" }, \ + { 2, "BI", "BI", 0, 0 }, \ + { 1, "BI", "BIT32_BI", 0, \ + "BIT32(BI)" }, \ + { 2, "BA", "BA", 0, 0 }, \ + { 1, "BA", "BIT32_BA", 0, \ + "BIT32(BA)" }, \ + { 2, "BB", "BB", 0, 0 }, \ + { 1, "BB", "BIT32_BB", 0, \ + "BIT32(BB)" }, \ + { 1, "BD", "EXTS_BD_0b00", "unsigned_word", \ + 
"(((signed_word)(signed16)instruction) & ~3)" }, \ +/*{ 1, "BD", "CIA_plus_EXTS_BD_0b00", "unsigned_word", */ \ +/* "CIA + EXTS(BD_0b00)" }, */ \ + { 1, "LI", "EXTS_LI_0b00", "unsigned_word", \ + "((((signed_word)(signed32)(instruction << 6)) >> 6) & ~0x3)" }, \ + { 1, "D", "EXTS_D", "unsigned_word", \ + "((signed_word)(signed16)(instruction))" }, \ + { 1, "DS", "EXTS_DS_0b00", "unsigned_word", \ + "(((signed_word)(signed16)instruction) & ~0x3)" }, \ + { 0 } \ +}; + + + +/* INLINE CODE SELECTION: + + GCC -O3 attempts to inline any function or procedure in scope. The + options below facilitate fine grained control over what is and what + isn't made inline. For instance it can control things down to a + specific module's static routines. This control is implemented in + two parts. Doing this allows the compiler to both eliminate the + overhead of function calls and (as a consequence) also eliminate + further dead code. + + Experimenting with CISC (x86) I've found that I can achieve an + order of magnitude speed improvement (x3-x5). In the case of RISC + (sparc) while the performance gain isn't as great it is still + significant. + + Part One - Static functions: It is possible to control how static + functions within each module are to be compiled. On a per module + or global basis, it is possible to specify that a module's static + functions should be compiled inline. This is controlled by the + macros STATIC_INLINE and the per-module STATIC_INLINE_ macros (e.g. STATIC_INLINE_DEVICES). + + Part Two - External functions: Again it is possible to allow the + inlining of calls to external functions. This is far more + complicated and much heavier on the compiler. In this case, it is + controlled by the per-module _INLINE macros (e.g. CORE_INLINE, VM_INLINE), where each can have a + value: + + 0 ppc.c should call external module + + 1 ppc.c should have local copy (and hence possibly facilitate + the inlining of that module's external calls) + + 2 ppc.c should inline this module + + Finally, this is not for the faint hearted. 
I've seen GCC get up to + 200mb trying to compile what this can create */ + +/* Your compiler's inline reserved word */ + +#ifndef INLINE +#if defined(__GNUC__) && defined(__OPTIMIZE__) +#define INLINE __inline__ +#else +#define INLINE /*inline*/ +#endif +#endif + +/* Default prefix for static functions */ + +#ifndef STATIC_INLINE +#define STATIC_INLINE static INLINE +#endif + +/* Default macro to control several of the inlines */ + +#ifndef DEFAULT_INLINE +#define DEFAULT_INLINE 0 +#endif + +/* Code that does byte swapping used on any memory access */ + +#ifndef ENDIAN_INLINE +#define ENDIAN_INLINE DEFAULT_INLINE +#endif + +/* Instruction cache if in use */ + +#if 0 /*DNE*/ +#ifndef ICACHE_INLINE +#define ICACHE_INLINE 0 +#endif +#endif + +/* Given a translated address, core maps it onto either simulator data + or a function call, this is performed once for each + data/instruction access */ + + +#ifndef CORE_INLINE +#define CORE_INLINE DEFAULT_INLINE +#endif + + +/* The cpu object. Many things call upon this module to manipulate + each cpu object for instance register updates (from semantics) or + instruction execution from psim */ + +#ifndef VM_INLINE +#define VM_INLINE DEFAULT_INLINE +#endif + +/* Physical memory is implemented using the memory map module */ + +#ifndef CPU_INLINE +#define CPU_INLINE DEFAULT_INLINE +#endif + +/* handle the queue of events to happen in the future */ + +#ifndef EVENTS_INLINE +#define EVENTS_INLINE DEFAULT_INLINE +#endif + +/* not so important register manipulation code. Most important + register operations are performed directly on the register file */ + +#ifndef REGISTERS_INLINE +#define REGISTERS_INLINE DEFAULT_INLINE +#endif + +/* interrupt handling code */ + +#ifndef INTERRUPTS_INLINE +#define INTERRUPTS_INLINE DEFAULT_INLINE +#endif + +/* device code. While possibly important, this isn't as critical as + the cpu/memory path + + There seems to be some problem with making either device_tree or + devices inline. 
It reports the message: + device_tree_find_node() not a leaf */ + +#ifndef DEVICE_TREE_INLINE +#define DEVICE_TREE_INLINE 0 +#endif + +#ifndef DEVICES_INLINE +#define DEVICES_INLINE 0 +#endif + +/* Special Purpose Register tables. Provide information on the + attributes of given SPR's. */ + +#ifndef SPREG_INLINE +#define SPREG_INLINE DEFAULT_INLINE +#endif + +/* Functions modeling the semantics of each instruction. Two cases to + consider, firstly if idecode is implemented with a switch then this + allows the idecode function to inline each semantic function + (avoiding a call). The second case is when idecode is using a + table, even then while the semantic functions can't be inlined, + setting it to one still enables each semantic function to inline + anything they call (if that code is marked for being inlined). + + WARNING: you need lots of swap (like 200mb). Setting this + to 1 is useful when using a table as it enables the semantic code to + inline all of their called functions */ + +#ifndef SEMANTICS_INLINE +#define SEMANTICS_INLINE 0 +#endif + +/* Functions that decode an instruction. Called by the cpu module. + Part of the performance critical fetch - decode - issue sequence */ + +#ifndef IDECODE_INLINE +#define IDECODE_INLINE DEFAULT_INLINE +#endif + + + +/* If you're confused by the above, check out some of the generic + configurations below. */ + + +#if 0 +/* Allow the expansion of the semantic functions. 
That is, if the + branch instruction is called with AA=0 and AA=1, generate separate + functions for each case */ + +#undef WITH_IDECODE_EXPAND_SEMANTICS +#define WITH_IDECODE_EXPAND_SEMANTICS 1 + +#undef WITH_IDECODE_OPCODE_RULES +#define WITH_IDECODE_OPCODE_RULES { \ + { 1, 0, 5, 0, 5, 0, 0, 0, 0x00000000, 0x00000000, 0 }, \ + { 1, 21, 31, 32, -1, 0, "OE,LR,AA,Rc,LK", 0, 0x00000000, 0x00000000, 0 }, \ + { 1, 6, 9, 6, 9, 0, "BO", 0, 0xfc000000, 0x40000000, 1 }, \ + { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x38000000, 2 }, \ + { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x3c000000, 2 }, \ + { 0 } \ +} +#endif + + +#if 0 +/* eliminate any debugging noise */ + +#undef WITH_TRACE +#define WITH_TRACE 0 + +#undef WITH_ASSERT +#define WITH_ASSERT 0 + +#endif + + +#if 0 +/* A reasonable set of inline macros that give the compiler a + fighting chance at eliminating much of the function call overhead. + + Typically, with the below the -O3 option (to get inlining of all + functions) isn't of any great benefit. */ + +#undef INLINE +#define INLINE inline + +#undef STATIC_INLINE +#define STATIC_INLINE static INLINE + +#undef ENDIAN_INLINE +#define ENDIAN_INLINE 2 + +#if 0 /*DNE*/ +#undef ICACHE_INLINE +#define ICACHE_INLINE 0 +#endif + +#undef CORE_INLINE +#define CORE_INLINE 2 + +#undef VM_INLINE +#define VM_INLINE 2 + +#undef CPU_INLINE +#define CPU_INLINE 2 + +#undef EVENTS_INLINE +#define EVENTS_INLINE 2 + +#undef REGISTERS_INLINE +#define REGISTERS_INLINE 2 + +#undef INTERRUPTS_INLINE +#define INTERRUPTS_INLINE 2 + +#undef DEVICE_TREE_INLINE +#define DEVICE_TREE_INLINE 0 + +#undef DEVICES_INLINE +#define DEVICES_INLINE 0 + +#undef SPREG_INLINE +#define SPREG_INLINE 2 + +#undef SEMANTICS_INLINE +#define SEMANTICS_INLINE 1 /* not 2! as it blows away the compiler */ + +#undef IDECODE_INLINE +#define IDECODE_INLINE 2 + +#endif + + +#if 0 +/* Enable the full cracking cache. 
The cracked instruction cache + appears to give best performance if most functions have been inlined + as well */ + +#undef WITH_IDECODE_CACHE +#define WITH_IDECODE_CACHE 2 + +#endif + + + +#if 0 +/* With the VEA model, can eliminate some things. Not least of which + is support for the OEA model */ + +#undef WITH_ENVIRONMENT +#define WITH_ENVIRONMENT VIRTUAL_ENVIRONMENT + +#undef WITH_EVENTS +#define WITH_EVENTS 0 + +#undef WITH_SMP +#define WITH_SMP 0 + +#undef WITH_TARGET_BYTE_ORDER +#define WITH_TARGET_BYTE_ORDER WITH_HOST_BYTE_ORDER + +#endif + + + + +#if 0 +/* Finally, the expansion rules below are extremely aggressive. Only + consider them if your build machine is VERY VERY VERY VERY VERY + well configured */ + +#undef WITH_IDECODE_EXPAND_SEMANTICS +#define WITH_IDECODE_EXPAND_SEMANTICS 1 + +#undef WITH_IDECODE_OPCODE_RULES +#define WITH_IDECODE_OPCODE_RULES { \ + { 1, 0, 5, 0, 5, 0, 0, 0, 0x00000000, 0x00000000, 0 }, \ + { 1, 21, 31, 32, -1, 0, "OE,LR,AA,Rc,LK", 0, 0x00000000, 0x00000000, 0 }, \ + { 1, 6, 15, 6, 15, 0, "BO,BI", 0, 0xfc000000, 0x40000000, 0 }, \ + { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x38000000, 0 }, \ + { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x3c000000, 0 }, \ + { 1, 11, 20, 11, 20, 0, "spr", 0, 0xfc000000, 0x7c000000, 0 }, \ + { 0 } \ +} +#endif + + +#endif /* _CONFIG_H_ */ -- cgit v1.1