From faef631a4d1429f48c34da13d446dcd64d1523bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20Harboe?= Date: Tue, 9 Feb 2010 14:26:57 +0100 Subject: arm11: improve performance using minidriver hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit zy1000 performance for GDB load went from 100kBytes/s to 300kBytes/s @ 8 MHz by implementing the inner loop of unack arm11 memory writes directly on top of the hw fifo. Profiling info: 78.57 0.77 0.77 arm11_run_instr_data_to_core_noack_inner 5.10 0.82 0.05 memcpy 4.08 0.86 0.04 jtag_tap_next_enabled 3.06 0.89 0.03 gdb_input Signed-off-by: Øyvind Harboe --- src/jtag/zy1000/zy1000.c | 68 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/jtag/zy1000/zy1000.c b/src/jtag/zy1000/zy1000.c index da5aa36..d920c30 100644 --- a/src/jtag/zy1000/zy1000.c +++ b/src/jtag/zy1000/zy1000.c @@ -845,11 +845,73 @@ void embeddedice_write_dcc(struct jtag_tap *tap, int reg_addr, uint8_t *buffer, } -int arm11_run_instr_data_to_core_noack_inner_default(struct arm11_common * arm11, uint32_t opcode, uint32_t * data, size_t count); -int arm11_run_instr_data_to_core_noack_inner(struct arm11_common * arm11, uint32_t opcode, uint32_t * data, size_t count) +int arm11_run_instr_data_to_core_noack_inner(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count) { - return arm11_run_instr_data_to_core_noack_inner_default(arm11, opcode, data, count); +#if 0 + int arm11_run_instr_data_to_core_noack_inner_default(struct jtag_tap * tap, uint32_t opcode, uint32_t * data, size_t count); + return arm11_run_instr_data_to_core_noack_inner_default(tap, opcode, data, count); +#else + static const int bits[] = {32, 2}; + uint32_t values[] = {0, 0}; + + /* FIX!!!!!! the target_write_memory() API started this nasty problem + * with unaligned uint32_t * pointers... */ + const uint8_t *t = (const uint8_t *)data; + + while (count--) + { + values[0] = *t++; + values[0] |= (*t++<<8); + values[0] |= (*t++<<16); + values[0] |= (*t++<<24); + + if (count > 0) + { + jtag_add_dr_out(tap, + 2, + bits, + values, + TAP_DRPAUSE); + +#if 1 + /* copy & paste from arm11_dbgtap.c */ + //TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT + + waitIdle(); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 1); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x28, 0); + waitIdle(); + ZY1000_POKE(ZY1000_JTAG_BASE + 0x20, TAP_DRSHIFT); +#else + static const tap_state_t arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay[] = + { + TAP_DREXIT2, TAP_DRUPDATE, TAP_IDLE, TAP_IDLE, TAP_IDLE, TAP_DRSELECT, TAP_DRCAPTURE, TAP_DRSHIFT + }; + + jtag_add_pathmove(ARRAY_SIZE(arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay), + arm11_MOVE_DRPAUSE_IDLE_DRPAUSE_with_delay); +#endif + } else + { + /* This will happen on the last iteration updating the current tap state + * so we don't have to track it during the common code path */ + jtag_add_dr_out(tap, + 2, + bits, + values, + TAP_IDLE); + } + } + + return jtag_execute_queue(); +#endif } -- cgit v1.1