aboutsummaryrefslogtreecommitdiff
path: root/gdb/guile/scm-disasm.c
blob: d06c481141046fee43f819e1694d085efd4d5561 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
/* Scheme interface to the disassembler.

   Copyright (C) 2014-2017 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* See the README file in this directory for implementation notes, coding
   conventions, et al.  */

#include "defs.h"
#include "arch-utils.h"
#include "disasm.h"
#include "dis-asm.h"
#include "gdbarch.h"
#include "gdbcore.h" /* Why is memory_error here? */
#include "guile-internal.h"

/* Keywords accepted by arch-disassemble (#:port, #:offset, #:size and
   #:count).  They are created in gdbscm_initialize_disasm.  */
static SCM port_keyword;
static SCM offset_keyword;
static SCM size_keyword;
static SCM count_keyword;

/* Symbols used as the keys of the per-instruction alist built by
   dascm_make_insn.  They are created in gdbscm_initialize_disasm.  */
static SCM address_symbol;
static SCM asm_symbol;
static SCM length_symbol;

/* Struct used to pass "application data" in disassemble_info.
   gdbscm_print_insn_from_port fills one in and stores its address in
   disassemble_info.application_data so that the read-memory and
   print-address callbacks can get at it.  */

struct gdbscm_disasm_data
{
  /* The architecture handed to print_address when an address is printed.  */
  struct gdbarch *gdbarch;
  /* The port instruction bytes are read from.  */
  SCM port;
  /* The offset of the address of the first instruction in PORT.
     It is subtracted from a memory address to obtain the position to
     seek to in PORT.  */
  ULONGEST offset;
};

/* Struct used to pass data from gdbscm_disasm_read_memory to
   gdbscm_disasm_read_memory_worker.  The fields are copies of the
   arguments gdbscm_disasm_read_memory was called with.  */

struct gdbscm_disasm_read_data
{
  /* Address of the first byte to read.  */
  bfd_vma memaddr;
  /* Buffer that receives the bytes read.  */
  bfd_byte *myaddr;
  /* Number of bytes to read.  */
  unsigned int length;
  /* The disassemble_info the read was requested through.  Its
     application_data field points to a struct gdbscm_disasm_data.  */
  struct disassemble_info *dinfo;
};

/* Subroutine of gdbscm_arch_disassemble to simplify it.
   Build the Scheme value describing one instruction: a list of three
   pairs holding the address PC, the disassembly text ASSEMBLY and the
   length INSN_LEN, keyed by address_symbol, asm_symbol and length_symbol
   respectively.  */

static SCM
dascm_make_insn (CORE_ADDR pc, const char *assembly, int insn_len)
{
  SCM address_entry
    = scm_cons (address_symbol, gdbscm_scm_from_ulongest (pc));
  SCM asm_entry
    = scm_cons (asm_symbol, gdbscm_scm_from_c_string (assembly));
  SCM length_entry
    = scm_cons (length_symbol, scm_from_int (insn_len));

  return scm_list_3 (address_entry, asm_entry, length_entry);
}

/* Helper function for gdbscm_disasm_read_memory to safely read from a
   Scheme port.  Called via gdbscm_call_guile.
   The result is a statically allocated error message or NULL if success.  */

static const char *
gdbscm_disasm_read_memory_worker (void *datap)
{
  struct gdbscm_disasm_read_data *data
    = (struct gdbscm_disasm_read_data *) datap;
  struct disassemble_info *dinfo = data->dinfo;
  struct gdbscm_disasm_data *disasm_data
    = (struct gdbscm_disasm_data *) dinfo->application_data;
  SCM seekto, newpos, port = disasm_data->port;
  size_t bytes_read;

  seekto = gdbscm_scm_from_ulongest (data->memaddr - disasm_data->offset);
  newpos = scm_seek (port, seekto, scm_from_int (SEEK_SET));
  if (!scm_is_eq (seekto, newpos))
    return "seek error";

  bytes_read = scm_c_read (port, data->myaddr, data->length);

  if (bytes_read != data->length)
    return "short read";

  /* If we get here the read succeeded.  */
  return NULL;
}

/* disassemble_info.read_memory_func for gdbscm_print_insn_from_port.
   Read LENGTH bytes at MEMADDR into MYADDR from the port recorded in
   DINFO's application data.  The actual port access is done by
   gdbscm_disasm_read_memory_worker, run through gdbscm_with_guile.
   Returns 0 on success and -1 if the worker reported an error.  */

static int
gdbscm_disasm_read_memory (bfd_vma memaddr, bfd_byte *myaddr,
			   unsigned int length,
			   struct disassemble_info *dinfo)
{
  struct gdbscm_disasm_read_data request;

  /* Bundle the arguments up for the worker.  */
  request.dinfo = dinfo;
  request.memaddr = memaddr;
  request.myaddr = myaddr;
  request.length = length;

  /* TODO: IWBN to distinguish problems reading target memory versus problems
     with the port (e.g., EOF).  */
  if (gdbscm_with_guile (gdbscm_disasm_read_memory_worker, &request) != NULL)
    return -1;

  return 0;
}

/* disassemble_info.memory_error_func for gdbscm_print_insn_from_port.
   Report a failed read at MEMADDR by calling memory_error with
   TARGET_XFER_E_IO.  STATUS and INFO are not used.
   Strictly speaking a private memory_error_func is not needed, but going
   without one would leave a subtle dependency in the code; having this
   function keeps the boundary clear.  */

static void
gdbscm_disasm_memory_error (int status, bfd_vma memaddr,
			    struct disassemble_info *info)
{
  /* Only the faulting address takes part in the report.  */
  (void) status;
  (void) info;

  memory_error (TARGET_XFER_E_IO, memaddr);
}

/* disassemble_info.print_address_func for gdbscm_print_insn_from_port.
   Print ADDR to INFO's stream with print_address, using the architecture
   stored in our application data.
   This routine has to be supplied because application_data holds our own
   struct gdbscm_disasm_data.  */

static void
gdbscm_disasm_print_address (bfd_vma addr, struct disassemble_info *info)
{
  struct gdbscm_disasm_data *app_data
    = (struct gdbscm_disasm_data *) info->application_data;
  struct ui_file *out = (struct ui_file *) info->stream;

  print_address (app_data->gdbarch, addr, out);
}

/* Subroutine of gdbscm_arch_disassemble to simplify it.
   Call gdbarch_print_insn using a port for input.
   PORT must be seekable.
   OFFSET is the offset in PORT from which addresses begin.
   For example, when printing from a bytevector, addresses passed to the
   bv seek routines must be in the range [0,size).  However, the bytevector
   may represent an instruction at address 0x1234.  To handle this case pass
   0x1234 for OFFSET.
   This is based on gdb_print_insn, see it for details.  */

static int
gdbscm_print_insn_from_port (struct gdbarch *gdbarch,
			     SCM port, ULONGEST offset, CORE_ADDR memaddr,
			     struct ui_file *stream, int *branch_delay_insns)
{
  struct disassemble_info di;
  int length;
  struct gdbscm_disasm_data data;

  di = gdb_disassemble_info (gdbarch, stream);
  data.gdbarch = gdbarch;
  data.port = port;
  data.offset = offset;
  di.application_data = &data;
  di.read_memory_func = gdbscm_disasm_read_memory;
  di.memory_error_func = gdbscm_disasm_memory_error;
  di.print_address_func = gdbscm_disasm_print_address;

  length = gdbarch_print_insn (gdbarch, memaddr, &di);

  if (branch_delay_insns)
    {
      if (di.insn_info_valid)
	*branch_delay_insns = di.branch_delay_insns;
      else
	*branch_delay_insns = 0;
    }

  return length;
}

/* (arch-disassemble <gdb:arch> address
     [#:port port] [#:offset address] [#:size integer] [#:count integer])
     -> list

   Returns a list of disassembled instructions, starting at ADDRESS.
   If PORT is provided and is not #f, read bytes from it.
   Otherwise read target memory.
   PORT must be seekable.  IWBN to remove this restriction, and a future
   release may.  For now the restriction is in place because it's not clear
   all disassemblers are strictly sequential.
   OFFSET may only be provided together with a non-#f PORT; otherwise an
   out-of-range error is raised.
   If SIZE is provided, limit the number of bytes read to this amount.
   A SIZE of zero yields the empty list, and an error is raised if
   ADDRESS + SIZE overflows.
   If COUNT is provided, limit the number of instructions to this amount.
   A COUNT of zero yields the empty list.
   NOTE(review): COUNT starts out as 1 below, so presumably a single
   instruction is returned when #:count is omitted (assuming
   gdbscm_parse_function_args leaves the outputs of absent keywords
   untouched) -- confirm.

   Each instruction in the result is an alist:
   (('address . address) ('asm . disassembly) ('length . length)).
   We could use a hash table (dictionary) but there aren't that many fields. */

static SCM
gdbscm_arch_disassemble (SCM self, SCM start_scm, SCM rest)
{
  arch_smob *a_smob
    = arscm_get_arch_smob_arg_unsafe (self, SCM_ARG1, FUNC_NAME);
  struct gdbarch *gdbarch = arscm_get_gdbarch (a_smob);
  /* The keyword arguments, in the order of the outputs passed to
     gdbscm_parse_function_args below; SCM_BOOL_F ends the list.  */
  const SCM keywords[] = {
    port_keyword, offset_keyword, size_keyword, count_keyword, SCM_BOOL_F
  };
  /* Argument positions of the keywords.  They stay at -1 unless changed by
     gdbscm_parse_function_args; the tests below treat a value > 0 as
     "keyword was supplied".  */
  int port_arg_pos = -1, offset_arg_pos = -1;
  int size_arg_pos = -1, count_arg_pos = -1;
  SCM port = SCM_BOOL_F;
  ULONGEST offset = 0;
  unsigned int count = 1;
  /* SIZE has no initial value; it is only read when size_arg_pos > 0.  */
  unsigned int size;
  ULONGEST start_arg;
  /* END is the last address at which an instruction may start.  */
  CORE_ADDR start, end;
  CORE_ADDR pc;
  unsigned int i;
  int using_port;
  SCM result;

  gdbscm_parse_function_args (FUNC_NAME, SCM_ARG2, keywords, "U#OUuu",
			      start_scm, &start_arg, rest,
			      &port_arg_pos, &port,
			      &offset_arg_pos, &offset,
			      &size_arg_pos, &size,
			      &count_arg_pos, &count);
  /* START is first stored in a ULONGEST because we don't have a format char
     for CORE_ADDR, and it's not really worth it to have one yet.  */
  start = start_arg;

  /* A supplied port must be either #f or an input port.  */
  if (port_arg_pos > 0)
    {
      SCM_ASSERT_TYPE (gdbscm_is_false (port)
		       || gdbscm_is_true (scm_input_port_p (port)),
		       port, port_arg_pos, FUNC_NAME, _("input port"));
    }
  using_port = gdbscm_is_true (port);

  /* An offset is meaningless without a port to apply it to.  */
  if (offset_arg_pos > 0
      && (port_arg_pos < 0
	  || gdbscm_is_false (port)))
    {
      gdbscm_out_of_range_error (FUNC_NAME, offset_arg_pos,
				 gdbscm_scm_from_ulongest (offset),
				 _("offset provided but port is missing"));
    }

  /* Work out the inclusive upper bound for instruction start addresses.  */
  if (size_arg_pos > 0)
    {
      if (size == 0)
	return SCM_EOL;
      /* For now be strict about start+size overflowing.  If it becomes
	 a nuisance we can relax things later.  */
      if (start + size < start)
	{
	  gdbscm_out_of_range_error (FUNC_NAME, 0,
				scm_list_2 (gdbscm_scm_from_ulongest (start),
					    gdbscm_scm_from_ulongest (size)),
				     _("start+size overflows"));
	}
      end = start + size - 1;
    }
  else
    /* No size limit: allow every address up to the largest CORE_ADDR.  */
    end = ~(CORE_ADDR) 0;

  if (count == 0)
    return SCM_EOL;

  /* Instructions are consed onto the front of RESULT, so the list is built
     in reverse and put in order by scm_reverse_x at the end.  */
  result = SCM_EOL;

  /* Disassemble one instruction per iteration until PC passes END or COUNT
     instructions have been produced.  */
  for (pc = start, i = 0; pc <= end && i < count; )
    {
      int insn_len = 0;
      /* The text of this instruction is collected in a fresh memory
	 ui_file, which the cleanup deletes.  */
      struct ui_file *memfile = mem_fileopen ();
      struct cleanup *cleanups = make_cleanup_ui_file_delete (memfile);

      /* NOTE(review): GDBSCM_HANDLE_GDB_EXCEPTION_WITH_CLEANUPS is handed
	 CLEANUPS, presumably so it can run them before turning the GDB
	 exception into a Scheme one -- confirm against its definition.  */
      TRY
	{
	  if (using_port)
	    {
	      insn_len = gdbscm_print_insn_from_port (gdbarch, port, offset,
						      pc, memfile, NULL);
	    }
	  else
	    insn_len = gdb_print_insn (gdbarch, pc, memfile, NULL);
	}
      CATCH (except, RETURN_MASK_ALL)
	{
	  GDBSCM_HANDLE_GDB_EXCEPTION_WITH_CLEANUPS (except, cleanups);
	}
      END_CATCH

      std::string as = ui_file_as_string (memfile);

      result = scm_cons (dascm_make_insn (pc, as.c_str (), insn_len),
			 result);

      /* Advance to the next instruction and release this iteration's
	 ui_file.  */
      pc += insn_len;
      i++;
      do_cleanups (cleanups);
    }

  return scm_reverse_x (result, SCM_EOL);
}

/* The Scheme procedures provided by this file, ended by END_FUNCTIONS.
   The table is passed to gdbscm_define_functions by
   gdbscm_initialize_disasm.
   NOTE(review): the numbers 2, 0, 1 are presumably the counts of required,
   optional and rest arguments, which would match the (self, start, rest)
   parameters of gdbscm_arch_disassemble -- confirm against the definition
   of scheme_function.  */

static const scheme_function disasm_functions[] =
{
  { "arch-disassemble", 2, 0, 1, as_a_scm_t_subr (gdbscm_arch_disassemble),
    "\
Return list of disassembled instructions in memory.\n\
\n\
  Arguments: <gdb:arch> start-address\n\
      [#:port port] [#:offset address]\n\
      [#:size <integer>] [#:count <integer>]\n\
    port: If non-#f, it is an input port to read bytes from.\n\
    offset: Specifies the address offset of the first byte in the port.\n\
      This is useful if the input is from something other than memory\n\
      (e.g., a bytevector) and you want the result to be as if the bytes\n\
      came from that address.  The value to pass for start-address is\n\
      then also the desired disassembly address, not the offset in, e.g.,\n\
      the bytevector.\n\
    size: Limit the number of bytes read to this amount.\n\
    count: Limit the number of instructions to this amount.\n\
\n\
  Returns:\n\
    Each instruction in the result is an alist:\n\
      (('address . address) ('asm . disassembly) ('length . length))." },

  END_FUNCTIONS
};

/* Initialize the Scheme disassembler support: register the procedures
   listed in disasm_functions and create the keywords and symbols that
   arch-disassemble uses.  */

void
gdbscm_initialize_disasm (void)
{
  gdbscm_define_functions (disasm_functions, 1);

  /* Keys of the per-instruction alist built by dascm_make_insn.  */
  address_symbol = scm_from_latin1_symbol ("address");
  asm_symbol = scm_from_latin1_symbol ("asm");
  length_symbol = scm_from_latin1_symbol ("length");

  /* Keyword arguments recognized by gdbscm_arch_disassemble.  */
  port_keyword = scm_from_latin1_keyword ("port");
  offset_keyword = scm_from_latin1_keyword ("offset");
  size_keyword = scm_from_latin1_keyword ("size");
  count_keyword = scm_from_latin1_keyword ("count");
}