diff options
author | Jose E. Marchesi <jose.marchesi@oracle.com> | 2019-05-23 19:03:59 +0200 |
---|---|---|
committer | Jose E. Marchesi <jose.marchesi@oracle.com> | 2019-05-23 19:33:50 +0200 |
commit | ea195bb04cc5c964126aeff0f87d7161a03ca926 (patch) | |
tree | 460e3677c9b35ad26bdee3c876619b3fffa42e14 | |
parent | fd0de36e274c8141a5dd4579cd04856dc88370da (diff) | |
download | gdb-ea195bb04cc5c964126aeff0f87d7161a03ca926.zip gdb-ea195bb04cc5c964126aeff0f87d7161a03ca926.tar.gz gdb-ea195bb04cc5c964126aeff0f87d7161a03ca926.tar.bz2 |
cpu: add eBPF cpu description
This patch adds a CPU description for the Linux kernel eBPF virtual
machine, plus supporting code for disassembler and assembler.
cpu/ChangeLog:
2019-05-23 Jose E. Marchesi <jose.marchesi@oracle.com>
* bpf.cpu: New file.
* bpf.opc: Likewise.
-rw-r--r-- | cpu/ChangeLog | 5 | ||||
-rw-r--r-- | cpu/bpf.cpu | 647 | ||||
-rw-r--r-- | cpu/bpf.opc | 191 |
3 files changed, 843 insertions, 0 deletions
diff --git a/cpu/ChangeLog b/cpu/ChangeLog index c563860..b573c69 100644 --- a/cpu/ChangeLog +++ b/cpu/ChangeLog @@ -1,3 +1,8 @@ +2019-05-23 Jose E. Marchesi <jose.marchesi@oracle.com> + + * bpf.cpu: New file. + * bpf.opc: Likewise. + 2018-06-24 Nick Clifton <nickc@redhat.com> 2.32 branch created. diff --git a/cpu/bpf.cpu b/cpu/bpf.cpu new file mode 100644 index 0000000..85bac21 --- /dev/null +++ b/cpu/bpf.cpu @@ -0,0 +1,647 @@ +;; Linux BPF CPU description -*- Scheme -*- +;; Copyright (C) 2019 Free Software Foundation, Inc. +;; +;; Contributed by Oracle Inc. +;; +;; This file is part of the GNU Binutils and of GDB. +;; +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 3 of the +;; License, or (at your option) any later version. +;; +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with this program; if not, write to the Free Software +;; Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +;; 02110-1301, USA. + +;; This file contains a CGEN CPU description for the Linux kernel eBPF +;; instruction set. eBPF is documented in the linux kernel source +;; tree. See linux/Documentation/networking/filter.txt, and also the +;; sources in the networking subsystem, notably +;; linux/net/core/filter.c. + +(include "simplify.inc") + +(define-arch + (name bpf) + (comment "Linux kernel BPF") + (insn-lsb0? #t) + (machs bpf) + (isas ebpfle ebpfbe)) + +;;;; The ISAs + +;; Logically, eBPF comprises a single instruction set featuring two +;; kinds of instructions: 64-bit instructions and 128-bit instructions. 
+;; +;; The 64-bit instructions have the form: +;; +;; code:8 regs:8 offset:16 imm:32 +;; +;; Whereas the 128-bit instructions (at the moment there is only one +;; of such instructions, lddw) have the form: +;; +;; code:8 regs:8 offset:16 imm:32 unused:32 imm:32 +;; +;; In both formats `regs' is itself composed of two fields: +;; +;; dst:4 src:4 +;; +;; The ISA is supposed to be orthogonal to endianness: the endianness +;; of the instruction fields follows the endianness of the host running +;; the eBPF program, and that's all. However, this is not entirely +;; true. The definition of an eBPF code in the Linux kernel is: +;; +;; struct bpf_insn { +;; __u8 code; /* opcode */ +;; __u8 dst_reg:4; /* dest register */ +;; __u8 src_reg:4; /* source register */ +;; __s16 off; /* signed offset */ +;; __s32 imm; /* signed immediate constant */ +;; }; +;; +;; Since the ordering of fields in C bit-fields is defined by the +;; implementation, the impact of endianness in the encoding of eBPF +;; instructions is effectively defined by GCC. In particular, GCC +;; places dst_reg before src_reg in little-endian code, and the other +;; way around in big-endian code. +;; +;; So, in reality, eBPF comprises two instruction sets: one for +;; little-endian with instructions like: +;; +;; code:8 src:4 dst:4 offset:16 imm:32 [unused:32 imm:32] +;; +;; and another for big-endian with instructions like: +;; +;; code:8 dst:4 src:4 offset:16 imm:32 [unused:32 imm:32] +;; +;; where `offset' and the immediate fields are encoded in +;; little-endian and big-endian byte-order, respectively. + +(define-pmacro (define-bpf-isa x-endian) + (define-isa + (name (.sym ebpf x-endian)) + (comment "The eBPF instruction set") + ;; Default length to record in ifields. This is used in + ;; calculations involving bit numbers. + (default-insn-word-bitsize 64) + ;; Length of an unknown instruction. Used by disassembly and by the + ;; simulator's invalid insn handler. 
+ (default-insn-bitsize 64) + ;; Number of bits of insn that can be initially fetched. XXX this + ;; should be 64 (the size of the smallest insn) but until CGEN + ;; gets fixed to place constant fields in their own words, we have + ;; to use this workaround to avoid the opcode byte to be placed at + ;; the wrong side of the instruction when assembling in + ;; big-endian. + (base-insn-bitsize 8))) + +(define-bpf-isa le) +(define-bpf-isa be) + +(define-pmacro all-isas () (ISA ebpfle,ebpfbe)) + +;;;; Hardware Hierarchy + +;; +;; bpf architecture +;; | +;; bpfbf cpu-family +;; | +;; bpf machine +;; | +;; bpf-def model + +(define-cpu + (name bpfbf) + (comment "Linux kernel eBPF virtual CPU") + (word-bitsize 32)) + +(define-mach + (name bpf) + (comment "Linux eBPF") + (cpu bpfbf) + (isas ebpfle ebpfbe)) + +(define-model + (name bpf-def) + (comment "Linux eBPF default model") + (mach bpf) + (unit u-exec "execution unit" () + 1 ; issue + 1 ; done + () ; state + () ; inputs + () ; outputs + () ; profile action (default) + )) + +;;;; Hardware Elements + +;; eBPF programs can access 10 general-purpose registers which are +;; 64-bit. + +(define-hardware + (name h-gpr) + (comment "General Purpose Registers") + (attrs all-isas (MACH bpf)) + (type register DI (16)) + (indices keyword "%" + ;; XXX the frame pointer fp is read-only, so it should + ;; go in a different hardware. + (;; ABI names. Take priority when disassembling. + (a 0) (r1 1) (r2 2) (r3 3) (r4 4) (r5 5) (ctx 6) + (r7 7) (r8 8) (r9 9) (fp 10) + ;; Additional names recognized when assembling. + (r0 0) (r6 6) (r10 10)))) + +;; The program counter. CGEN requires it, even if it is not visible +;; to eBPF programs. + +(dnh h-pc "program counter" (PC PROFILE) (pc) () () ()) + +;; A 64-bit h-sint to be used by the imm64 operand below. XXX this +;; shouldn't be needed, as h-sint is supposed to be able to hold +;; 64-bit values. However, in practice CGEN limits h-sint to 32 bits +;; in 32-bit hosts. To be fixed in CGEN. 
+ +(dnh h-sint64 "signed 64-bit integer" (all-isas) (immediate DI) + () () ()) + +;;;; The Instruction Sets + +;;; Fields and Opcodes + +;; Convenience macro to shorten the definition of the fields below. +(define-pmacro (dwf x-name x-comment x-attrs + x-word-offset x-word-length x-start x-length + x-mode) + "Define a field including its containing word." + (define-ifield + (name x-name) + (comment x-comment) + (.splice attrs (.unsplice x-attrs)) + (word-offset x-word-offset) + (word-length x-word-length) + (start x-start) + (length x-length) + (mode x-mode))) + +;; For arithmetic and jump instructions the 8-bit code field is +;; subdivided in: +;; +;; op-code:4 op-src:1 op-class:3 + +(dwf f-op-code "eBPF opcode code" (all-isas) 0 8 7 4 UINT) +(dwf f-op-src "eBPF opcode source" (all-isas) 0 8 3 1 UINT) +(dwf f-op-class "eBPF opcode instruction class" (all-isas) 0 8 2 3 UINT) + +(define-normal-insn-enum insn-op-code-alu "eBPF instruction codes" + (all-isas) OP_CODE_ f-op-code + (;; Codes for OP_CLASS_ALU and OP_CLASS_ALU64 + (ADD #x0) (SUB #x1) (MUL #x2) (DIV #x3) (OR #x4) (AND #x5) + (LSH #x6) (RSH #x7) (NEG #x8) (MOD #x9) (XOR #xa) (MOV #xb) + (ARSH #xc) (END #xd) + ;; Codes for OP_CLASS_JMP + (JA #x0) (JEQ #x1) (JGT #x2) (JGE #x3) (JSET #x4) + (JNE #x5) (JSGT #x6) (JSGE #x7) (CALL #x8) (EXIT #x9) + (JLT #xa) (JLE #xb) (JSLT #xc) (JSLE #xd))) + +(define-normal-insn-enum insn-op-src "eBPF instruction source" + (all-isas) OP_SRC_ f-op-src + ;; X => use `src' as source operand. + ;; K => use `imm32' as source operand. 
+ ((K #b0) (X #b1))) + +(define-normal-insn-enum insn-op-class "eBPF instruction class" + (all-isas) OP_CLASS_ f-op-class + ((LD #b000) (LDX #b001) (ST #b010) (STX #b011) + (ALU #b100) (JMP #b101) (ALU64 #b111))) + +;; For load/store instructions, the 8-bit code field is subdivided in: +;; +;; op-mode:3 op-size:2 op-class:3 + +(dwf f-op-mode "eBPF opcode mode" (all-isas) 0 8 7 3 UINT) +(dwf f-op-size "eBPF opcode size" (all-isas) 0 8 4 2 UINT) + +(define-normal-insn-enum insn-op-mode "eBPF load/store instruction modes" + (all-isas) OP_MODE_ f-op-mode + ((IMM #b000) (ABS #b001) (IND #b010) (MEM #b011) + ;; #b100 and #b101 are used in classic BPF only, reserved in eBPF. + (XADD #b110))) + +(define-normal-insn-enum insn-op-size "eBPF load/store instruction sizes" + (all-isas) OP_SIZE_ f-op-size + ((W #b00) ;; Word: 4 byte + (H #b01) ;; Half-word: 2 byte + (B #b10) ;; Byte: 1 byte + (DW #b11))) ;; Double-word: 8 byte + +;; The fields for the source and destination registers are a bit +;; tricky. Due to the bizarre nibble swap between little-endian and +;; big-endian ISAs we need to keep different variants of the fields. +;; +;; Note that f-regs is used in the format spec of instructions that do +;; NOT use registers, where endianness is irrelevant i.e. f-regs is a +;; constant 0 opcode. + +(dwf f-dstle "eBPF dst register field" ((ISA ebpfle)) 8 8 3 4 UINT) +(dwf f-srcle "eBPF source register field" ((ISA ebpfle)) 8 8 7 4 UINT) + +(dwf f-dstbe "eBPF dst register field" ((ISA ebpfbe)) 8 8 7 4 UINT) +(dwf f-srcbe "eBPF source register field" ((ISA ebpfbe)) 8 8 3 4 UINT) + +(dwf f-regs "eBPF registers field" (all-isas) 8 8 7 8 UINT) + +;; Finally, the fields for the immediates. +;; +;; The 16-bit offsets and 32-bit immediates do not present any special +;; difficulty: we put them in their own instruction word so the +;; byte-endianness will be properly applied. 
+ +(dwf f-offset16 "eBPF offset field" (all-isas) 16 16 15 16 INT) +(dwf f-imm32 "eBPF 32-bit immediate field" (all-isas) 32 32 31 32 INT) + +;; For the disjoint 64-bit signed immediate, however, we need to use a +;; multi-ifield. + +(dwf f-imm64-a "eBPF 64-bit immediate a" (all-isas) 32 32 31 32 UINT) +(dwf f-imm64-b "eBPF 64-bit immediate b" (all-isas) 64 32 31 32 UINT) +(dwf f-imm64-c "eBPF 64-bit immediate c" (all-isas) 96 32 31 32 UINT) + +(define-multi-ifield + (name f-imm64) + (comment "eBPF 64-bit immediate field") + (attrs all-isas) + (mode DI) + (subfields f-imm64-a f-imm64-b f-imm64-c) + (insert (sequence () + (set (ifield f-imm64-b) (const 0)) + (set (ifield f-imm64-c) (srl (ifield f-imm64) (const 32))) + (set (ifield f-imm64-a) (and (ifield f-imm64) (const #xffffffff))))) + (extract (sequence () + (set (ifield f-imm64) + (or (sll DI (zext DI (ifield f-imm64-c)) (const 32)) + (zext DI (ifield f-imm64-a))))))) + +;;; Operands + +;; A couple of source and destination register operands are defined +;; for each ISA: ebpfle and ebpfbe. + +(dno dstle "destination register" ((ISA ebpfle)) h-gpr f-dstle) +(dno srcle "source register" ((ISA ebpfle)) h-gpr f-srcle) + +(dno dstbe "destination register" ((ISA ebpfbe)) h-gpr f-dstbe) +(dno srcbe "source register" ((ISA ebpfbe)) h-gpr f-srcbe) + +;; Jump instructions have a 16-bit PC-relative address. +;; CALL instructions have a 32-bit PC-relative address. + +(dno disp16 "16-bit PC-relative address" (all-isas PCREL-ADDR) h-sint + f-offset16) +(dno disp32 "32-bit PC-relative address" (all-isas PCREL-ADDR) h-sint + f-imm32) + +;; Immediate operands in eBPF are signed, and we want the disassembler +;; to print negative values in a sane way. Therefore we use the macro +;; below to register a printer, which is itself defined as a C +;; function in bpf.opc. 
+ +;; define-normal-signed-immediate-operand +(define-pmacro (dnsio x-name x-comment x-attrs x-type x-index) + (define-operand + (name x-name) + (comment x-comment) + (.splice attrs (.unsplice x-attrs)) + (type x-type) + (index x-index) + (handlers (print "immediate")))) + +(dnsio imm32 "32-bit immediate" (all-isas) h-sint f-imm32) +(dnsio offset16 "16-bit offset" (all-isas) h-sint f-offset16) + +;; The 64-bit immediate cannot use the default +;; cgen_parse_signed_integer, because it assumes operands are at most +;; 32-bit wide. Use our own. + +(define-operand + (name imm64) + (comment "64-bit immediate") + (attrs all-isas) + (type h-sint64) + (index f-imm64) + (handlers (parse "imm64") (print "immediate"))) + +;; The endle/endbe instructions take an operand to specify the word +;; width in endianness conversions. We use both a parser and printer, +;; which are defined as C functions in bpf.opc. + +(define-operand + (name endsize) + (comment "endianness size immediate: 16, 32 or 64") + (attrs all-isas) + (type h-uint) + (index f-imm32) + (handlers (parse "endsize") (print "endsize"))) + +;;; ALU instructions + +;; For each opcode in insn-op-code-alu representing an integer +;; arithmetic instruction (ADD, SUB, etc) we define a bunch of +;; instruction variants: +;; +;; ADD[32]{i,r}le for the little-endian ISA +;; ADD[32]{i,r}be for the big-endian ISA +;; +;; The `i' variants perform `dst OP imm32 -> dst' operations. +;; The `r' variants perform `dst OP src -> dst' operations. +;; +;; The variants with 32 in their name are of ALU class. Otherwise +;; they are ALU64 class. 
+ +(define-pmacro (define-alu-insn-un x-basename x-suffix x-op-class x-op-code x-endian) + (dni (.sym x-basename x-suffix x-endian) + (.str x-basename x-suffix) + ((ISA (.sym ebpf x-endian))) + (.str x-basename x-suffix " $dst" x-endian) + (+ (f-imm32 0) (f-offset16 0) ((.sym f-src x-endian) 0) (.sym dst x-endian) + x-op-class OP_SRC_X x-op-code) () ())) + +(define-pmacro (define-alu-insn-bin x-basename x-suffix x-op-class x-op-code x-endian) + (begin + (dni (.sym x-basename x-suffix "i" x-endian) + (.str x-basename x-suffix " immediate") + ((ISA (.sym ebpf x-endian))) + (.str x-basename x-suffix " $dst" x-endian ",$imm32") + (+ imm32 (f-offset16 0) ((.sym f-src x-endian) 0) (.sym dst x-endian) + x-op-class OP_SRC_K x-op-code) () ()) + (dni (.sym x-basename x-suffix "r" x-endian) + (.str x-basename x-suffix " register") + ((ISA (.sym ebpf x-endian))) + (.str x-basename x-suffix " $dst" x-endian ",$src" x-endian) + (+ (f-imm32 0) (f-offset16 0) (.sym src x-endian) (.sym dst x-endian) + x-op-class OP_SRC_X x-op-code) () ()))) + +(define-pmacro (daiu x-basename x-op-code x-endian) + (begin + (define-alu-insn-un x-basename "" OP_CLASS_ALU64 x-op-code x-endian) + (define-alu-insn-un x-basename "32" OP_CLASS_ALU x-op-code x-endian))) + +(define-pmacro (daib x-basename x-op-code x-endian) + (begin + (define-alu-insn-bin x-basename "" OP_CLASS_ALU64 x-op-code x-endian) + (define-alu-insn-bin x-basename "32" OP_CLASS_ALU x-op-code x-endian))) + +(define-pmacro (define-alu-instructions x-endian) + (begin + (daib add OP_CODE_ADD x-endian) + (daib sub OP_CODE_SUB x-endian) + (daib mul OP_CODE_MUL x-endian) + (daib div OP_CODE_DIV x-endian) + (daib or OP_CODE_OR x-endian) + (daib and OP_CODE_AND x-endian) + (daib lsh OP_CODE_LSH x-endian) + (daib rsh OP_CODE_RSH x-endian) + (daib mod OP_CODE_MOD x-endian) + (daib xor OP_CODE_XOR x-endian) + (daib mov OP_CODE_MOV x-endian) + (daib arsh OP_CODE_ARSH x-endian) + (daiu neg OP_CODE_NEG x-endian))) + +(define-alu-instructions le) 
+(define-alu-instructions be) + +;;; Endianness conversion instructions + +;; The endianness conversion instructions come in several variants: +;; +;; END{le,be}le for the little-endian ISA +;; END{le,be}be for the big-endian ISA +;; +;; Please do not be confused by the repeated `be' and `le' here. Each +;; ISA has both endle and endbe instructions. It is the disposition +;; of the source and destination register fields that change between +;; ISAs, not the semantics of the instructions themselves (see section +;; "The ISAs" above in this very file.) + +(define-pmacro (define-endian-insn x-suffix x-op-src x-endian) + (dni (.sym "end" x-suffix x-endian) + (.str "end" x-suffix " register") + ((ISA (.sym ebpf x-endian))) + (.str "end" x-suffix " $dst" x-endian ",$endsize") + (+ (f-offset16 0) ((.sym f-src x-endian) 0) (.sym dst x-endian) endsize + OP_CLASS_ALU x-op-src OP_CODE_END) () ())) + +(define-endian-insn "le" OP_SRC_K le) +(define-endian-insn "be" OP_SRC_X le) +(define-endian-insn "le" OP_SRC_K be) +(define-endian-insn "be" OP_SRC_X be) + +;;; Load/Store instructions + +;; The lddw instruction takes a 64-bit immediate as an operand. Since +;; this instruction also takes a `dst' operand, we need to define a +;; variant for each ISA: +;; +;; LDDWle for the little-endian ISA +;; LDDWbe for the big-endian ISA + +(define-pmacro (define-lddw x-endian) + (dni (.sym lddw x-endian) + (.str "lddw" x-endian) + ((ISA (.sym ebpf x-endian))) + (.str "lddw $dst" x-endian ",$imm64") + (+ imm64 (f-offset16 0) ((.sym f-src x-endian) 0) + (.sym dst x-endian) + OP_CLASS_LD OP_SIZE_DW OP_MODE_IMM) () ())) + +(define-lddw le) +(define-lddw be) + +;; The absolute/indirect load instructions are non-generic loads +;; designed to be used in socket filters. 
They come in several +;; variants: +;; +;; LD{abs,ind}{w,h,b,dw}le for the little-endian ISA +;; LD{abs,ind}{w,h,b,dw}be for the big-endian ISA + +(define-pmacro (dlsi x-basename x-suffix x-class x-size x-mode x-endian) + (dni (.sym x-basename x-suffix x-endian) + (.str x-basename x-suffix) + ((ISA (.sym ebpf x-endian))) + (.str x-basename x-suffix " $dst" x-endian ",$src" x-endian ",$imm32") + (+ imm32 (f-offset16 0) (.sym src x-endian) (.sym dst x-endian) + (.sym OP_CLASS_ x-class) (.sym OP_SIZE_ x-size) + (.sym OP_MODE_ x-mode)) () ())) + +(define-pmacro (define-ldabsind x-endian) + (begin + (dlsi "ldabs" "w" LD W ABS x-endian) + (dlsi "ldabs" "h" LD H ABS x-endian) + (dlsi "ldabs" "b" LD B ABS x-endian) + (dlsi "ldabs" "dw" LD DW ABS x-endian) + + (dlsi "ldind" "w" LD W IND x-endian) + (dlsi "ldind" "h" LD H IND x-endian) + (dlsi "ldind" "b" LD B IND x-endian) + (dlsi "ldind" "dw" LD DW IND x-endian))) + +(define-ldabsind le) +(define-ldabsind be) + +;; Generic load and store instructions are provided for several word +;; sizes. 
They come in several variants: +;; +;; LDX{b,h,w,dw}le, STX{b,h,w,dw}le for the little-endian ISA +;; +;; LDX{b,h,w,dw}be, STX{b,h,w,dw}be for the big-endian ISA +;; +;; Loads operate on [$SRC+-OFFSET] -> $DST +;; Stores operate on $SRC -> [$DST+-OFFSET] + +(define-pmacro (dxli x-basename x-suffix x-size x-endian) + (dni (.sym x-basename x-suffix x-endian) + (.str x-basename x-suffix) + ((ISA (.sym ebpf x-endian))) + (.str x-basename x-suffix " $dst" x-endian ",[$src" x-endian "+$offset16]") + (+ (f-imm32 0) offset16 (.sym src x-endian) (.sym dst x-endian) + OP_CLASS_LDX (.sym OP_SIZE_ x-size) OP_MODE_MEM) + () ())) + +(define-pmacro (dxsi x-basename x-suffix x-size x-endian) + (dni (.sym x-basename x-suffix x-endian) + (.str x-basename x-suffix) + ((ISA (.sym ebpf x-endian))) + (.str x-basename x-suffix " [$dst" x-endian "+$offset16],$src" x-endian) + (+ (f-imm32 0) offset16 (.sym src x-endian) (.sym dst x-endian) + OP_CLASS_STX (.sym OP_SIZE_ x-size) OP_MODE_MEM) + () ())) + +(define-pmacro (define-ldstx-insns x-endian) + (begin + (dxli "ldx" "w" W x-endian) + (dxli "ldx" "h" H x-endian) + (dxli "ldx" "b" B x-endian) + (dxli "ldx" "dw" DW x-endian) + + (dxsi "stx" "w" W x-endian) + (dxsi "stx" "h" H x-endian) + (dxsi "stx" "b" B x-endian) + (dxsi "stx" "dw" DW x-endian))) + +(define-ldstx-insns le) +(define-ldstx-insns be) + +;; Generic store instructions of the form IMM32 -> [$DST+OFFSET] are +;; provided in several variants: +;; +;; ST{b,h,w,dw}le for the little-endian ISA +;; ST{b,h,w,dw}be for the big-endian ISA + +(define-pmacro (dsti x-suffix x-size x-endian) + (dni (.sym "st" x-suffix x-endian) + (.str "st" x-suffix) + ((ISA (.sym ebpf x-endian))) + (.str "st" x-suffix " [$dst" x-endian "+$offset16],$imm32") + (+ imm32 offset16 ((.sym f-src x-endian) 0) (.sym dst x-endian) + OP_CLASS_ST (.sym OP_SIZE_ x-size) OP_MODE_MEM) () ())) + +(define-pmacro (define-st-insns x-endian) + (begin + (dsti "b" B x-endian) + (dsti "h" H x-endian) + (dsti "w" W x-endian) + 
(dsti "dw" DW x-endian))) + +(define-st-insns le) +(define-st-insns be) + +;;; Jump instructions + +;; Compare-and-jump instructions, on the other hand, make use of +;; registers. Therefore, we need to define several variants in both +;; ISAs: +;; +;; J{eq,gt,ge,lt,le,set,ne,sgt,sge,slt,sle}{i,r}le for the +;; little-endian ISA. +;; J{eq,gt,ge,lt,le,set,ne,sgt,sge,slt,sle}{i,r}be for the +;; big-endian ISA. + +(define-pmacro (dcji x-cond x-op-code x-endian) + (begin + (dni (.sym j x-cond i x-endian) + (.str j x-cond "i") + ((ISA (.sym ebpf x-endian))) + (.str "j" x-cond " $dst" x-endian ",$imm32,$disp16") + (+ imm32 disp16 ((.sym f-src x-endian) 0) (.sym dst x-endian) + OP_CLASS_JMP OP_SRC_K (.sym OP_CODE_ x-op-code)) () ()) + (dni (.sym j x-cond r x-endian) + (.str j x-cond "r") + ((ISA (.sym ebpf x-endian))) + (.str "j" x-cond " $dst" x-endian ",$src" x-endian ",$disp16") + (+ (f-imm32 0) disp16 (.sym src x-endian) (.sym dst x-endian) + OP_CLASS_JMP OP_SRC_X (.sym OP_CODE_ x-op-code)) () ()))) + +(define-pmacro (define-condjump-insns x-endian) + (begin + (dcji "eq" JEQ x-endian) + (dcji "gt" JGT x-endian) + (dcji "ge" JGE x-endian) + (dcji "lt" JLT x-endian) + (dcji "le" JLE x-endian) + (dcji "set" JSET x-endian) + (dcji "ne" JNE x-endian) + (dcji "sgt" JSGT x-endian) + (dcji "sge" JSGE x-endian) + (dcji "slt" JSLT x-endian) + (dcji "sle" JSLE x-endian))) + +(define-condjump-insns le) +(define-condjump-insns be) + +;; The jump-always, `call' and `exit' instructions don't make use of +;; either source or destination registers, so only one variant per +;; instruction is defined. 
+ +(dni ja "ja" (all-isas) "ja $disp16" + (+ (f-imm32 0) disp16 (f-regs 0) + OP_CLASS_JMP OP_SRC_K OP_CODE_JA) () ()) + +(dni call "call" (all-isas) "call $disp32" + (+ disp32 (f-offset16 0) (f-regs 0) + OP_CLASS_JMP OP_SRC_K OP_CODE_CALL) () ()) + +(dni "exit" "exit" (all-isas) "exit" + (+ (f-imm32 0) (f-offset16 0) (f-regs 0) + OP_CLASS_JMP (f-op-src 0) OP_CODE_EXIT) () ()) + +;;; Atomic instructions + +;; The atomic exchange-and-add instructions come in two flavors: one +;; for swapping 64-bit quantities and another for 32-bit quantities. + +(define-pmacro (define-atomic-insns x-endian) + (begin + (dni (.str "xadddw" x-endian) + "xadddw" + ((ISA (.sym ebpf x-endian))) + (.str "xadddw [$dst" x-endian "+$offset16],$src" x-endian) + (+ (f-imm32 0) (.sym src x-endian) (.sym dst x-endian) + offset16 OP_MODE_XADD OP_SIZE_DW OP_CLASS_STX) () ()) + (dni (.str "xaddw" x-endian) + "xaddw" + ((ISA (.sym ebpf x-endian))) + (.str "xaddw [$dst" x-endian "+$offset16],$src" x-endian) + (+ (f-imm32 0) (.sym src x-endian) (.sym dst x-endian) + offset16 OP_MODE_XADD OP_SIZE_W OP_CLASS_STX) () ()))) + +(define-atomic-insns le) +(define-atomic-insns be) diff --git a/cpu/bpf.opc b/cpu/bpf.opc new file mode 100644 index 0000000..e70ee04 --- /dev/null +++ b/cpu/bpf.opc @@ -0,0 +1,191 @@ +/* EBPF opcode support. -*- c -*- + + Copyright (C) 2019 Free Software Foundation, Inc. + + Contributed by Oracle, Inc. + + This file is part of the GNU Binutils and of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* + Each section is delimited with start and end markers. + + <arch>-opc.h additions use: "-- opc.h" + <arch>-opc.c additions use: "-- opc.c" + <arch>-asm.c additions use: "-- asm.c" + <arch>-dis.c additions use: "-- dis.c" + <arch>-ibd.h additions use: "-- ibd.h". */ + +/* -- opc.h */ + +#undef CGEN_DIS_HASH_SIZE +#define CGEN_DIS_HASH_SIZE 1 + +#undef CGEN_DIS_HASH +#define CGEN_DIS_HASH(buffer, value) 0 + +/* Allows reason codes to be output when assembler errors occur. */ +#define CGEN_VERBOSE_ASSEMBLER_ERRORS + +#define CGEN_VALIDATE_INSN_SUPPORTED +extern int bpf_cgen_insn_supported (CGEN_CPU_DESC, const CGEN_INSN *); + + +/* -- opc.c */ + +/* -- asm.c */ + +/* Parse a signed 64-bit immediate. */ + +static const char * +parse_imm64 (CGEN_CPU_DESC cd, + const char **strp, + int opindex, + int64_t *valuep) +{ + bfd_vma value; + enum cgen_parse_operand_result result; + const char *errmsg; + + errmsg = (* cd->parse_operand_fn) + (cd, CGEN_PARSE_OPERAND_INTEGER, strp, opindex, BFD_RELOC_NONE, + &result, &value); + if (!errmsg) + *valuep = value; + + return errmsg; +} + +/* Endianness size operands are integer immediates whose values can be + 16, 32 or 64. */ + +static const char * +parse_endsize (CGEN_CPU_DESC cd, + const char **strp, + int opindex, + unsigned long *valuep) +{ + const char *errmsg; + + errmsg = cgen_parse_unsigned_integer (cd, strp, opindex, valuep); + if (errmsg) + return errmsg; + + switch (*valuep) + { + case 16: + case 32: + case 64: + break; + default: + return _("expected 16, 32 or 64 in"); + } + + return NULL; +} + +/* Special check to ensure that the right instruction variant is used + for the given endianness induced by the ISA selected in the CPU. 
+ See bpf.cpu for a discussion on how eBPF is really two instruction + sets. */ + +int +bpf_cgen_insn_supported (CGEN_CPU_DESC cd, const CGEN_INSN *insn) +{ + CGEN_BITSET isas = CGEN_INSN_BITSET_ATTR_VALUE (insn, CGEN_INSN_ISA); + + return cgen_bitset_intersect_p (&isas, cd->isas); +} + + +/* -- dis.c */ + +/* We need to customize the disassembler a bit: + - Use 8 bytes per line by default. +*/ + +#define CGEN_PRINT_INSN bpf_print_insn + +static int +bpf_print_insn (CGEN_CPU_DESC cd, bfd_vma pc, disassemble_info *info) +{ + bfd_byte buf[CGEN_MAX_INSN_SIZE]; + int buflen; + int status; + + info->bytes_per_chunk = 1; + info->bytes_per_line = 8; + + /* Attempt to read the base part of the insn. */ + buflen = cd->base_insn_bitsize / 8; + status = (*info->read_memory_func) (pc, buf, buflen, info); + + /* Try again with the minimum part, if min < base. */ + if (status != 0 && (cd->min_insn_bitsize < cd->base_insn_bitsize)) + { + buflen = cd->min_insn_bitsize / 8; + status = (*info->read_memory_func) (pc, buf, buflen, info); + } + + if (status != 0) + { + (*info->memory_error_func) (status, pc, info); + return -1; + } + + return print_insn (cd, pc, info, buf, buflen); +} + +/* Signed immediates should be printed in hexadecimal. */ + +static void +print_immediate (CGEN_CPU_DESC cd ATTRIBUTE_UNUSED, + void *dis_info, + int64_t value, + unsigned int attrs ATTRIBUTE_UNUSED, + bfd_vma pc ATTRIBUTE_UNUSED, + int length ATTRIBUTE_UNUSED) +{ + disassemble_info *info = (disassemble_info *) dis_info; + + if (value <= 9) + (*info->fprintf_func) (info->stream, "%" PRId64, value); + else + (*info->fprintf_func) (info->stream, "%#" PRIx64, value); + + /* This is to avoid -Wunused-function for print_normal. */ + if (0) + print_normal (cd, dis_info, value, attrs, pc, length); +} + +/* Endianness bit sizes should be printed in decimal. 
*/ + +static void +print_endsize (CGEN_CPU_DESC cd ATTRIBUTE_UNUSED, + void *dis_info, + unsigned long value, + unsigned int attrs ATTRIBUTE_UNUSED, + bfd_vma pc ATTRIBUTE_UNUSED, + int length ATTRIBUTE_UNUSED) +{ + disassemble_info *info = (disassemble_info *) dis_info; + (*info->fprintf_func) (info->stream, "%lu", value); +} + + +/* -- */ + |