aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/nvptx/nvptx.md
diff options
context:
space:
mode:
authorBernd Schmidt <bernds@codesourcery.com>2014-11-10 16:12:42 +0000
committerBernd Schmidt <bernds@gcc.gnu.org>2014-11-10 16:12:42 +0000
commit738f25224b78c40ba48d6debd95946bf73f89e53 (patch)
tree64dd54686832409d0db5195b6155c4b408fed0d0 /gcc/config/nvptx/nvptx.md
parentc81b4a0e83c8577ba09767ded86f715b6fcd21b4 (diff)
downloadgcc-738f25224b78c40ba48d6debd95946bf73f89e53.zip
gcc-738f25224b78c40ba48d6debd95946bf73f89e53.tar.gz
gcc-738f25224b78c40ba48d6debd95946bf73f89e53.tar.bz2
Add the nvptx port.
* configure.ac: Handle nvptx-*-*. * configure: Regenerate. gcc/ * config/nvptx/nvptx.c: New file. * config/nvptx/nvptx.h: New file. * config/nvptx/nvptx-protos.h: New file. * config/nvptx/nvptx.md: New file. * config/nvptx/t-nvptx: New file. * config/nvptx/nvptx.opt: New file. * common/config/nvptx/nvptx-common.c: New file. * config.gcc: Handle nvptx-*-*. libgcc/ * config.host: Handle nvptx-*-*. * shared-object.mk (as-flags-$o): Define. ($(base)$(objext), $(base)_s$(objext)): Use it instead of -xassembler-with-cpp. * static-object.mk: Identical changes. * config/nvptx/t-nvptx: New file. * config/nvptx/crt0.s: New file. * config/nvptx/free.asm: New file. * config/nvptx/malloc.asm: New file. * config/nvptx/realloc.c: New file. From-SVN: r217295
Diffstat (limited to 'gcc/config/nvptx/nvptx.md')
-rw-r--r--gcc/config/nvptx/nvptx.md1376
1 files changed, 1376 insertions, 0 deletions
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
new file mode 100644
index 0000000..966c28b
--- /dev/null
+++ b/gcc/config/nvptx/nvptx.md
@@ -0,0 +1,1376 @@
+;; Machine description for NVPTX.
+;; Copyright (C) 2014 Free Software Foundation, Inc.
+;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspec" [
+ UNSPEC_ARG_REG
+ UNSPEC_FROM_GLOBAL
+ UNSPEC_FROM_LOCAL
+ UNSPEC_FROM_PARAM
+ UNSPEC_FROM_SHARED
+ UNSPEC_FROM_CONST
+ UNSPEC_TO_GLOBAL
+ UNSPEC_TO_LOCAL
+ UNSPEC_TO_PARAM
+ UNSPEC_TO_SHARED
+ UNSPEC_TO_CONST
+
+ UNSPEC_CPLX_LOWPART
+ UNSPEC_CPLX_HIGHPART
+
+ UNSPEC_COPYSIGN
+ UNSPEC_LOG2
+ UNSPEC_EXP2
+ UNSPEC_SIN
+ UNSPEC_COS
+
+ UNSPEC_FPINT_FLOOR
+ UNSPEC_FPINT_BTRUNC
+ UNSPEC_FPINT_CEIL
+ UNSPEC_FPINT_NEARBYINT
+
+ UNSPEC_BITREV
+
+ UNSPEC_ALLOCA
+
+ UNSPEC_NTID
+ UNSPEC_TID
+])
+
+(define_c_enum "unspecv" [
+ UNSPECV_LOCK
+ UNSPECV_CAS
+ UNSPECV_XCHG
+])
+
+(define_attr "subregs_ok" "false,true"
+ (const_string "false"))
+
+(define_predicate "nvptx_register_operand"
+ (match_code "reg,subreg")
+{
+ if (REG_P (op))
+ return !HARD_REGISTER_P (op);
+ if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
+ return false;
+ if (GET_CODE (op) == SUBREG)
+ return false;
+ return register_operand (op, mode);
+})
+
+(define_predicate "nvptx_reg_or_mem_operand"
+ (match_code "mem,reg,subreg")
+{
+ if (REG_P (op))
+ return !HARD_REGISTER_P (op);
+ if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
+ return false;
+ if (GET_CODE (op) == SUBREG)
+ return false;
+ return memory_operand (op, mode) || register_operand (op, mode);
+})
+
+;; Allow symbolic constants.
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref,const"))
+
+;; Allow registers or symbolic constants. We can allow frame, arg or stack
+;; pointers here since they are actually symbolic constants.
+(define_predicate "nvptx_register_or_symbolic_operand"
+ (match_code "reg,subreg,symbol_ref,const")
+{
+ if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
+ return false;
+ if (GET_CODE (op) == SUBREG)
+ return false;
+ if (CONSTANT_P (op))
+ return true;
+ return register_operand (op, mode);
+})
+
+;; Registers or constants for normal instructions. Does not allow symbolic
+;; constants.
+(define_predicate "nvptx_nonmemory_operand"
+ (match_code "reg,subreg,const_int,const_double")
+{
+ if (REG_P (op))
+ return !HARD_REGISTER_P (op);
+ if (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))
+ return false;
+ if (GET_CODE (op) == SUBREG)
+ return false;
+ return nonmemory_operand (op, mode);
+})
+
+;; A source operand for a move instruction. This is the only predicate we use
+;; that accepts symbolic constants.
+(define_predicate "nvptx_general_operand"
+ (match_code "reg,subreg,mem,const,symbol_ref,label_ref,const_int,const_double")
+{
+ if (REG_P (op))
+ return !HARD_REGISTER_P (op);
+ return general_operand (op, mode);
+})
+
+;; A destination operand for a move instruction. This is the only destination
+;; predicate that accepts the return register since it requires special handling.
+(define_predicate "nvptx_nonimmediate_operand"
+ (match_code "reg,subreg,mem")
+{
+ if (REG_P (op))
+ return (op != frame_pointer_rtx
+ && op != arg_pointer_rtx
+ && op != stack_pointer_rtx);
+ return nonimmediate_operand (op, mode);
+})
+
+(define_predicate "const_0_operand"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+(define_predicate "global_mem_operand"
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_GLOBAL")))
+
+(define_predicate "const_mem_operand"
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_CONST")))
+
+(define_predicate "param_mem_operand"
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_PARAM")))
+
+(define_predicate "shared_mem_operand"
+ (and (match_code "mem")
+ (match_test "MEM_ADDR_SPACE (op) == ADDR_SPACE_SHARED")))
+
+(define_predicate "const0_operand"
+ (and (match_code "const_int")
+ (match_test "op == const0_rtx")))
+
+;; True if this operator is valid for predication.
+(define_predicate "predicate_operator"
+ (match_code "eq,ne"))
+
+(define_predicate "ne_operator"
+ (match_code "ne"))
+
+(define_predicate "nvptx_comparison_operator"
+ (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
+
+(define_predicate "nvptx_float_comparison_operator"
+ (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
+
+;; Test for a valid operand for a call instruction.
+(define_special_predicate "call_insn_operand"
+ (match_code "symbol_ref,reg")
+{
+ if (GET_CODE (op) == SYMBOL_REF)
+ {
+ tree decl = SYMBOL_REF_DECL (op);
+ /* This happens for libcalls. */
+ if (decl == NULL_TREE)
+ return true;
+ return TREE_CODE (SYMBOL_REF_DECL (op)) == FUNCTION_DECL;
+ }
+ return true;
+})
+
+;; Return true if OP is a call with parallel USEs of the argument
+;; pseudos.
+(define_predicate "call_operation"
+ (match_code "parallel")
+{
+ unsigned i;
+
+ for (i = 1; i < XVECLEN (op, 0); i++)
+ {
+ rtx elt = XVECEXP (op, 0, i);
+ enum machine_mode mode;
+ unsigned regno;
+
+ if (GET_CODE (elt) != USE
+ || GET_CODE (XEXP (elt, 0)) != REG
+ || XEXP (elt, 0) == frame_pointer_rtx
+ || XEXP (elt, 0) == arg_pointer_rtx
+ || XEXP (elt, 0) == stack_pointer_rtx)
+
+ return false;
+ }
+ return true;
+})
+
+(define_constraint "P0"
+ "An integer with the value 0."
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "P1"
+ "An integer with the value 1."
+ (and (match_code "const_int")
+ (match_test "ival == 1")))
+
+(define_constraint "Pn"
+ "An integer with the value -1."
+ (and (match_code "const_int")
+ (match_test "ival == -1")))
+
+(define_constraint "R"
+ "A pseudo register."
+ (match_code "reg"))
+
+(define_constraint "Ia"
+ "Any integer constant."
+ (and (match_code "const_int") (match_test "true")))
+
+(define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
+(define_mode_iterator QHSDIM [QI HI SI DI])
+(define_mode_iterator HSDIM [HI SI DI])
+(define_mode_iterator BHSDIM [BI HI SI DI])
+(define_mode_iterator SDIM [SI DI])
+(define_mode_iterator SDISDFM [SI DI SF DF])
+(define_mode_iterator QHIM [QI HI])
+(define_mode_iterator QHSIM [QI HI SI])
+(define_mode_iterator SDFM [SF DF])
+(define_mode_iterator SDCM [SC DC])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities. Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; We should get away with not defining memory alternatives, since we don't
+;; get variables in this mode and pseudos are never spilled.
+(define_insn "movbi"
+ [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
+ (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
+ ""
+ "@
+ %.\\tmov%t0\\t%0, %1;
+ %.\\tsetp.eq.u32\\t%0, 1, 0;
+ %.\\tsetp.eq.u32\\t%0, 1, 1;")
+
+(define_insn "*mov<mode>_insn"
+ [(set (match_operand:QHSDIM 0 "nvptx_nonimmediate_operand" "=R,R,R,m")
+ (match_operand:QHSDIM 1 "general_operand" "n,Ri,m,R"))]
+ "!(MEM_P (operands[0])
+ && (!REG_P (operands[1]) || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER))"
+{
+ if (which_alternative == 2)
+ return "%.\\tld%A1%u1\\t%0, %1;";
+ if (which_alternative == 3)
+ return "%.\\tst%A0%u0\\t%0, %1;";
+
+ rtx dst = operands[0];
+ rtx src = operands[1];
+
+ enum machine_mode dst_mode = nvptx_underlying_object_mode (dst);
+ enum machine_mode src_mode = nvptx_underlying_object_mode (src);
+ if (GET_CODE (dst) == SUBREG)
+ dst = SUBREG_REG (dst);
+ if (GET_CODE (src) == SUBREG)
+ src = SUBREG_REG (src);
+ if (src_mode == QImode)
+ src_mode = SImode;
+ if (dst_mode == QImode)
+ dst_mode = SImode;
+ if (CONSTANT_P (src))
+ {
+ if (GET_MODE_CLASS (dst_mode) != MODE_INT)
+ return "%.\\tmov.b%T0\\t%0, %1;";
+ else
+ return "%.\\tmov%t0\\t%0, %1;";
+ }
+
+ /* Special handling for the return register; we allow this register to
+ only occur in the destination of a move insn. */
+ if (REG_P (dst) && REGNO (dst) == NVPTX_RETURN_REGNUM
+ && dst_mode == HImode)
+ dst_mode = SImode;
+ if (dst_mode == src_mode)
+ return "%.\\tmov%t0\\t%0, %1;";
+ /* Mode-punning between floating point and integer. */
+ if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode))
+ return "%.\\tmov.b%T0\\t%0, %1;";
+ return "%.\\tcvt%t0%t1\\t%0, %1;";
+}
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "*mov<mode>_insn"
+ [(set (match_operand:SDFM 0 "nvptx_nonimmediate_operand" "=R,R,m")
+ (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
+ "!(MEM_P (operands[0]) && !REG_P (operands[1]))"
+{
+ if (which_alternative == 1)
+ return "%.\\tld%A1%u0\\t%0, %1;";
+ if (which_alternative == 2)
+ return "%.\\tst%A0%u1\\t%0, %1;";
+
+ rtx dst = operands[0];
+ rtx src = operands[1];
+ if (GET_CODE (dst) == SUBREG)
+ dst = SUBREG_REG (dst);
+ if (GET_CODE (src) == SUBREG)
+ src = SUBREG_REG (src);
+ enum machine_mode dst_mode = GET_MODE (dst);
+ enum machine_mode src_mode = GET_MODE (src);
+ if (dst_mode == src_mode)
+ return "%.\\tmov%t0\\t%0, %1;";
+ if (GET_MODE_SIZE (dst_mode) == GET_MODE_SIZE (src_mode))
+ return "%.\\tmov.b%T0\\t%0, %1;";
+ gcc_unreachable ();
+}
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "load_arg_reg<mode>"
+ [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
+ (unspec:QHIM [(match_operand 1 "const_int_operand" "i")]
+ UNSPEC_ARG_REG))]
+ ""
+ "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
+
+(define_insn "load_arg_reg<mode>"
+ [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
+ (unspec:SDISDFM [(match_operand 1 "const_int_operand" "i")]
+ UNSPEC_ARG_REG))]
+ ""
+ "%.\\tmov%t0\\t%0, %%ar%1;")
+
+(define_expand "mov<mode>"
+ [(set (match_operand:QHSDISDFM 0 "nvptx_nonimmediate_operand" "")
+ (match_operand:QHSDISDFM 1 "general_operand" ""))]
+ ""
+{
+ operands[1] = nvptx_maybe_convert_symbolic_operand (operands[1]);
+ /* Record the mode of the return register so that we can prevent
+ later optimization passes from changing it. */
+ if (REG_P (operands[0]) && REGNO (operands[0]) == NVPTX_RETURN_REGNUM
+ && cfun)
+ {
+ if (cfun->machine->ret_reg_mode == VOIDmode)
+ cfun->machine->ret_reg_mode = GET_MODE (operands[0]);
+ else
+ gcc_assert (cfun->machine->ret_reg_mode == GET_MODE (operands[0]));
+ }
+
+ /* Hard registers are often actually symbolic operands on this target.
+ Don't allow them when storing to memory. */
+ if (MEM_P (operands[0])
+ && (!REG_P (operands[1])
+ || REGNO (operands[1]) <= LAST_VIRTUAL_REGISTER))
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (tmp, operands[1]);
+ emit_move_insn (operands[0], tmp);
+ DONE;
+ }
+ if (GET_CODE (operands[1]) == SYMBOL_REF)
+ nvptx_record_needed_fndecl (SYMBOL_REF_DECL (operands[1]));
+})
+
+(define_insn "highpartscsf2"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")]
+ UNSPEC_CPLX_HIGHPART))]
+ ""
+ "%.\\tmov%t0\\t%0, %f1$1;")
+
+(define_insn "set_highpartsfsc2"
+ [(set (match_operand:SC 0 "nvptx_register_operand" "+R")
+ (unspec:SC [(match_dup 0)
+ (match_operand:SF 1 "nvptx_register_operand")]
+ UNSPEC_CPLX_HIGHPART))]
+ ""
+ "%.\\tmov%t1\\t%f0$1, %1;")
+
+(define_insn "lowpartscsf2"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (unspec:SF [(match_operand:SC 1 "nvptx_register_operand")]
+ UNSPEC_CPLX_LOWPART))]
+ ""
+ "%.\\tmov%t0\\t%0, %f1$0;")
+
+(define_insn "set_lowpartsfsc2"
+ [(set (match_operand:SC 0 "nvptx_register_operand" "+R")
+ (unspec:SC [(match_dup 0)
+ (match_operand:SF 1 "nvptx_register_operand")]
+ UNSPEC_CPLX_LOWPART))]
+ ""
+ "%.\\tmov%t1\\t%f0$0, %1;")
+
+(define_expand "mov<mode>"
+ [(set (match_operand:SDCM 0 "nvptx_nonimmediate_operand" "")
+ (match_operand:SDCM 1 "general_operand" ""))]
+ ""
+{
+ enum machine_mode submode = <MODE>mode == SCmode ? SFmode : DFmode;
+ int sz = GET_MODE_SIZE (submode);
+ rtx xops[4];
+ rtx punning_reg = NULL_RTX;
+ rtx copyback = NULL_RTX;
+
+ if (GET_CODE (operands[0]) == SUBREG)
+ {
+ rtx inner = SUBREG_REG (operands[0]);
+ enum machine_mode inner_mode = GET_MODE (inner);
+ int sz2 = GET_MODE_SIZE (inner_mode);
+ gcc_assert (sz2 >= sz);
+ cfun->machine->punning_buffer_size
+ = MAX (cfun->machine->punning_buffer_size, sz2);
+ if (punning_reg == NULL_RTX)
+ punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM);
+ copyback = gen_move_insn (inner, gen_rtx_MEM (inner_mode, punning_reg));
+ operands[0] = gen_rtx_MEM (<MODE>mode, punning_reg);
+ }
+ if (GET_CODE (operands[1]) == SUBREG)
+ {
+ rtx inner = SUBREG_REG (operands[1]);
+ enum machine_mode inner_mode = GET_MODE (inner);
+ int sz2 = GET_MODE_SIZE (inner_mode);
+ gcc_assert (sz2 >= sz);
+ cfun->machine->punning_buffer_size
+ = MAX (cfun->machine->punning_buffer_size, sz2);
+ if (punning_reg == NULL_RTX)
+ punning_reg = gen_rtx_REG (Pmode, NVPTX_PUNNING_BUFFER_REGNUM);
+ emit_move_insn (gen_rtx_MEM (inner_mode, punning_reg), inner);
+ operands[1] = gen_rtx_MEM (<MODE>mode, punning_reg);
+ }
+
+ if (REG_P (operands[0]) && submode == SFmode)
+ {
+ xops[0] = gen_reg_rtx (submode);
+ xops[1] = gen_reg_rtx (submode);
+ }
+ else
+ {
+ xops[0] = gen_lowpart (submode, operands[0]);
+ if (MEM_P (operands[0]))
+ xops[1] = adjust_address_nv (operands[0], submode, sz);
+ else
+ xops[1] = gen_highpart (submode, operands[0]);
+ }
+
+ if (REG_P (operands[1]) && submode == SFmode)
+ {
+ xops[2] = gen_reg_rtx (submode);
+ xops[3] = gen_reg_rtx (submode);
+ emit_insn (gen_lowpartscsf2 (xops[2], operands[1]));
+ emit_insn (gen_highpartscsf2 (xops[3], operands[1]));
+ }
+ else
+ {
+ xops[2] = gen_lowpart (submode, operands[1]);
+ if (MEM_P (operands[1]))
+ xops[3] = adjust_address_nv (operands[1], submode, sz);
+ else
+ xops[3] = gen_highpart (submode, operands[1]);
+ }
+
+ emit_move_insn (xops[0], xops[2]);
+ emit_move_insn (xops[1], xops[3]);
+ if (REG_P (operands[0]) && submode == SFmode)
+ {
+ emit_insn (gen_set_lowpartsfsc2 (operands[0], xops[0]));
+ emit_insn (gen_set_highpartsfsc2 (operands[0], xops[1]));
+ }
+ if (copyback)
+ emit_insn (copyback);
+ DONE;
+})
+
+(define_insn "zero_extendqihi2"
+ [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
+ (zero_extend:HI (match_operand:QI 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ ""
+ "@
+ %.\\tcvt.u16.u%T1\\t%0, %1;
+ %.\\tld%A1.u8\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "zero_extend<mode>si2"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
+ (zero_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ ""
+ "@
+ %.\\tcvt.u32.u%T1\\t%0, %1;
+ %.\\tld%A1.u%T1\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "zero_extend<mode>di2"
+ [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
+ (zero_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ ""
+ "@
+ %.\\tcvt.u64.u%T1\\t%0, %1;
+ %.\\tld%A1%u1\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "extend<mode>si2"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
+ (sign_extend:SI (match_operand:QHIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ ""
+ "@
+ %.\\tcvt.s32.s%T1\\t%0, %1;
+ %.\\tld%A1.s%T1\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "extend<mode>di2"
+ [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
+ (sign_extend:DI (match_operand:QHSIM 1 "nvptx_reg_or_mem_operand" "R,m")))]
+ ""
+ "@
+ %.\\tcvt.s64.s%T1\\t%0, %1;
+ %.\\tld%A1.s%T1\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "trunchiqi2"
+ [(set (match_operand:QI 0 "nvptx_reg_or_mem_operand" "=R,m")
+ (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
+ ""
+ "@
+ %.\\tcvt%t0.u16\\t%0, %1;
+ %.\\tst%A0.u8\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "truncsi<mode>2"
+ [(set (match_operand:QHIM 0 "nvptx_reg_or_mem_operand" "=R,m")
+ (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
+ ""
+ "@
+ %.\\tcvt%t0.u32\\t%0, %1;
+ %.\\tst%A0.u%T0\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+(define_insn "truncdi<mode>2"
+ [(set (match_operand:QHSIM 0 "nvptx_reg_or_mem_operand" "=R,m")
+ (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
+ ""
+ "@
+ %.\\tcvt%t0.u64\\t%0, %1;
+ %.\\tst%A0.u%T0\\t%0, %1;"
+ [(set_attr "subregs_ok" "true")])
+
+;; Pointer address space conversions
+
+(define_int_iterator cvt_code
+ [UNSPEC_FROM_GLOBAL
+ UNSPEC_FROM_LOCAL
+ UNSPEC_FROM_SHARED
+ UNSPEC_FROM_CONST
+ UNSPEC_TO_GLOBAL
+ UNSPEC_TO_LOCAL
+ UNSPEC_TO_SHARED
+ UNSPEC_TO_CONST])
+
+(define_int_attr cvt_name
+ [(UNSPEC_FROM_GLOBAL "from_global")
+ (UNSPEC_FROM_LOCAL "from_local")
+ (UNSPEC_FROM_SHARED "from_shared")
+ (UNSPEC_FROM_CONST "from_const")
+ (UNSPEC_TO_GLOBAL "to_global")
+ (UNSPEC_TO_LOCAL "to_local")
+ (UNSPEC_TO_SHARED "to_shared")
+ (UNSPEC_TO_CONST "to_const")])
+
+(define_int_attr cvt_str
+ [(UNSPEC_FROM_GLOBAL ".global")
+ (UNSPEC_FROM_LOCAL ".local")
+ (UNSPEC_FROM_SHARED ".shared")
+ (UNSPEC_FROM_CONST ".const")
+ (UNSPEC_TO_GLOBAL ".to.global")
+ (UNSPEC_TO_LOCAL ".to.local")
+ (UNSPEC_TO_SHARED ".to.shared")
+ (UNSPEC_TO_CONST ".to.const")])
+
+(define_insn "convaddr_<cvt_name><mode>"
+ [(set (match_operand:P 0 "nvptx_register_operand" "=R")
+ (unspec:P [(match_operand:P 1 "nvptx_register_or_symbolic_operand" "Rs")] cvt_code))]
+ ""
+ "%.\\tcvta<cvt_str>%t0\\t%0, %1;")
+
+;; Integer arithmetic
+
+(define_insn "add<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tadd%t0\\t%0, %1, %2;")
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tsub%t0\\t%0, %1, %2;")
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tmul.lo%t0\\t%0, %1, %2;")
+
+(define_insn "*mad<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
+ (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
+
+(define_insn "div<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tdiv.s%T0\\t%0, %1, %2;")
+
+(define_insn "udiv<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tdiv.u%T0\\t%0, %1, %2;")
+
+(define_insn "mod<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\trem.s%T0\\t%0, %1, %2;")
+
+(define_insn "umod<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\trem.u%T0\\t%0, %1, %2;")
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tmin.s%T0\\t%0, %1, %2;")
+
+(define_insn "umin<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tmin.u%T0\\t%0, %1, %2;")
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tmax.s%T0\\t%0, %1, %2;")
+
+(define_insn "umax<mode>3"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tmax.u%T0\\t%0, %1, %2;")
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tabs.s%T0\\t%0, %1;")
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tneg.s%T0\\t%0, %1;")
+
+(define_insn "one_cmpl<mode>2"
+ [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+ (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tnot.b%T0\\t%0, %1;")
+
+(define_insn "bitrev<mode>2"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
+ UNSPEC_BITREV))]
+ ""
+ "%.\\tbrev.b%T0\\t%0, %1;")
+
+(define_insn "clz<mode>2"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tclz.b%T0\\t%0, %1;")
+
+(define_expand "ctz<mode>2"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "")
+ (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
+ ""
+{
+ rtx tmpreg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
+ emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
+ DONE;
+})
+
+;; Shifts
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tshl.b%T0\\t%0, %1, %2;")
+
+(define_insn "ashr<mode>3"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tshr.s%T0\\t%0, %1, %2;")
+
+(define_insn "lshr<mode>3"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tshr.u%T0\\t%0, %1, %2;")
+
+;; Logical operations
+
+(define_insn "and<mode>3"
+ [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
+ (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tand.b%T0\\t%0, %1, %2;")
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
+ (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\tor.b%T0\\t%0, %1, %2;")
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
+ (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
+ (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
+ ""
+ "%.\\txor.b%T0\\t%0, %1, %2;")
+
+;; Comparisons and branches
+
+(define_insn "*cmp<mode>"
+ [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
+ (match_operator:BI 1 "nvptx_comparison_operator"
+ [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
+ ""
+ "%.\\tsetp%c1 %0,%2,%3;")
+
+(define_insn "*cmp<mode>"
+ [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
+ (match_operator:BI 1 "nvptx_float_comparison_operator"
+ [(match_operand:SDFM 2 "nvptx_register_operand" "R")
+ (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
+ ""
+ "%.\\tsetp%c1 %0,%2,%3;")
+
+(define_insn "jump"
+ [(set (pc)
+ (label_ref (match_operand 0 "" "")))]
+ ""
+ "%.\\tbra\\t%l0;")
+
+(define_insn "br_true"
+ [(set (pc)
+ (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "%j0\\tbra\\t%l1;")
+
+(define_insn "br_false"
+ [(set (pc)
+ (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))]
+ ""
+ "%J0\\tbra\\t%l1;")
+
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "nvptx_comparison_operator"
+ [(match_operand:HSDIM 1 "nvptx_register_operand" "")
+ (match_operand:HSDIM 2 "nvptx_register_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ rtx t = nvptx_expand_compare (operands[0]);
+ operands[0] = t;
+ operands[1] = XEXP (t, 0);
+ operands[2] = XEXP (t, 1);
+})
+
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
+ [(match_operand:SDFM 1 "nvptx_register_operand" "")
+ (match_operand:SDFM 2 "nvptx_register_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+{
+ rtx t = nvptx_expand_compare (operands[0]);
+ operands[0] = t;
+ operands[1] = XEXP (t, 0);
+ operands[2] = XEXP (t, 1);
+})
+
+(define_expand "cbranchbi4"
+ [(set (pc)
+ (if_then_else (match_operator 0 "predicate_operator"
+ [(match_operand:BI 1 "nvptx_register_operand" "")
+ (match_operand:BI 2 "const0_operand" "")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))]
+ ""
+ "")
+
+;; Conditional stores
+
+(define_insn "setcc_from_bi"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
+ (const_int 0)))]
+ ""
+ "%.\\tselp%t0 %0,-1,0,%1;")
+
+(define_insn "setcc_int<mode>"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (match_operator:SI 1 "nvptx_comparison_operator"
+ [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
+ ""
+ "%.\\tset%t0%c1 %0,%2,%3;")
+
+(define_insn "setcc_int<mode>"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (match_operator:SI 1 "nvptx_float_comparison_operator"
+ [(match_operand:SDFM 2 "nvptx_register_operand" "R")
+ (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
+ ""
+ "%.\\tset%t0%c1 %0,%2,%3;")
+
+(define_insn "setcc_float<mode>"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (match_operator:SF 1 "nvptx_comparison_operator"
+ [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
+ (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
+ ""
+ "%.\\tset%t0%c1 %0,%2,%3;")
+
+(define_insn "setcc_float<mode>"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (match_operator:SF 1 "nvptx_float_comparison_operator"
+ [(match_operand:SDFM 2 "nvptx_register_operand" "R")
+ (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
+ ""
+ "%.\\tset%t0%c1 %0,%2,%3;")
+
+(define_expand "cstorebi4"
+ [(set (match_operand:SI 0 "nvptx_register_operand")
+ (match_operator:SI 1 "ne_operator"
+ [(match_operand:BI 2 "nvptx_register_operand")
+ (match_operand:BI 3 "const0_operand")]))]
+ ""
+ "")
+
+(define_expand "cstore<mode>4"
+ [(set (match_operand:SI 0 "nvptx_register_operand")
+ (match_operator:SI 1 "nvptx_comparison_operator"
+ [(match_operand:HSDIM 2 "nvptx_register_operand")
+ (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
+ ""
+ "")
+
+(define_expand "cstore<mode>4"
+ [(set (match_operand:SI 0 "nvptx_register_operand")
+ (match_operator:SI 1 "nvptx_float_comparison_operator"
+ [(match_operand:SDFM 2 "nvptx_register_operand")
+ (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
+ ""
+ "")
+
+;; Calls
+
+(define_insn "call_insn"
+ [(match_parallel 2 "call_operation"
+ [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "Rs"))
+ (match_operand 1))])]
+ ""
+{
+ return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
+})
+
+(define_insn "call_value_insn"
+ [(match_parallel 3 "call_operation"
+ [(set (match_operand 0 "nvptx_register_operand" "=R")
+ (call (mem:QI (match_operand:SI 1 "call_insn_operand" "Rs"))
+ (match_operand 2)))])]
+ ""
+{
+ return nvptx_output_call_insn (insn, operands[0], operands[1]);
+})
+
+(define_expand "call"
+ [(match_operand 0 "" "")]
+ ""
+{
+ nvptx_expand_call (NULL_RTX, operands[0]);
+ DONE;
+})
+
+(define_expand "call_value"
+ [(match_operand 0 "" "")
+ (match_operand 1 "" "")]
+ ""
+{
+ nvptx_expand_call (operands[0], operands[1]);
+ DONE;
+})
+
+;; Floating point arithmetic.
+
+(define_insn "add<mode>3"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
+ ""
+ "%.\\tadd%t0\\t%0, %1, %2;")
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tsub%t0\\t%0, %1, %2;")
+
+(define_insn "mul<mode>3"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
+ ""
+ "%.\\tmul%t0\\t%0, %1, %2;")
+
+(define_insn "fma<mode>4"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
+ (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
+ ""
+ "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
+
+(define_insn "div<mode>3"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
+ ""
+ "%.\\tdiv%#%t0\\t%0, %1, %2;")
+
+(define_insn "copysign<mode>3"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_register_operand" "R")]
+ UNSPEC_COPYSIGN))]
+ ""
+ "%.\\tcopysign%t0\\t%0, %2, %1;")
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
+ ""
+ "%.\\tmin%t0\\t%0, %1, %2;")
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
+ (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
+ ""
+ "%.\\tmax%t0\\t%0, %1, %2;")
+
+(define_insn "abs<mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tabs%t0\\t%0, %1;")
+
+(define_insn "neg<mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tneg%t0\\t%0, %1;")
+
+(define_insn "sqrt<mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tsqrt%#%t0\\t%0, %1;")
+
+(define_insn "sinsf2"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
+ UNSPEC_SIN))]
+ "flag_unsafe_math_optimizations"
+ "%.\\tsin.approx%t0\\t%0, %1;")
+
+(define_insn "cossf2"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
+ UNSPEC_COS))]
+ "flag_unsafe_math_optimizations"
+ "%.\\tcos.approx%t0\\t%0, %1;")
+
+(define_insn "log2sf2"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
+ UNSPEC_LOG2))]
+ "flag_unsafe_math_optimizations"
+ "%.\\tlg2.approx%t0\\t%0, %1;")
+
+(define_insn "exp2sf2"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
+ UNSPEC_EXP2))]
+ "flag_unsafe_math_optimizations"
+ "%.\\tex2.approx%t0\\t%0, %1;")
+
+;; Conversions involving floating point
+
+(define_insn "extendsfdf2"
+ [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
+ (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt%t0%t1\\t%0, %1;")
+
+(define_insn "truncdfsf2"
+ [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt%#%t0%t1\\t%0, %1;")
+
+(define_insn "floatunssi<mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
+
+(define_insn "floatsi<mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
+
+(define_insn "floatunsdi<mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
+
+(define_insn "floatdi<mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
+
+(define_insn "fixuns_trunc<mode>si2"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
+
+(define_insn "fix_trunc<mode>si2"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
+
+(define_insn "fixuns_trunc<mode>di2"
+ [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+ (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
+
+(define_insn "fix_trunc<mode>di2"
+ [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+ (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
+ ""
+ "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
+
+(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
+ UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
+(define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
+ (UNSPEC_FPINT_BTRUNC "btrunc")
+ (UNSPEC_FPINT_CEIL "ceil")
+ (UNSPEC_FPINT_NEARBYINT "nearbyint")])
+(define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
+ (UNSPEC_FPINT_BTRUNC ".rzi")
+ (UNSPEC_FPINT_CEIL ".rpi")
+ (UNSPEC_FPINT_NEARBYINT "%#i")])
+
+(define_insn "<FPINT:fpint_name><SDFM:mode>2"
+ [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
+ (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
+ FPINT))]
+ ""
+ "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
+
+(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
+(define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
+ (UNSPEC_FPINT_CEIL "lceil")])
+(define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
+ (UNSPEC_FPINT_CEIL ".rpi")])
+
+(define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
+ FPINT2))]
+ ""
+ "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
+
+;; Miscellaneous
+
+(define_insn "nop"
+ [(const_int 0)]
+ ""
+ "")
+
+(define_insn "return"
+ [(return)]
+ ""
+{
+ return nvptx_output_return ();
+})
+
+(define_expand "epilogue"
+ [(clobber (const_int 0))]
+ ""
+{
+ emit_jump_insn (gen_return ());
+ DONE;
+})
+
+(define_expand "nonlocal_goto"
+ [(match_operand 0 "" "")
+ (match_operand 1 "" "")
+ (match_operand 2 "" "")
+ (match_operand 3 "" "")]
+ ""
+{
+ sorry ("target cannot support nonlocal goto.");
+ emit_insn (gen_nop ());
+ DONE;
+})
+
+(define_expand "nonlocal_goto_receiver"
+ [(const_int 0)]
+ ""
+{
+ sorry ("target cannot support nonlocal goto.");
+})
+
+(define_insn "allocate_stack"
+ [(set (match_operand 0 "nvptx_register_operand" "=R")
+ (unspec [(match_operand 1 "nvptx_register_operand" "R")]
+ UNSPEC_ALLOCA))]
+ ""
+ "%.\\tcall (%0), %%alloca, (%1);")
+
+(define_expand "restore_stack_block"
+ [(match_operand 0 "register_operand" "")
+ (match_operand 1 "register_operand" "")]
+ ""
+{
+ DONE;
+})
+
+(define_expand "restore_stack_function"
+ [(match_operand 0 "register_operand" "")
+ (match_operand 1 "register_operand" "")]
+ ""
+{
+ DONE;
+})
+
+(define_insn "trap"
+ [(trap_if (const_int 1) (const_int 0))]
+ ""
+ "trap;")
+
+(define_insn "trap_if_true"
+ [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
+ (const_int 0))
+ (const_int 0))]
+ ""
+ "%j0 trap;")
+
+(define_insn "trap_if_false"
+ [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
+ (const_int 0))
+ (const_int 0))]
+ ""
+ "%J0 trap;")
+
+(define_expand "ctrap<mode>4"
+ [(trap_if (match_operator 0 "nvptx_comparison_operator"
+ [(match_operand:SDIM 1 "nvptx_register_operand")
+ (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
+ (match_operand 3 "const_0_operand"))]
+ ""
+{
+ rtx t = nvptx_expand_compare (operands[0]);
+ emit_insn (gen_trap_if_true (t));
+ DONE;
+})
+
+(define_insn "*oacc_ntid_insn"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_NTID))]
+ ""
+ "%.\\tmov.u32 %0, %%ntid%d1;")
+
+(define_expand "oacc_ntid"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NTID))]
+ ""
+{
+ if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
+ FAIL;
+})
+
+(define_insn "*oacc_tid_insn"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "n")] UNSPEC_TID))]
+ ""
+ "%.\\tmov.u32 %0, %%tid%d1;")
+
+(define_expand "oacc_tid"
+ [(set (match_operand:SI 0 "nvptx_register_operand" "")
+ (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_TID))]
+ ""
+{
+ if (INTVAL (operands[1]) < 0 || INTVAL (operands[1]) > 2)
+ FAIL;
+})
+
+;; Atomic insns.
+
+(define_expand "atomic_compare_and_swap<mode>"
+ [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
+ (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
+ (match_operand:SDIM 2 "memory_operand") ;; memory
+ (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
+ (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
+ (match_operand:SI 5 "const_int_operand") ;; is_weak
+ (match_operand:SI 6 "const_int_operand") ;; success model
+ (match_operand:SI 7 "const_int_operand")] ;; failure model
+ ""
+{
+ emit_insn (gen_atomic_compare_and_swap<mode>_1 (operands[1], operands[2], operands[3],
+ operands[4], operands[6]));
+
+ rtx tmp = gen_reg_rtx (GET_MODE (operands[0]));
+ emit_insn (gen_cstore<mode>4 (tmp,
+ gen_rtx_EQ (SImode, operands[1], operands[3]),
+ operands[1], operands[3]));
+ emit_insn (gen_andsi3 (operands[0], tmp, GEN_INT (1)));
+ DONE;
+})
+
+(define_insn "atomic_compare_and_swap<mode>_1"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (unspec_volatile:SDIM
+ [(match_operand:SDIM 1 "memory_operand" "+m")
+ (match_operand:SDIM 2 "nvptx_register_operand" "R")
+ (match_operand:SDIM 3 "nvptx_register_operand" "R")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPECV_CAS))
+ (set (match_dup 1)
+ (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
+ ""
+ "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;")
+
+(define_insn "atomic_exchange<mode>"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
+ (unspec_volatile:SDIM
+ [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ UNSPECV_XCHG))
+ (set (match_dup 1)
+ (match_operand:SDIM 2 "nvptx_register_operand" "R"))] ;; input
+ ""
+ "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;")
+
+(define_insn "atomic_fetch_add<mode>"
+ [(set (match_operand:SDIM 1 "memory_operand" "+m")
+ (unspec_volatile:SDIM
+ [(plus:SDIM (match_dup 1)
+ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ UNSPECV_LOCK))
+ (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (match_dup 1))]
+ ""
+ "%.\\tatom%A1.add%t0\\t%0, %1, %2;")
+
+(define_insn "atomic_fetch_addsf"
+ [(set (match_operand:SF 1 "memory_operand" "+m")
+ (unspec_volatile:SF
+ [(plus:SF (match_dup 1)
+ (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ UNSPECV_LOCK))
+ (set (match_operand:SF 0 "nvptx_register_operand" "=R")
+ (match_dup 1))]
+ ""
+ "%.\\tatom%A1.add%t0\\t%0, %1, %2;")
+
+(define_code_iterator any_logic [and ior xor])
+(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
+
+;; Currently disabled until we add better subtarget support - requires sm_32.
+(define_insn "atomic_fetch_<logic><mode>"
+ [(set (match_operand:SDIM 1 "memory_operand" "+m")
+ (unspec_volatile:SDIM
+ [(any_logic:SDIM (match_dup 1)
+ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ UNSPECV_LOCK))
+ (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (match_dup 1))]
+ "0"
+ "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;")