diff options
author | SimplyTheOther <simplytheother@gmail.com> | 2020-05-01 08:25:27 +0800 |
---|---|---|
committer | Philip Herron <philip.herron@embecosm.com> | 2020-11-28 19:09:39 +0000 |
commit | c7c6f785c8e893ec7bcacd1a2319ce309d2450f2 (patch) | |
tree | 390cd5960fb125655ab707cb12567d2a9a0ffeb5 | |
parent | 7266d66d2321562ef74daf90ffb1ad70d081d890 (diff) | |
download | gcc-c7c6f785c8e893ec7bcacd1a2319ce309d2450f2.zip gcc-c7c6f785c8e893ec7bcacd1a2319ce309d2450f2.tar.gz gcc-c7c6f785c8e893ec7bcacd1a2319ce309d2450f2.tar.bz2 |
Adding Rust target hook documentation
Added powerpc target hook and improved aarch64 feature handling
Added DEC Alpha target hook
Added ARC target hook
Created ARM target hook (at least preliminary support)
36 files changed, 3906 insertions, 3514 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 778ec09..1af578b 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -560,6 +560,8 @@ tm_p_file_list=@tm_p_file_list@ tm_p_include_list=@tm_p_include_list@ tm_d_file_list=@tm_d_file_list@ tm_d_include_list=@tm_d_include_list@ +tm_rust_file_list=@tm_rust_file_list@ +tm_rust_include_list=@tm_rust_include_list@ build_xm_file_list=@build_xm_file_list@ build_xm_include_list=@build_xm_include_list@ build_xm_defines=@build_xm_defines@ @@ -855,6 +857,7 @@ CONFIG_H = config.h $(host_xm_file_list) TCONFIG_H = tconfig.h $(xm_file_list) TM_P_H = tm_p.h $(tm_p_file_list) TM_D_H = tm_d.h $(tm_d_file_list) +TM_RUST_H = tm_rust.h $(tm_rust_file_list) GTM_H = tm.h $(tm_file_list) insn-constants.h TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H) @@ -917,6 +920,7 @@ TARGET_H = $(TM_H) target.h $(TARGET_DEF) insn-modes.h insn-codes.h C_TARGET_H = c-family/c-target.h $(C_TARGET_DEF) COMMON_TARGET_H = common/common-target.h $(INPUT_H) $(COMMON_TARGET_DEF) D_TARGET_H = d/d-target.h $(D_TARGET_DEF) +RUST_TARGET_H = rust/rust-target.h $(RUST_TARGET_DEF) MACHMODE_H = machmode.h mode-classes.def HOOKS_H = hooks.h HOSTHOOKS_DEF_H = hosthooks-def.h $(HOOKS_H) @@ -1203,6 +1207,9 @@ CXX_TARGET_OBJS=@cxx_target_objs@ # Target specific, D specific object file D_TARGET_OBJS=@d_target_objs@ +# Target specific, Rust specific object file +RUST_TARGET_OBJS=@rust_target_objs@ + # Target specific, Fortran specific object file FORTRAN_TARGET_OBJS=@fortran_target_objs@ @@ -1876,6 +1883,7 @@ tconfig.h: cs-tconfig.h ; @true tm.h: cs-tm.h ; @true tm_p.h: cs-tm_p.h ; @true tm_d.h: cs-tm_d.h ; @true +tm_rust.h: cs-tm_rust.h ; @true cs-config.h: Makefile TARGET_CPU_DEFAULT="" \ @@ -1907,6 +1915,11 @@ cs-tm_d.h: Makefile HEADERS="$(tm_d_include_list)" DEFINES="" \ $(SHELL) $(srcdir)/mkconfig.sh tm_d.h +cs-tm_rust.h: Makefile + TARGET_CPU_DEFAULT="" \ + HEADERS="$(tm_rust_include_list)" DEFINES="" \ + $(SHELL) $(srcdir)/mkconfig.sh tm_rust.h + # Don't automatically run 
autoconf, since configure.ac might be accidentally # newer than configure. Also, this writes into the source directory which # might be on a read-only file system. If configured for maintainer mode @@ -2243,6 +2256,12 @@ default-d.o: config/default-d.c $(COMPILE) $< $(POSTCOMPILE) +# Files used by the Rust language front end. + +default-rust.o: config/default-rust.c + $(COMPILE) $< + $(POSTCOMPILE) + # Language-independent files. DRIVER_DEFINES = \ @@ -2553,6 +2572,15 @@ s-d-target-hooks-def-h: build/genhooks$(build_exeext) d/d-target-hooks-def.h $(STAMP) s-d-target-hooks-def-h +rust/rust-target-hooks-def.h: s-rust-target-hooks-def-h; @true + +s-rust-target-hooks-def-h: build/genhooks$(build_exeext) + $(RUN_GEN) build/genhooks$(build_exeext) "Rust Target Hook" \ + > tmp-rust-target-hooks-def.h + $(SHELL) $(srcdir)/../move-if-change tmp-rust-target-hooks-def.h \ + rust/rust-target-hooks-def.h + $(STAMP) s-rust-target-hooks-def-h + # check if someone mistakenly only changed tm.texi. # We use a different pathname here to avoid a circular dependency. s-tm-texi: $(srcdir)/doc/../doc/tm.texi @@ -2577,6 +2605,7 @@ s-tm-texi: build/genhooks$(build_exeext) $(srcdir)/doc/tm.texi.in || test $(srcdir)/doc/tm.texi -nt $(srcdir)/c-family/c-target.def \ || test $(srcdir)/doc/tm.texi -nt $(srcdir)/common/common-target.def \ || test $(srcdir)/doc/tm.texi -nt $(srcdir)/d/d-target.def \ + || test $(srcdir)/doc/tm.texi -nt $(srcdir)/rust/rust-target.def \ ); then \ echo >&2 ; \ echo You should edit $(srcdir)/doc/tm.texi.in rather than $(srcdir)/doc/tm.texi . 
>&2 ; \ @@ -2721,15 +2750,15 @@ s-gtype: build/gengtype$(build_exeext) $(filter-out [%], $(GTFILES)) \ -r gtype.state $(STAMP) s-gtype -generated_files = config.h tm.h $(TM_P_H) $(TM_D_H) $(TM_H) multilib.h \ - $(simple_generated_h) specs.h \ +generated_files = config.h tm.h $(TM_P_H) $(TM_D_H) $(TM_RUST_H) $(TM_H) \ + multilib.h $(simple_generated_h) specs.h \ tree-check.h genrtl.h insn-modes.h insn-modes-inline.h \ tm-preds.h tm-constrs.h \ $(ALL_GTFILES_H) gtype-desc.c gtype-desc.h gcov-iov.h \ options.h target-hooks-def.h insn-opinit.h \ common/common-target-hooks-def.h pass-instances.def \ c-family/c-target-hooks-def.h d/d-target-hooks-def.h \ - case-cfn-macros.h \ + rust/rust-target-hooks-def.h case-cfn-macros.h \ cfn-operators.pd omp-device-properties.h # diff --git a/gcc/config.gcc b/gcc/config.gcc index 0ae5848..a9fd6a7 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -89,6 +89,9 @@ # tm_d_file A list of headers with definitions of target hook # macros for the D compiler. # +# tm_rust_file A list of headers with definitions of target hook +# macros for the Rust compiler. +# # out_file The name of the machine description C support # file, if different from "$cpu_type/$cpu_type.c". # @@ -145,6 +148,9 @@ # d_target_objs List of extra target-dependent objects that be # linked into the D compiler only. # +# rust_target_objs List of extra target-dependent objects that be +# linked into the Rust compiler only. +# # fortran_target_objs List of extra target-dependent objects that be # linked into the fortran compiler only. # @@ -200,6 +206,9 @@ # # target_has_targetdm Set to yes or no depending on whether the target # has its own definition of targetdm. +# +# target_has_targetrustm Set to yes or no depending on whether the target +# has its own definition of targetrustm. 
out_file= common_out_file= @@ -216,6 +225,7 @@ extra_options= c_target_objs= cxx_target_objs= d_target_objs= +rust_target_objs= fortran_target_objs= target_has_targetcm=no target_has_targetm_common=yes @@ -321,12 +331,14 @@ aarch64*-*-*) c_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o" d_target_objs="aarch64-d.o" + rust_target_objs="aarch64-rust.o" extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o" target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" target_has_targetm_common=yes ;; alpha*-*-*) cpu_type=alpha + rust_target_objs="alpha-rust.o" extra_options="${extra_options} g.opt" ;; amdgcn*) @@ -340,6 +352,7 @@ arc*-*-*) cpu_type=arc c_target_objs="arc-c.o" cxx_target_objs="arc-c.o" + rust_target_objs="arc-rust.o" extra_options="${extra_options} arc/arc-tables.opt g.opt" extra_headers="arc-simd.h" ;; @@ -351,6 +364,7 @@ arm*-*-*) c_target_objs="arm-c.o" cxx_target_objs="arm-c.o" d_target_objs="arm-d.o" + rust_target_objs="arm-rust.o" extra_options="${extra_options} arm/arm-tables.opt" target_gtfiles="\$(srcdir)/config/arm/arm-builtins.c" ;; @@ -384,6 +398,7 @@ i[34567]86-*-*) c_target_objs="i386-c.o" cxx_target_objs="i386-c.o" d_target_objs="i386-d.o" + rust_target_objs="i386-rust.o" extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" extra_options="${extra_options} fused-madd.opt" @@ -421,6 +436,7 @@ x86_64-*-*) c_target_objs="i386-c.o" cxx_target_objs="i386-c.o" d_target_objs="i386-d.o" + 
rust_target_objs="i386-rust.o" extra_options="${extra_options} fused-madd.opt" extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" @@ -578,10 +594,12 @@ esac tm_file=${cpu_type}/${cpu_type}.h tm_d_file=${cpu_type}/${cpu_type}.h +tm_rust_file=${cpu_type}/${cpu_type}.h if test -f ${srcdir}/config/${cpu_type}/${cpu_type}-protos.h then tm_p_file=${cpu_type}/${cpu_type}-protos.h tm_d_file="${tm_d_file} ${cpu_type}/${cpu_type}-protos.h" + tm_rust_file="${tm_rust_file} ${cpu_type}/${cpu_type}-protos.h" fi extra_modes= @@ -3593,6 +3611,10 @@ if [ "$target_has_targetdm" = "no" ]; then d_target_objs="$d_target_objs default-d.o" fi +if [ "$target_has_targetrustm" = "no" ]; then + rust_target_objs="$rust_target_objs default-rust.o" +fi + # Support for --with-cpu and related options (and a few unrelated options, # too). case ${with_cpu} in @@ -5338,6 +5360,7 @@ case ${target} in c_target_objs="${c_target_objs} ${cpu_type}-c.o" cxx_target_objs="${cxx_target_objs} ${cpu_type}-c.o" d_target_objs="${d_target_objs} ${cpu_type}-d.o" + rust_target_objs="${rust_target_objs} ${cpu_type}-rust.o" tmake_file="${cpu_type}/t-${cpu_type} ${tmake_file}" ;; diff --git a/gcc/config/aarch64/aarch64-rust.c b/gcc/config/aarch64/aarch64-rust.c new file mode 100644 index 0000000..c1a4e73 --- /dev/null +++ b/gcc/config/aarch64/aarch64-rust.c @@ -0,0 +1,126 @@ +/* Subroutines for the Rust front end on the AArch64 architecture. + Copyright (C) 2020 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rust/rust-target.h" +#include "rust/rust-target-def.h" + +/* Implement TARGET_RUST_CPU_INFO for AArch64 targets. */ + +void +aarch64_rust_target_cpu_info (void) +{ + rust_add_target_info ("target_arch", "aarch64"); + + // TODO: properly change at some point instead of macro def +#ifndef isa_flag +# define isa_flag aarch64_isa_flags +# define isa_flag2 aarch64_isa_flags2 +//# define fpmath aarch64_fpmath +#else +# error "isa_flag and isa_flag2 already defined in aarch64-rust.c - weird things might happen" +#endif + // FIXME: almost feature-complete with rustc, missing "ras" feature (which may not be in gcc) + + if (TARGET_SIMD) + rust_add_target_info ("target_feature", "neon"); + /* appears to be what is referred to + as "fp-armv8" in rust, at least in target def, based on closeness of it in aarch64.h to TARGET_SIMD */ + if (TARGET_FLOAT) { + rust_add_target_info ("target_feature", "fp-armv8"); + // seems to be translated to "fp", but can't tell if "fp-armv8" exists too + rust_add_target_info ("target_feature", "fp"); + } + /*if (TARGET_CYCLONE) - need to find this feature (if it exists) + rust_add_target_info ("target_feature", "cyclone");*/ + /* appears to be what is referred to + as "strict-align" in rust, at least in target def according to notes in aarch64.h + otherwise strict-align could be STRICT_ALIGNMENT (which evaluates to same thing unless macro redefined) */ + if (TARGET_STRICT_ALIGN) + rust_add_target_info ("target_feature", 
"strict-align"); + // below all derived from llvm code - i'm pretty sure they correspond + if (TARGET_CRC32) + rust_add_target_info ("target_feature", "crc"); + if (TARGET_CRYPTO) + rust_add_target_info ("target_feature", "crypto"); + if (TARGET_DOTPROD) + rust_add_target_info ("target_feature", "dotprod"); + if (TARGET_F16FML) + rust_add_target_info ("target_feature", "fp16fml"); + if (TARGET_FP_F16INST) { + rust_add_target_info ("target_feature", "fullfp16"); + // seems to be translated to "fp16", but not sure, so keep that here too + rust_add_target_info ("target_feature", "fp16"); + } + // TODO: some feature relating to profiling with feature name "spe" - can't find atm + if (TARGET_LSE) + rust_add_target_info ("target_feature", "lse"); + // hope this is the right thing - llvm calls it "rdm" - TODO ensure that it is + if (AARCH64_ISA_RDMA) + rust_add_target_info ("target_feature", "rdm"); + if (TARGET_SVE) + rust_add_target_info ("target_feature", "sve"); + if (TARGET_SVE2) + rust_add_target_info ("target_feature", "sve2"); + if (isa_flag & AARCH64_FL_SVE2_AES) + rust_add_target_info ("target_feature", "sve2-aes"); + if (isa_flag & AARCH64_FL_SVE2_SM4) + rust_add_target_info ("target_feature", "sve2-sm4"); + if (isa_flag & AARCH64_FL_SVE2_SHA3) + rust_add_target_info ("target_feature", "sve2-sha3"); + if (isa_flag & AARCH64_FL_SVE2_BITPERM) + rust_add_target_info ("target_feature", "sve2-bitperm"); + // TODO: assuming that this is the correct RCPC and that the AARCH64_FL_RCPC8_4 is not + if (isa_flag & AARCH64_FL_RCPC) + rust_add_target_info ("target_feature", "rcpc"); + // TODO: find below target features if they exist + /*if (TARGET_ZCM) + rust_add_target_info ("target_feature", "zcm");*/ + /*if (TARGET_ZCZ) + rust_add_target_info ("target_feature", "zcz");*/ + // some possible target features: "thumb-mode" + if (TARGET_SM4) + rust_add_target_info ("target_feature", "sm4"); + if (TARGET_SHA3) + rust_add_target_info ("target_feature", "sha3"); + if 
(TARGET_SHA2) + rust_add_target_info ("target_feature", "sha2"); + if (TARGET_AES) + rust_add_target_info ("target_feature", "aes"); + if (TARGET_TME) + rust_add_target_info ("target_feature", "tme"); + if (TARGET_MEMTAG) + rust_add_target_info ("target_feature", "mte"); + + if (AARCH64_ISA_V8_1) + rust_add_target_info ("target_feature", "v8.1a"); + if (AARCH64_ISA_V8_2) + rust_add_target_info ("target_feature", "v8.2a"); + if (AARCH64_ISA_V8_3) + rust_add_target_info ("target_feature", "v8.3a"); + if (AARCH64_ISA_V8_4) + rust_add_target_info ("target_feature", "v8.4a"); + if (AARCH64_ISA_V8_5) + rust_add_target_info ("target_feature", "v8.5a"); + +#undef isa_flag +#undef isa_flag2 +//#undef fpmath +} diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index d241c5b..a0115a5 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -29,6 +29,9 @@ /* Target CPU versions for D. */ #define TARGET_D_CPU_VERSIONS aarch64_d_target_versions +/* Target CPU info for Rust. 
*/ +#define TARGET_RUST_CPU_INFO aarch64_rust_target_cpu_info + #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 11d20b7..c4a2296 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -118,6 +118,10 @@ aarch64-d.o: $(srcdir)/config/aarch64/aarch64-d.c $(COMPILE) $< $(POSTCOMPILE) +aarch64-rust.o: $(srcdir)/config/aarch64/aarch64-rust.c + $(COMPILE) $< + $(POSTCOMPILE) + PASSES_EXTRA += $(srcdir)/config/aarch64/aarch64-passes.def cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \ diff --git a/gcc/config/alpha/alpha-protos.h b/gcc/config/alpha/alpha-protos.h index ba4cb3b..8748c53 100644 --- a/gcc/config/alpha/alpha-protos.h +++ b/gcc/config/alpha/alpha-protos.h @@ -112,6 +112,9 @@ extern bool some_small_symbolic_operand_int (rtx); extern int tls_symbolic_operand_1 (rtx, int, int); extern rtx resolve_reload_operand (rtx); +/* Declare functions in alpha-rust.c */ +extern void alpha_rust_target_cpu_info (void); + namespace gcc { class context; } class rtl_opt_pass; diff --git a/gcc/config/alpha/alpha-rust.c b/gcc/config/alpha/alpha-rust.c new file mode 100644 index 0000000..48f03a5 --- /dev/null +++ b/gcc/config/alpha/alpha-rust.c @@ -0,0 +1,44 @@ +/* Subroutines for the Rust front end on the DEC Alpha. + Copyright (C) 2020 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. 
If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rust/rust-target.h" +#include "rust/rust-target-def.h" + +/* Implement TARGET_RUST_CPU_INFO for DEC Alpha targets. */ + +void +alpha_rust_target_cpu_info (void) +{ + /* i couldn't actually confirm that this was the arch name (removed from llvm and no rustc support), + * but i don't think they would choose something different to gcc */ + rust_add_target_info ("target_arch", "alpha"); + + // CIX was actually the only llvm option available when it was removed, but adding other gcc ones + if (TARGET_CIX) + rust_add_target_info ("target_feature", "cix"); + if (TARGET_FIX) + rust_add_target_info ("target_feature", "fix"); + if (TARGET_BWX) + rust_add_target_info ("target_feature", "bwx"); + // may be called "mvi" under rustc (but they have no support for it atm, so who cares amirite?) + if (TARGET_MAX) + rust_add_target_info ("target_feature", "max"); +} diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h index 8da9ebc..d35db6e 100644 --- a/gcc/config/alpha/alpha.h +++ b/gcc/config/alpha/alpha.h @@ -94,6 +94,9 @@ along with GCC; see the file COPYING3. If not see while (0) #endif +/* Target CPU info for Rust. */ +#define TARGET_RUST_CPU_INFO alpha_rust_target_cpu_info + /* Run-time compilation parameters selecting different hardware subsets. */ /* Which processor to schedule for. The cpu attribute defines a list that diff --git a/gcc/config/alpha/t-alpha b/gcc/config/alpha/t-alpha index b62cc60..6820af9 100644 --- a/gcc/config/alpha/t-alpha +++ b/gcc/config/alpha/t-alpha @@ -17,3 +17,7 @@ # <http://www.gnu.org/licenses/>. 
PASSES_EXTRA += $(srcdir)/config/alpha/alpha-passes.def + +alpha-rust.o: $(srcdir)/config/alpha/alpha-rust.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index c72d78e..cfe7e32 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -110,5 +110,8 @@ extern bool arc_is_jli_call_p (rtx); extern void arc_file_end (void); extern bool arc_is_secure_call_p (rtx); +/* Declare functions in arc-rust.c */ +extern void arc_rust_target_cpu_info (void); + rtl_opt_pass * make_pass_arc_ifcvt (gcc::context *ctxt); rtl_opt_pass * make_pass_arc_predicate_delay_insns (gcc::context *ctxt); diff --git a/gcc/config/arc/arc-rust.c b/gcc/config/arc/arc-rust.c new file mode 100644 index 0000000..ab5fd62 --- /dev/null +++ b/gcc/config/arc/arc-rust.c @@ -0,0 +1,104 @@ +/* Subroutines for the Rust front end on the Synopsys DesignWare ARC cpu. + Copyright (C) 2020 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rust/rust-target.h" +#include "rust/rust-target-def.h" + +/* Implement TARGET_RUST_CPU_INFO for Synopsys DesignWare ARC targets. 
*/ + +void +arc_rust_target_cpu_info (void) +{ + rust_add_target_info ("target_arch", "arc"); + + /* in llvm, the arc target has no "subtargets" (features according to rustc) as far as I can tell + * gcc has a bunch of target macros that look like they could represent features, which I've added + * provisionally. TODO add and rename features based on llvm. */ + if (TARGET_NORM) + rust_add_target_info ("target_feature", "norm"); + if (TARGET_OPTFPE) + rust_add_target_info ("target_feature", "optfpe"); + if (TARGET_SWAP) + rust_add_target_info ("target_feature", "swap"); + + if (TARGET_UNALIGN_BRANCH) + rust_add_target_info ("target_feature", "unalign-branch"); + if (TARGET_PAD_RETURN) + rust_add_target_info ("target_feature", "pad-return"); + if (TARGET_AT_DBR_CONDEXEC) + rust_add_target_info ("target_feature", "at-dbr-condexec"); + + // TODO: maybe define different cpu types? + + // TODO: are all these below needed and useful? + if (TARGET_MPYW) + rust_add_target_info ("target_feature", "mpyw"); + if (TARGET_MULTI) + rust_add_target_info ("target_feature", "multi"); + if (TARGET_MPY) + rust_add_target_info ("target_feature", "mpy"); + if (TARGET_ARC700_MPY) + rust_add_target_info ("target_feature", "arc700-mpy"); + if (TARGET_ANY_MPY) + rust_add_target_info ("target_feature", "any-mpy"); + if (TARGET_PLUS_DMPY) + rust_add_target_info ("target_feature", "plus-dmpy"); + if (TARGET_PLUS_MACD) + rust_add_target_info ("target_feature", "plus-macd"); + if (TARGET_PLUS_QMACW) + rust_add_target_info ("target_feature", "plus-qmacw"); + if (TARGET_LP_WR_INTERLOCK) + rust_add_target_info ("target_feature", "lp-wr-interlock"); + + // TODO: should different cpu families be removed? 
+ if (TARGET_ARC600_FAMILY) + rust_add_target_info ("target_feature", "arc600-family"); + if (TARGET_ARCOMPACT_FAMILY) + rust_add_target_info ("target_feature", "arcompact-family"); + + if (TARGET_HARD_FLOAT) + rust_add_target_info ("target_feature", "hard-float"); + if (TARGET_FP_SP_BASE) + rust_add_target_info ("target_feature", "fp-sp-base"); + if (TARGET_FP_DP_BASE) + rust_add_target_info ("target_feature", "fp-dp-base"); + if (TARGET_FP_SP_FUSED) + rust_add_target_info ("target_feature", "fp-sp-fused"); + if (TARGET_FP_DP_FUSED) + rust_add_target_info ("target_feature", "fp-dp-fused"); + if (TARGET_FP_SP_CONV) + rust_add_target_info ("target_feature", "fp-sp-conv"); + if (TARGET_FP_DP_CONV) + rust_add_target_info ("target_feature", "fp-dp-conv"); + if (TARGET_FP_SP_SQRT) + rust_add_target_info ("target_feature", "fp-sp-sqrt"); + if (TARGET_FP_DP_SQRT) + rust_add_target_info ("target_feature", "fp-dp-sqrt"); + if (TARGET_FP_DP_AX) + rust_add_target_info ("target_feature", "fp-dp-ax"); + if (TARGET_FPX_QUARK) + rust_add_target_info ("target_feature", "fpx-quark"); + if (TARGET_DBNZ) + rust_add_target_info ("target_feature", "dbnz"); + + if (TARGET_BI_BIH) + rust_add_target_info ("target_feature", "bi-bih"); +} diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h index fd6e21a..838f4eb 100644 --- a/gcc/config/arc/arc.h +++ b/gcc/config/arc/arc.h @@ -52,6 +52,9 @@ along with GCC; see the file COPYING3. If not see /* Names to predefine in the preprocessor for this target machine. */ #define TARGET_CPU_CPP_BUILTINS() arc_cpu_cpp_builtins (pfile) +/* CPU info for Rust for this target machine. */ +#define TARGET_RUST_CPU_INFO arc_rust_target_cpu_info + /* Macros enabled by specific command line option. FIXME: to be deprecatd. 
*/ #define CPP_SPEC "\ diff --git a/gcc/config/arc/t-arc b/gcc/config/arc/t-arc index 6029403..da2e0e3 100644 --- a/gcc/config/arc/t-arc +++ b/gcc/config/arc/t-arc @@ -28,6 +28,11 @@ $(TREE_H) $(TM_H) $(TM_P_H) coretypes.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/arc/arc-c.c +arc-rust.o: $(srcdir)/config/arc/arc-rust.c $(CONFIG_H) $(SYSTEM_H) \ +$(TREE_H) $(TM_H) $(TM_P_H) coretypes.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/arc/arc-rust.c + #Run `arc-cpus` if you changed something in arc-cpus.def .PHONY: arc-cpus diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 1ba318a..6e5a220 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -398,6 +398,9 @@ extern void arm_cpu_cpp_builtins (struct cpp_reader *); /* Defined in arm-d.c */ extern void arm_d_target_versions (void); +/* Defined in arm-rust.c */ +extern void arm_rust_target_cpu_info (void); + extern bool arm_is_constant_pool_ref (rtx); /* The bits in this mask specify which instruction scheduling options should diff --git a/gcc/config/arm/arm-rust.c b/gcc/config/arm/arm-rust.c new file mode 100644 index 0000000..7c83e3f --- /dev/null +++ b/gcc/config/arm/arm-rust.c @@ -0,0 +1,304 @@ +/* Subroutines for the Rust front end on the ARM architecture. + Copyright (C) 2020 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. 
If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tm_p.h" +#include "rust/rust-target.h" +#include "rust/rust-target-def.h" + +/* Implement TARGET_RUST_CPU_INFO for ARM targets. */ + +void arm_rust_target_cpu_info(void) { + rust_add_target_info("target_arch", "arm"); + + /* TODO: further research support for CLREX, acquire-release (lda/ldaex), slow-fp-brcc (slow FP + * compare and branch), perfmon, trustzone, fpao, fuse-aes, fuse-literals, read-tp-hard, zcz, + * prof-unpr, slow-vgetlni32, slow-vdup32, prefer-vmovsr, prefer-ishst, muxed-units, slow-odd-reg, + * slow-load-D-subreg, wide-stride-vfp, dont-widen-vmovs, splat-vfp-neon, expand-fp-mlx, + * vmlx-hazards, neon-fpmovs, neonfp (as in using neon for scalar fp), vldn-align, + * nonpipelined-vfp, slowfpvmlx, slowfpvfmx, vmlx-forwarding, 32bit (prefer 32-bit Thumb), + * loop-align, mve1beat, mve2beat, mve4beat, avoid-partial-cpsr, cheap-predictable-cpsr, + * avoid-movs-shop, ret-addr-stack, no-branch-predictor, virtualization, nacl-trap, execute-only, + * reserve-r9, no-movt, no-neg-immediates, use-misched, disable-postra-scheduler, lob (Low + * Overhead Branch), noarm, cde - can't find them. */ + /* TODO: figure out if gcc has an equivalent to "fpregs" (floating-point registers even if only + * used for integer - shared between VFP and MVE). 
*/ + if (TARGET_VFPD32) + rust_add_target_info("target_feature", "d32"); + bool hasFeatureVFP2 = bitmap_bit_p(arm_active_target.isa, isa_bit_vfpv2) && TARGET_VFP_DOUBLE; + if (hasFeatureVFP2) { + rust_add_target_info("target_feature", "vfp2"); + + // also added implied features that aren't separately supported in gcc + rust_add_target_info("target_feature", "vfp2sp"); + } + // minimal VFPv3 support - support for instruction set, not necessarily full + bool minVFP3 = TARGET_VFP3 && bitmap_bit_p(arm_active_target.isa, isa_bit_vfpv2); + if (minVFP3) { + rust_add_target_info("target_feature", "vfp3d16sp"); + + if (TARGET_VFPD32) + rust_add_target_info("target_feature", "vfp3sp"); + + if (TARGET_VFP_DOUBLE) { + rust_add_target_info("target_feature", "vfp3d16"); + + if (TARGET_VFPD32) { + rust_add_target_info("target_feature", "vfp3"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_neon)) + rust_add_target_info("target_feature", "neon"); + } + } + } + bool hasFeatureVFP3 = minVFP3 && TARGET_VFP_DOUBLE && TARGET_VFPD32; + bool hasFeatureFP16 = bitmap_bit_p(arm_active_target.isa, isa_bit_fp16conv); + if (hasFeatureFP16) + rust_add_target_info("target_info", "fp16"); + bool minVFP4 = minVFP3 && bitmap_bit_p(arm_active_target.isa, isa_bit_vfpv4) && hasFeatureFP16; + if (minVFP4) { + rust_add_target_info("target_feature", "vfp4d16sp"); + + if (TARGET_VFPD32) + rust_add_target_info("target_feature", "vfp4sp"); + + if (TARGET_VFP_DOUBLE) { + rust_add_target_info("target_feature", "vfp4d16"); + + if (TARGET_VFPD32) { + rust_add_target_info("target_feature", "vfp4"); + } + } + } + // NOTE: supposedly "fp-armv8" features in llvm are the same as "fpv5", so creating them based on + // that + bool minFP_ARMv8 = minVFP4 && TARGET_VFP5; + if (minFP_ARMv8) { + rust_add_target_info("target_feature", "fp-armv8d16sp"); + + if (TARGET_VFPD32) + rust_add_target_info("target_feature", "fp-armv8sp"); + + if (TARGET_VFP_DOUBLE) { + rust_add_target_info("target_feature", "fp-armv8d16"); + + 
if (TARGET_VFPD32) { + rust_add_target_info("target_feature", "fp-armv8"); + } + } + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_fp16)) { + rust_add_target_info("target_feature", "fullfp16"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_fp16fml)) + rust_add_target_info("target_feature", "fp16fml"); + } + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_tdiv)) + rust_add_target_info("target_feature", "hwdiv"); + if (bitmap_bit_p(arm_active_target.isa, isa_bit_adiv)) + rust_add_target_info("target_feature", "hwdiv-arm"); + // TODO: I'm not sure if there's an exact correlation here (data barrier), so maybe research + // There's also the question of whether this also means "full data barrier" ("fdb" in llvm) + if (TARGET_HAVE_MEMORY_BARRIER) + rust_add_target_info("target_feature", "db"); + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cmse)) + rust_add_target_info("target_feature", "8msecext"); + /* TODO: note that sha2 is an option for aarch64 in gcc but not for arm, so no feature here + * possible. The same goes for aes. However, as llvm has them as prerequisites for crypto, they + * are enabled with it. 
*/ + if (TARGET_CRYPTO) { + rust_add_target_info("target_feature", "crypto"); + rust_add_target_info("target_feature", "sha2"); + rust_add_target_info("target_feature", "aes"); + } + if (TARGET_CRC32) + rust_add_target_info("target_feature", "crc"); + if (TARGET_DOTPROD) + rust_add_target_info("target_feature", "dotprod"); + // TODO: supposedly gcc supports RAS, but I couldn't find the option, so leaving out "ras" for now + if (TARGET_DSP_MULTIPLY) + rust_add_target_info("target_feature", "dsp"); + if (bitmap_bit_p(arm_active_target.isa, isa_bit_mp)) + rust_add_target_info("target_feature", "mp"); + // TODO: figure out the exact strict-align feature, which I'm pretty sure GCC has + // TODO: figure out how to access long call data (which is in GCC) for "long-calls" + if (bitmap_bit_p(arm_active_target.isa, isa_bit_sb)) + rust_add_target_info("target_feature", "sb"); + if (bitmap_bit_p(arm_active_target.isa, isa_bit_bf16)) + rust_add_target_info("target_feature", "bf16"); + if (bitmap_bit_p(arm_active_target.isa, isa_bit_i8mm)) + rust_add_target_info("target_feature", "i8mm"); + switch (TARGET_ARM_ARCH_PROFILE) { + case 'A': + rust_add_target_info("target_feature", "aclass"); + break; + case 'R': + rust_add_target_info("target_feature", "rclass"); + break; + case 'M': + rust_add_target_info("target_feature", "mclass"); + break; + default: + fprintf(stderr, "Screwed up profile selection in arm-rust.c - unknown profile '%c'", + TARGET_ARM_ARCH_PROFILE); + break; + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_thumb2)) + rust_add_target_info("target_feature", "thumb2"); + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv4) + && bitmap_bit_p(arm_active_target.isa, isa_bit_notm) + && bitmap_bit_p(arm_active_target.isa, isa_bit_thumb)) { + rust_add_target_info("target_feature", "v4t"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv5t)) { + rust_add_target_info("target_feature", "v5t"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv5te)) { + 
rust_add_target_info("target_feature", "v5te"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv6) + && bitmap_bit_p(arm_active_target.isa, isa_bit_be8)) { + rust_add_target_info("target_feature", "v6"); + + // note: this definition of "ARMv6m" listed as "suspect" in arm-cpus.in + rust_add_target_info("target_feature", "v6m"); + + bool hasV8BaselineOps = bitmap_bit_p(arm_active_target.isa, isa_bit_armv8) + && bitmap_bit_p(arm_active_target.isa, isa_bit_cmse) + && bitmap_bit_p(arm_active_target.isa, isa_bit_tdiv); + if (hasV8BaselineOps) + rust_add_target_info("target_feature", "v8m"); + + bool hasV6kOps = bitmap_bit_p(arm_active_target.isa, isa_bit_armv6k); + if (hasV6kOps) { + rust_add_target_info("target_feature", "v6k"); + } + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_thumb2) && hasV8BaselineOps + && hasV6kOps) { + rust_add_target_info("target_feature", "v6t2"); + + // note that arm-cpus.in refers to this (ARMv7) as suspect + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv7)) { + rust_add_target_info("target_feature", "v7"); + + rust_add_target_info("target_feature", "v8m.main"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv8_1m_main)) + rust_add_target_info("target_feature", "v8.1m.main"); + + // dummy: can't find feature acquire-release, so dummy true variable + bool hasAcquireRelease = true; + if (hasAcquireRelease && bitmap_bit_p(arm_active_target.isa, isa_bit_adiv) + && bitmap_bit_p(arm_active_target.isa, isa_bit_lpae) + && bitmap_bit_p(arm_active_target.isa, isa_bit_mp) + && bitmap_bit_p(arm_active_target.isa, isa_bit_sec)) { + rust_add_target_info("target_feature", "v8"); + + if (TARGET_CRC32 + && bitmap_bit_p(arm_active_target.isa, isa_bit_armv8_1)) { + rust_add_target_info("target_feature", "v8.1a"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv8_2)) { + rust_add_target_info("target_feature", "v8.2a"); + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_armv8_3)) { + rust_add_target_info("target_feature", 
"v8.3a"); + + if (bitmap_bit_p( + arm_active_target.isa, isa_bit_armv8_4)) { + rust_add_target_info("target_feature", "v8.4a"); + // note: llvm, but not gcc, also wants dotprod for + // v8.4 + + if (bitmap_bit_p(arm_active_target.isa, isa_bit_sb) + && bitmap_bit_p( + arm_active_target.isa, isa_bit_predres) + && bitmap_bit_p( + arm_active_target.isa, isa_bit_armv8_5)) { + rust_add_target_info("target_feature", "v8.5a"); + + if (bitmap_bit_p( + arm_active_target.isa, isa_bit_armv8_6)) + rust_add_target_info( + "target_feature", "v8.6a"); + } + } + } + } + } + } + } + } + } + } + } + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_mve) + && bitmap_bit_p(arm_active_target.isa, isa_bit_vfp_base) + && bitmap_bit_p(arm_active_target.isa, isa_bit_armv7em)) { + rust_add_target_info("target_feature", "mve"); + + if (minFP_ARMv8 && bitmap_bit_p(arm_active_target.isa, isa_bit_fp16) + && bitmap_bit_p(arm_active_target.isa, isa_bit_mve_float)) + rust_add_target_info("target_feature", "mve.fp"); + } + // Note: no direct option for "cde" found, but it is implicitly activated via cdecpx, so do it + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp0)) { + rust_add_target_info("target_feature", "cdecp0"); + rust_add_target_info("target_feature", "cde"); + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp1)) { + rust_add_target_info("target_feature", "cdecp1"); + rust_add_target_info("target_feature", "cde"); + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp2)) { + rust_add_target_info("target_feature", "cdecp2"); + rust_add_target_info("target_feature", "cde"); + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp3)) { + rust_add_target_info("target_feature", "cdecp3"); + rust_add_target_info("target_feature", "cde"); + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp4)) { + rust_add_target_info("target_feature", "cdecp4"); + rust_add_target_info("target_feature", "cde"); + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp5)) { + 
rust_add_target_info("target_feature", "cdecp5"); + rust_add_target_info("target_feature", "cde"); + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp6)) { + rust_add_target_info("target_feature", "cdecp6"); + rust_add_target_info("target_feature", "cde"); + } + if (bitmap_bit_p(arm_active_target.isa, isa_bit_cdecp7)) { + rust_add_target_info("target_feature", "cdecp7"); + rust_add_target_info("target_feature", "cde"); + } + if (TARGET_SOFT_FLOAT) + rust_add_target_info("target_feature", "soft-float"); + // should be correct option (i.e. thumb mode rather than just thumb-aware) as TARGET_ARM is + // inverse + if (TARGET_THUMB) + rust_add_target_info("target_feature", "thumb-mode"); + // TODO: consider doing the processors as target features, but honestly they don't seem to fit +} diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 4a63d33..8ea4c7f 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -50,6 +50,9 @@ extern char arm_arch_name[]; /* Target CPU versions for D. */ #define TARGET_D_CPU_VERSIONS arm_d_target_versions +/* Target CPU info for Rust. */ +#define TARGET_RUST_CPU_INFO arm_rust_target_cpu_info + #include "config/arm/arm-opts.h" /* The processor for which instructions should be scheduled. */ diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm index 1f7f169..de70aea 100644 --- a/gcc/config/arm/t-arm +++ b/gcc/config/arm/t-arm @@ -162,6 +162,10 @@ arm-d.o: $(srcdir)/config/arm/arm-d.c $(COMPILE) $< $(POSTCOMPILE) +arm-rust.o: $(srcdir)/config/arm/arm-rust.c + $(COMPILE) $< + $(POSTCOMPILE) + arm-common.o: arm-cpu-cdata.h driver-arm.o: arm-native.h diff --git a/gcc/config/default-rust.c b/gcc/config/default-rust.c new file mode 100644 index 0000000..2ac1eca --- /dev/null +++ b/gcc/config/default-rust.c @@ -0,0 +1,26 @@ +/* Default Rust language target hooks initializer. + Copyright (C) 2020 Free Software Foundation, Inc. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "memmodel.h" +#include "tm_rust.h" +#include "rust/rust-target.h" +#include "rust/rust-target-def.h" + +struct gcc_targetrustm targetrustm = TARGETRUSTM_INITIALIZER; diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 65347a5..f092d90 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -265,6 +265,9 @@ extern void ix86_register_pragmas (void); /* In i386-d.c */ extern void ix86_d_target_versions (void); +/* In i386-rust.c */ +extern void ix86_rust_target_cpu_info (void); + /* In winnt.c */ extern void i386_pe_unique_section (tree, int); extern void i386_pe_declare_function_type (FILE *, const char *, int); diff --git a/gcc/config/i386/i386-rust.c b/gcc/config/i386/i386-rust.c index 44ca0c9..86a1842 100644 --- a/gcc/config/i386/i386-rust.c +++ b/gcc/config/i386/i386-rust.c @@ -22,9 +22,6 @@ along with GCC; see the file COPYING3. 
If not see #include "rust/rust-target.h" #include "rust/rust-target-def.h" -// FIXME: remove: this is only here to make intellisense happy -#include "i386.h" - // HACK: allows conversion of (presumably) numeric values to string #ifndef STR_HELPER_RUST #define STR_HELPER_RUST(x) #x @@ -47,12 +44,13 @@ ix86_rust_target_cpu_info (void) rust_add_target_info("target_arch", "x86_64"); // TODO: should these go here or is there a platform-neutral way of getting them (since they aren't defined in i386-c.c or i386-d.c)? - rust_add_target_info("target_pointer_width", STRINGIFY_RUST(POINTER_SIZE)); - rust_add_target_info("target_endian", BYTES_BIG_ENDIAN ? "big" : "little"); + //rust_add_target_info("target_pointer_width", STRINGIFY_RUST(POINTER_SIZE)); // this did not work + //rust_add_target_info("target_endian", BYTES_BIG_ENDIAN ? "big" : "little"); + // there is a platform-neutral way actually, I'm pretty sure - see cppbuiltins.c if (TARGET_X32) { // this means it uses 32-bit pointers with 64-bit, basically (ILP32) - rust_add_target_info("target_pointer_width", "32"); + //rust_add_target_info("target_pointer_width", "32"); // TODO: may also change x86_64-...-linux-gnu to x86_64-...-linux-gnux32 // is this better than just putting in pointer width outside of if statement? 
@@ -71,16 +69,26 @@ ix86_rust_target_cpu_info (void) // note: options that don't seem to have a target feature in rust are commented out - if (isa_flag2 & OPTION_MASK_ISA_WBNOINVD) - //def_or_undef (parse_in, "__WBNOINVD__"); - if (isa_flag2 & OPTION_MASK_ISA_AVX512VP2INTERSECT) - //def_or_undef (parse_in, "__AVX512VP2INTERSECT__"); + // TODO: properly change at some point instead of macro def +#ifndef isa_flag +# define isa_flag ix86_isa_flags +# define isa_flag2 ix86_isa_flags2 +# define fpmath ix86_fpmath +#else +# error "isa_flag and isa_flag2 already defined in i386-rust.c - weird things might happen" +#endif + + // options should be feature complete for rustc atm + if (isa_flag2 & OPTION_MASK_ISA2_WBNOINVD) + ; //def_or_undef (parse_in, "__WBNOINVD__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVX512VP2INTERSECT) + ; //def_or_undef (parse_in, "__AVX512VP2INTERSECT__"); if (isa_flag & OPTION_MASK_ISA_MMX) rust_add_target_info("target_feature", "mmx"); if (isa_flag & OPTION_MASK_ISA_3DNOW) - //def_or_undef (parse_in, "__3dNOW__"); + ; //def_or_undef (parse_in, "__3dNOW__"); if (isa_flag & OPTION_MASK_ISA_3DNOW_A) - //def_or_undef (parse_in, "__3dNOW_A__"); + ; //def_or_undef (parse_in, "__3dNOW_A__"); if (isa_flag & OPTION_MASK_ISA_SSE) rust_add_target_info("target_feature", "sse"); if (isa_flag & OPTION_MASK_ISA_SSE2) @@ -98,59 +106,59 @@ ix86_rust_target_cpu_info (void) if (isa_flag & OPTION_MASK_ISA_SHA) rust_add_target_info("target_feature", "sha"); if (isa_flag & OPTION_MASK_ISA_PCLMUL) - //def_or_undef (parse_in, "__PCLMUL__"); + ; //def_or_undef (parse_in, "__PCLMUL__"); if (isa_flag & OPTION_MASK_ISA_AVX) rust_add_target_info("target_feature", "avx"); if (isa_flag & OPTION_MASK_ISA_AVX2) rust_add_target_info("target_feature", "avx2"); if (isa_flag & OPTION_MASK_ISA_AVX512F) - //def_or_undef (parse_in, "__AVX512F__"); + rust_add_target_info("target_feature", "avx512f"); if (isa_flag & OPTION_MASK_ISA_AVX512ER) - //def_or_undef (parse_in, "__AVX512ER__"); + 
rust_add_target_info("target_feature", "avx512er"); if (isa_flag & OPTION_MASK_ISA_AVX512CD) - //def_or_undef (parse_in, "__AVX512CD__"); + rust_add_target_info("target_feature", "avx512cd"); if (isa_flag & OPTION_MASK_ISA_AVX512PF) - //def_or_undef (parse_in, "__AVX512PF__"); + rust_add_target_info("target_feature", "avx512pf"); if (isa_flag & OPTION_MASK_ISA_AVX512DQ) - //def_or_undef (parse_in, "__AVX512DQ__"); + rust_add_target_info("target_feature", "avx512dq"); if (isa_flag & OPTION_MASK_ISA_AVX512BW) - //def_or_undef (parse_in, "__AVX512BW__"); + rust_add_target_info("target_feature", "avx512bw"); if (isa_flag & OPTION_MASK_ISA_AVX512VL) - //def_or_undef (parse_in, "__AVX512VL__"); + rust_add_target_info("target_feature", "avx512vl"); if (isa_flag & OPTION_MASK_ISA_AVX512VBMI) - //def_or_undef (parse_in, "__AVX512VBMI__"); + rust_add_target_info("target_feature", "avx512vbmi"); if (isa_flag & OPTION_MASK_ISA_AVX512IFMA) - //def_or_undef (parse_in, "__AVX512IFMA__"); - if (isa_flag2 & OPTION_MASK_ISA_AVX5124VNNIW) - //def_or_undef (parse_in, "__AVX5124VNNIW__"); + rust_add_target_info("target_feature", "avx512ifma"); + if (isa_flag2 & OPTION_MASK_ISA2_AVX5124VNNIW) + ; //def_or_undef (parse_in, "__AVX5124VNNIW__"); if (isa_flag & OPTION_MASK_ISA_AVX512VBMI2) - //def_or_undef (parse_in, "__AVX512VBMI2__"); + ; //def_or_undef (parse_in, "__AVX512VBMI2__"); if (isa_flag & OPTION_MASK_ISA_AVX512VNNI) - //def_or_undef (parse_in, "__AVX512VNNI__"); - if (isa_flag2 & OPTION_MASK_ISA_PCONFIG) - //def_or_undef (parse_in, "__PCONFIG__"); - if (isa_flag2 & OPTION_MASK_ISA_SGX) - //def_or_undef (parse_in, "__SGX__"); - if (isa_flag2 & OPTION_MASK_ISA_AVX5124FMAPS) - //def_or_undef (parse_in, "__AVX5124FMAPS__"); + ; //def_or_undef (parse_in, "__AVX512VNNI__"); + if (isa_flag2 & OPTION_MASK_ISA2_PCONFIG) + ; //def_or_undef (parse_in, "__PCONFIG__"); + if (isa_flag2 & OPTION_MASK_ISA2_SGX) + ; //def_or_undef (parse_in, "__SGX__"); + if (isa_flag2 & 
OPTION_MASK_ISA2_AVX5124FMAPS) + ; //def_or_undef (parse_in, "__AVX5124FMAPS__"); if (isa_flag & OPTION_MASK_ISA_AVX512BITALG) - //def_or_undef (parse_in, "__AVX512BITALG__"); + ; //def_or_undef (parse_in, "__AVX512BITALG__"); if (isa_flag & OPTION_MASK_ISA_AVX512VPOPCNTDQ) - //def_or_undef (parse_in, "__AVX512VPOPCNTDQ__"); + rust_add_target_info("target_feature", "avx512vpopcntdq"); if (isa_flag & OPTION_MASK_ISA_FMA) rust_add_target_info("target_feature", "fma"); if (isa_flag & OPTION_MASK_ISA_RTM) - //def_or_undef (parse_in, "__RTM__"); + rust_add_target_info("target_feature", "rtm"); if (isa_flag & OPTION_MASK_ISA_SSE4A) - //def_or_undef (parse_in, "__SSE4A__"); + rust_add_target_info("target_feature", "sse4a"); if (isa_flag & OPTION_MASK_ISA_FMA4) - //def_or_undef (parse_in, "__FMA4__"); + ; //def_or_undef (parse_in, "__FMA4__"); if (isa_flag & OPTION_MASK_ISA_XOP) - //def_or_undef (parse_in, "__XOP__"); + ; //def_or_undef (parse_in, "__XOP__"); if (isa_flag & OPTION_MASK_ISA_LWP) - //def_or_undef (parse_in, "__LWP__"); + ; //def_or_undef (parse_in, "__LWP__"); if (isa_flag & OPTION_MASK_ISA_ABM) - //def_or_undef (parse_in, "__ABM__"); + ; //def_or_undef (parse_in, "__ABM__"); if (isa_flag & OPTION_MASK_ISA_BMI) rust_add_target_info("target_feature", "bmi1"); if (isa_flag & OPTION_MASK_ISA_BMI2) @@ -158,21 +166,21 @@ ix86_rust_target_cpu_info (void) if (isa_flag & OPTION_MASK_ISA_LZCNT) rust_add_target_info("target_feature", "lzcnt"); if (isa_flag & OPTION_MASK_ISA_TBM) - //def_or_undef (parse_in, "__TBM__"); + rust_add_target_info("target_feature", "tbm"); if (isa_flag & OPTION_MASK_ISA_POPCNT) rust_add_target_info("target_feature", "popcnt"); if (isa_flag & OPTION_MASK_ISA_FSGSBASE) - //def_or_undef (parse_in, "__FSGSBASE__"); + ; //def_or_undef (parse_in, "__FSGSBASE__"); if (isa_flag & OPTION_MASK_ISA_RDRND) rust_add_target_info("target_feature", "rdrand"); if (isa_flag & OPTION_MASK_ISA_F16C) - //def_or_undef (parse_in, "__F16C__"); + 
rust_add_target_info("target_feature", "f16c"); if (isa_flag & OPTION_MASK_ISA_RDSEED) rust_add_target_info("target_feature", "rdseed"); if (isa_flag & OPTION_MASK_ISA_PRFCHW) - //def_or_undef (parse_in, "__PRFCHW__"); + ; //def_or_undef (parse_in, "__PRFCHW__"); if (isa_flag & OPTION_MASK_ISA_ADX) - //def_or_undef (parse_in, "__ADX__"); + rust_add_target_info("target_feature", "adx"); if (isa_flag & OPTION_MASK_ISA_FXSR) rust_add_target_info("target_feature", "fxsr"); if (isa_flag & OPTION_MASK_ISA_XSAVE) @@ -180,57 +188,65 @@ ix86_rust_target_cpu_info (void) if (isa_flag & OPTION_MASK_ISA_XSAVEOPT) rust_add_target_info("target_feature", "xsaveopt"); if (isa_flag & OPTION_MASK_ISA_PREFETCHWT1) - //def_or_undef (parse_in, "__PREFETCHWT1__"); + ; //def_or_undef (parse_in, "__PREFETCHWT1__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) - //def_or_undef (parse_in, "__SSE_MATH__"); + ; //def_or_undef (parse_in, "__SSE_MATH__"); if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) - //def_or_undef (parse_in, "__SSE2_MATH__"); + ; //def_or_undef (parse_in, "__SSE2_MATH__"); if (isa_flag & OPTION_MASK_ISA_CLFLUSHOPT) - //def_or_undef (parse_in, "__CLFLUSHOPT__"); - if (isa_flag2 & OPTION_MASK_ISA_CLZERO) - //def_or_undef (parse_in, "__CLZERO__"); + ; //def_or_undef (parse_in, "__CLFLUSHOPT__"); + if (isa_flag2 & OPTION_MASK_ISA2_CLZERO) + ; //def_or_undef (parse_in, "__CLZERO__"); if (isa_flag & OPTION_MASK_ISA_XSAVEC) rust_add_target_info("target_feature", "xsavec"); if (isa_flag & OPTION_MASK_ISA_XSAVES) rust_add_target_info("target_feature", "xsaves"); if (isa_flag & OPTION_MASK_ISA_CLWB) - //def_or_undef (parse_in, "__CLWB__"); - if (isa_flag2 & OPTION_MASK_ISA_MWAITX) - //def_or_undef (parse_in, "__MWAITX__"); + ; //def_or_undef (parse_in, "__CLWB__"); + if (isa_flag2 & OPTION_MASK_ISA2_MWAITX) + ; //def_or_undef (parse_in, "__MWAITX__"); if (isa_flag & OPTION_MASK_ISA_PKU) - //def_or_undef (parse_in, "__PKU__"); - if (isa_flag2 & 
OPTION_MASK_ISA_RDPID) - //def_or_undef (parse_in, "__RDPID__"); + ; //def_or_undef (parse_in, "__PKU__"); + if (isa_flag2 & OPTION_MASK_ISA2_RDPID) + ; //def_or_undef (parse_in, "__RDPID__"); if (isa_flag & OPTION_MASK_ISA_GFNI) - //def_or_undef (parse_in, "__GFNI__"); + ; //def_or_undef (parse_in, "__GFNI__"); if ((isa_flag & OPTION_MASK_ISA_SHSTK)) - //def_or_undef (parse_in, "__SHSTK__"); - if (isa_flag2 & OPTION_MASK_ISA_VAES) - //def_or_undef (parse_in, "__VAES__"); + ; //def_or_undef (parse_in, "__SHSTK__"); + if (isa_flag2 & OPTION_MASK_ISA2_VAES) + ; //def_or_undef (parse_in, "__VAES__"); if (isa_flag & OPTION_MASK_ISA_VPCLMULQDQ) rust_add_target_info("target_feature", "pclmulqdq"); if (isa_flag & OPTION_MASK_ISA_MOVDIRI) - //def_or_undef (parse_in, "__MOVDIRI__"); - if (isa_flag2 & OPTION_MASK_ISA_MOVDIR64B) - //def_or_undef (parse_in, "__MOVDIR64B__"); - if (isa_flag2 & OPTION_MASK_ISA_WAITPKG) - //def_or_undef (parse_in, "__WAITPKG__"); - if (isa_flag2 & OPTION_MASK_ISA_CLDEMOTE) - //def_or_undef (parse_in, "__CLDEMOTE__"); - if (isa_flag2 & OPTION_MASK_ISA_PTWRITE) - //def_or_undef (parse_in, "__PTWRITE__"); - if (isa_flag2 & OPTION_MASK_ISA_AVX512BF16) - //def_or_undef (parse_in, "__AVX512BF16__"); + ; //def_or_undef (parse_in, "__MOVDIRI__"); + if (isa_flag2 & OPTION_MASK_ISA2_MOVDIR64B) + ; //def_or_undef (parse_in, "__MOVDIR64B__"); + if (isa_flag2 & OPTION_MASK_ISA2_WAITPKG) + ; //def_or_undef (parse_in, "__WAITPKG__"); + if (isa_flag2 & OPTION_MASK_ISA2_CLDEMOTE) + ; //def_or_undef (parse_in, "__CLDEMOTE__"); + if (isa_flag2 & OPTION_MASK_ISA2_PTWRITE) + ; //def_or_undef (parse_in, "__PTWRITE__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVX512BF16) + ; //def_or_undef (parse_in, "__AVX512BF16__"); if (TARGET_MMX_WITH_SSE) - //def_or_undef (parse_in, "__MMX_WITH_SSE__"); - if (isa_flag2 & OPTION_MASK_ISA_ENQCMD) - //def_or_undef (parse_in, "__ENQCMD__"); + ; //def_or_undef (parse_in, "__MMX_WITH_SSE__"); + if (isa_flag2 & OPTION_MASK_ISA2_ENQCMD) + ; 
//def_or_undef (parse_in, "__ENQCMD__"); if (TARGET_IAMCU) { //def_or_undef (parse_in, "__iamcu"); //def_or_undef (parse_in, "__iamcu__"); } + if (TARGET_CMPXCHG16B) + rust_add_target_info("target_feature", "cmpxchg16b"); + if (TARGET_MOVBE) + rust_add_target_info("target_feature", "movbe"); + +#undef isa_flag +#undef isa_flag2 +#undef fpmath } #undef STR_HELPER_RUST -#undef STRINGIFY_RUST
\ No newline at end of file +#undef STRINGIFY_RUST diff --git a/gcc/config/i386/linux-common.h b/gcc/config/i386/linux-common.h index 96dc632..f7fd065 100644 --- a/gcc/config/i386/linux-common.h +++ b/gcc/config/i386/linux-common.h @@ -34,7 +34,13 @@ along with GCC; see the file COPYING3. If not see ANDROID_TARGET_RUST_OS_INFO(); // TODO: decide on whether following c frontend style or d one - leaning towards c -#undef TARGET_RUST_OS_INFO + +/*#ifdef TARGET_RUST_OS_INFO +# error "TARGET_RUST_OS_INFO already defined in linux-common.h (i386) - c++ undefines it and redefines it." +# error "note that this above error (linux-common-i386) is expected due to already defining EXTRA_TARGET stuff" +#endif*/ +/* This is previously defined in gnu-user-common.h, but has no linux-specific info. */ +#undef TARGET_RUST_OS_INFO #define TARGET_RUST_OS_INFO() \ do { \ GNU_USER_TARGET_RUST_OS_INFO(); \ diff --git a/gcc/config/linux.h b/gcc/config/linux.h index ae8f67b..9d40cf2 100644 --- a/gcc/config/linux.h +++ b/gcc/config/linux.h @@ -72,6 +72,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see builtin_rust_info ("target_vendor", "unknown"); \ /*is there way of determining target_os and target_env here since could also be android?*/ \ /*target_vendor may not be "unknown" - FIXME ensure it is*/ \ + if (OPTION_GLIBC) \ + builtin_rust_info ("target_env", "gnu"); \ + else if (OPTION_MUSL) \ + builtin_rust_info ("target_env", "musl"); \ + else /*TODO: determine if bionic and uclibc are considered to be different envs in rustc*/ \ + builtin_rust_info ("target_env", ""); \ } while (0) /* Determine which dynamic linker to use depending on whether GLIBC or diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 3c4682b..0a6bff2 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -293,6 +293,9 @@ extern void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, /* Declare functions in rs6000-d.c */ extern void rs6000_d_target_versions (void); +/* Declare functions in rs6000-rust.c */ +extern void rs6000_rust_target_cpu_info (void); + #ifdef NO_DOLLAR_IN_LABEL const char * rs6000_xcoff_strip_dollar (const char *); #endif diff --git a/gcc/config/rs6000/rs6000-rust.c b/gcc/config/rs6000/rs6000-rust.c new file mode 100644 index 0000000..d28ed9f --- /dev/null +++ b/gcc/config/rs6000/rs6000-rust.c @@ -0,0 +1,81 @@ +/* Subroutines for the Rust front end on the PowerPC architecture. + Copyright (C) 2020 Free Software Foundation, Inc. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. 
If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rust/rust-target.h" +#include "rust/rust-target-def.h" + +/* Implement TARGET_RUST_CPU_INFO for PowerPC targets. */ + +void +rs6000_rust_target_cpu_info (void) +{ + // note that rustc makes no arch distinction between powerpc64 and powerpc64 little endian + if (TARGET_64BIT) + rust_add_target_info ("target_arch", "powerpc64"); + else + rust_add_target_info ("target_arch", "powerpc"); + + // TODO: define properly instead of macros +#ifdef flags +# error "multiple flags already defined in rs6000-rust.c" +#endif +#define flags rs6000_isa_flags + + // options should be (almost at least - i.e. power8-altivec and the like) feature complete with rustc + if ((flags & OPTION_MASK_ALTIVEC) != 0) + rust_add_target_info ("target_feature", "altivec"); + if ((flags & OPTION_MASK_VSX) != 0) + rust_add_target_info ("target_feature", "vsx"); + /* I can't find any separate gcc equivalent to "power8-altivec" in llvm, but power8-vector has it as a + * prerequisite, so just implicitly enable it when enabling the vector. TODO search for it. */ + if ((flags & OPTION_MASK_P8_VECTOR) != 0) { + rust_add_target_info ("target_feature", "power8-vector"); + rust_add_target_info ("target_feature", "power8-altivec"); + } + if ((flags & OPTION_MASK_CRYPTO) != 0) + rust_add_target_info ("target_feature", "crypto"); + if ((flags & OPTION_MASK_HTM) != 0) + rust_add_target_info ("target_feature", "htm"); + if ((flags & OPTION_MASK_FLOAT128_KEYWORD) != 0) + rust_add_target_info ("target_feature", "float128"); + // Same implicit enabling of power9-altivec happens with power9-vector. 
+ if ((flags & OPTION_MASK_P9_VECTOR) != 0) { + rust_add_target_info ("target_feature", "power9-vector"); + rust_add_target_info ("target_feature", "power9-altivec"); + } + if ((flags & OPTION_MASK_DIRECT_MOVE) != 0) + rust_add_target_info ("target_feature", "direct-move"); + + if (TARGET_SECURE_PLT) + rust_add_target_info ("target_feature", "secure-plt"); + + if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) + ; // apparently not an option - TODO find out if it is + else + rust_add_target_info ("target_feature", "hard-float"); + + // TODO: some possible features (in rustc, listed under powerpc-wrs-vxworks-spe) - "msync" + // other possible features (in clang) - "qpx" (when cpu = "a2q"), "bpermd", "extdiv", "spe" + + // note: in gcc, it is possible bpermd is available if popcntd is available (which is power 7) + +#undef flags +} diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 5bf9c83..cf3651c 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -643,6 +643,9 @@ extern unsigned char rs6000_recip_bits[]; /* Target CPU versions for D. */ #define TARGET_D_CPU_VERSIONS rs6000_d_target_versions +/* Target CPU info for Rust. */ +#define TARGET_RUST_CPU_INFO rs6000_rust_target_cpu_info + /* This is used by rs6000_cpu_cpp_builtins to indicate the byte order we're compiling for. Some configurations may need to override it. 
*/ #define RS6000_CPU_CPP_ENDIAN_BUILTINS() \ diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 1ddb572..7b6b878 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -39,6 +39,10 @@ rs6000-d.o: $(srcdir)/config/rs6000/rs6000-d.c $(COMPILE) $< $(POSTCOMPILE) +rs6000-rust.o: $(srcdir)/config/rs6000/rs6000-rust.c + $(COMPILE) $< + $(POSTCOMPILE) + rs6000-logue.o: $(srcdir)/config/rs6000/rs6000-logue.c $(COMPILE) $< $(POSTCOMPILE) diff --git a/gcc/configure b/gcc/configure index 3316dd7..695335d 100755 --- a/gcc/configure +++ b/gcc/configure @@ -645,6 +645,7 @@ GMPINC GMPLIBS target_cpu_default d_target_objs +rust_target_objs fortran_target_objs cxx_target_objs c_target_objs @@ -654,6 +655,8 @@ xm_include_list xm_file_list tm_d_include_list tm_d_file_list +tm_rust_include_list +tm_rust_file_list tm_p_include_list tm_p_file_list tm_defines @@ -12498,6 +12501,7 @@ fi tm_file="${tm_file} defaults.h" tm_p_file="${tm_p_file} tm-preds.h" tm_d_file="${tm_d_file} defaults.h" +tm_rust_file="${tm_rust_file} defaults.h" host_xm_file="auto-host.h ansidecl.h ${host_xm_file}" build_xm_file="${build_auto} ansidecl.h ${build_xm_file}" # We don't want ansidecl.h in target files, write code there in ISO/GNU C. 
@@ -12906,6 +12910,21 @@ for f in $tm_d_file; do esac done +tm_rust_file_list= +tm_rust_include_list="options.h insn-constants.h" +for f in $tm_rust_file; do + case $f in + defaults.h ) + tm_rust_file_list="${tm_rust_file_list} \$(srcdir)/$f" + tm_rust_include_list="${tm_rust_include_list} $f" + ;; + * ) + tm_rust_file_list="${tm_rust_file_list} \$(srcdir)/config/$f" + tm_rust_include_list="${tm_rust_include_list} config/$f" + ;; + esac +done + xm_file_list= xm_include_list= for f in $xm_file; do diff --git a/gcc/configure.ac b/gcc/configure.ac index b410428..d9df20f 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -1994,6 +1994,7 @@ AC_SUBST(HAVE_AUTO_BUILD) tm_file="${tm_file} defaults.h" tm_p_file="${tm_p_file} tm-preds.h" tm_d_file="${tm_d_file} defaults.h" +tm_rust_file="${tm_rust_file} defaults.h" host_xm_file="auto-host.h ansidecl.h ${host_xm_file}" build_xm_file="${build_auto} ansidecl.h ${build_xm_file}" # We don't want ansidecl.h in target files, write code there in ISO/GNU C. 
@@ -2251,6 +2252,21 @@ for f in $tm_d_file; do esac done +tm_rust_file_list= +tm_rust_include_list="options.h insn-constants.h" +for f in $tm_rust_file; do + case $f in + defaults.h ) + tm_rust_file_list="${tm_rust_file_list} \$(srcdir)/$f" + tm_rust_include_list="${tm_rust_include_list} $f" + ;; + * ) + tm_rust_file_list="${tm_rust_file_list} \$(srcdir)/config/$f" + tm_rust_include_list="${tm_rust_include_list} config/$f" + ;; + esac +done + xm_file_list= xm_include_list= for f in $xm_file; do @@ -7070,6 +7086,8 @@ AC_SUBST(tm_p_file_list) AC_SUBST(tm_p_include_list) AC_SUBST(tm_d_file_list) AC_SUBST(tm_d_include_list) +AC_SUBST(tm_rust_file_list) +AC_SUBST(tm_rust_include_list) AC_SUBST(xm_file_list) AC_SUBST(xm_include_list) AC_SUBST(xm_defines) @@ -7078,6 +7096,7 @@ AC_SUBST(c_target_objs) AC_SUBST(cxx_target_objs) AC_SUBST(fortran_target_objs) AC_SUBST(d_target_objs) +AC_SUBST(rust_target_objs) AC_SUBST(target_cpu_default) AC_SUBST_FILE(language_hooks) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index d9502c2..39da827 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -53,6 +53,7 @@ through the macros defined in the @file{.h} file. * PCH Target:: Validity checking for precompiled headers. * C++ ABI:: Controlling C++ ABI changes. * D Language and ABI:: Controlling D ABI changes. +* Rust Language and ABI:: Controlling Rust ABI changes. * Named Address Spaces:: Adding support for named address spaces * Misc:: Everything else. @end menu @@ -10822,6 +10823,22 @@ Similarly to @code{TARGET_D_CPU_VERSIONS}, but is used for versions relating to the target operating system. @end deftypefn +@node Rust Language and ABI +@section Rust ABI parameters +@cindex parameters, rust abi + +@deftypefn {Rust Target Hook} void TARGET_RUST_CPU_INFO (void) +Declare all environmental CPU info and features relating to the target CPU +using the function @code{rust_add_target_info}, which takes a string representing +the feature key and a string representing the feature value. 
Configuration pairs +predefined by this hook apply to all files that are being compiled. +@end deftypefn + +@deftypefn {Rust Target Hook} void TARGET_RUST_OS_INFO (void) +Similarly to @code{TARGET_RUST_CPU_INFO}, but is used for configuration info +relating to the target operating system. +@end deftypefn + @node Named Address Spaces @section Adding support for named address spaces @cindex named address spaces diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index b08923c..e658e38 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -53,6 +53,7 @@ through the macros defined in the @file{.h} file. * PCH Target:: Validity checking for precompiled headers. * C++ ABI:: Controlling C++ ABI changes. * D Language and ABI:: Controlling D ABI changes. +* Rust Language and ABI:: Controlling Rust ABI changes. * Named Address Spaces:: Adding support for named address spaces * Misc:: Everything else. @end menu @@ -7353,6 +7354,14 @@ floating-point support; they are not included in this mechanism. @hook TARGET_D_OS_VERSIONS +@node Rust Language and ABI +@section Rust ABI parameters +@cindex parameters, rust abi + +@hook TARGET_RUST_CPU_INFO + +@hook TARGET_RUST_OS_INFO + @node Named Address Spaces @section Adding support for named address spaces @cindex named address spaces diff --git a/gcc/genhooks.c b/gcc/genhooks.c index 3bcd31b..bee1212 100644 --- a/gcc/genhooks.c +++ b/gcc/genhooks.c @@ -35,6 +35,7 @@ static struct hook_desc hook_array[] = { #include "c-family/c-target.def" #include "common/common-target.def" #include "d/d-target.def" +#include "rust/rust-target.def" #undef DEFHOOK }; diff --git a/gcc/rust/Make-lang.in b/gcc/rust/Make-lang.in index 6512f61..1a5622e 100644 --- a/gcc/rust/Make-lang.in +++ b/gcc/rust/Make-lang.in @@ -72,12 +72,15 @@ GRS_OBJS = \ $(END) # removed object files from here -rust_OBJS = $(GRS_OBJS) rust/rustspec.o +# All language-specific object files for Rust. 
+RUST_ALL_OBJS = $(GRS_OBJS) $(RUST_TARGET_OBJS) + +rust_OBJS = $(RUST_ALL_OBJS) rust/rustspec.o # The compiler itself is called rust1 (formerly grs1) -rust1$(exeext): $(GRS_OBJS) attribs.o $(BACKEND) $(LIBDEPS) +rust1$(exeext): $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBDEPS) +$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ - $(GRS_OBJS) attribs.o $(BACKEND) $(LIBS) $(BACKENDLIBS) + $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) $(BACKENDLIBS) # Build hooks. @@ -169,7 +172,7 @@ rust.install-plugin: rust.uninstall: # -rm -rf $(DESTDIR)/$(bindir)/$(GCCRS_INSTALL_NAME)$(exeext) -rm -f gccrs$(exeext) grs1$(exeext) - -rm -f $(GRS_OBJS) + -rm -f $(RUST_ALL_OBJS) # ^those two are a maybe # No rust-specific selftests diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index dc51b01..54adf48 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1,5 +1,6 @@ #include "rust-lex.h" +#include "rust-system.h" // for rust_assert and rust_unreachable #include "rust-diagnostics.h" // for rust_error_at #include "rust-linemap.h" #include "safe-ctype.h" @@ -7,2247 +8,1853 @@ #include <sstream> // for ostringstream namespace Rust { -// TODO: move to separate compilation unit? 
-// overload += for uint32_t to allow 32-bit encoded utf-8 to be added -::std::string & -operator+= (::std::string &str, Codepoint char32) -{ - if (char32.value < 0x80) - { - str += static_cast<char> (char32.value); - } - else if (char32.value < (0x1F + 1) << (1 * 6)) - { - str += static_cast<char> (0xC0 | ((char32.value >> 6) & 0x1F)); - str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F)); - } - else if (char32.value < (0x0F + 1) << (2 * 6)) - { - str += static_cast<char> (0xE0 | ((char32.value >> 12) & 0x0F)); - str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F)); - str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F)); - } - else if (char32.value < (0x07 + 1) << (3 * 6)) - { - str += static_cast<char> (0xF0 | ((char32.value >> 18) & 0x07)); - str += static_cast<char> (0x80 | ((char32.value >> 12) & 0x3F)); - str += static_cast<char> (0x80 | ((char32.value >> 6) & 0x3F)); - str += static_cast<char> (0x80 | ((char32.value >> 0) & 0x3F)); - } - else - { - fprintf (stderr, "Invalid unicode codepoint found: '%u' \n", - char32.value); - // rust_error_at(get_current_location(), "Invalid unicode codepoint found: - // '%u'", char32.value); + // TODO: move to separate compilation unit? 
+ // overload += for uint32_t to allow 32-bit encoded utf-8 to be added + ::std::string& operator+=(::std::string& str, Codepoint char32) { + if (char32.value < 0x80) { + str += static_cast<char>(char32.value); + } else if (char32.value < (0x1F + 1) << (1 * 6)) { + str += static_cast<char>(0xC0 | ((char32.value >> 6) & 0x1F)); + str += static_cast<char>(0x80 | ((char32.value >> 0) & 0x3F)); + } else if (char32.value < (0x0F + 1) << (2 * 6)) { + str += static_cast<char>(0xE0 | ((char32.value >> 12) & 0x0F)); + str += static_cast<char>(0x80 | ((char32.value >> 6) & 0x3F)); + str += static_cast<char>(0x80 | ((char32.value >> 0) & 0x3F)); + } else if (char32.value < (0x07 + 1) << (3 * 6)) { + str += static_cast<char>(0xF0 | ((char32.value >> 18) & 0x07)); + str += static_cast<char>(0x80 | ((char32.value >> 12) & 0x3F)); + str += static_cast<char>(0x80 | ((char32.value >> 6) & 0x3F)); + str += static_cast<char>(0x80 | ((char32.value >> 0) & 0x3F)); + } else { + fprintf(stderr, "Invalid unicode codepoint found: '%u' \n", char32.value); + // rust_error_at(get_current_location(), "Invalid unicode codepoint found: '%u'", + // char32.value); + } + return str; } - return str; -} -::std::string -Codepoint::as_string () -{ - std::string str; - - // do i need to do this? or can i just do str += value due to op overloading? 
- - // ok can't figure out how to just convert to codepoint or use "this" so - // create new one - str += Codepoint (value); - - /*if (value < 0x80) { - str += static_cast<char>(value); - } else if (value < (0x1F + 1) << (1 * 6)) { - str += static_cast<char>(0xC0 | ((value >> 6) & 0x1F)); - str += static_cast<char>(0x80 | ((value >> 0) & 0x3F)); - } else if (value < (0x0F + 1) << (2 * 6)) { - str += static_cast<char>(0xE0 | ((value >> 12) & 0x0F)); - str += static_cast<char>(0x80 | ((value >> 6) & 0x3F)); - str += static_cast<char>(0x80 | ((value >> 0) & 0x3F)); - } else if (value < (0x07 + 1) << (3 * 6)) { - str += static_cast<char>(0xF0 | ((value >> 18) & 0x07)); - str += static_cast<char>(0x80 | ((value >> 12) & 0x3F)); - str += static_cast<char>(0x80 | ((value >> 6) & 0x3F)); - str += static_cast<char>(0x80 | ((value >> 0) & 0x3F)); - } else { - rust_error_at(get_current_location(), "Invalid unicode codepoint found: - '%u'", value); - }*/ - return str; -} + ::std::string Codepoint::as_string() { + std::string str; + + // do i need to do this? or can i just do str += value due to op overloading? 
+ + // ok can't figure out how to just convert to codepoint or use "this" so create new one + str += Codepoint(value); + + /*if (value < 0x80) { + str += static_cast<char>(value); + } else if (value < (0x1F + 1) << (1 * 6)) { + str += static_cast<char>(0xC0 | ((value >> 6) & 0x1F)); + str += static_cast<char>(0x80 | ((value >> 0) & 0x3F)); + } else if (value < (0x0F + 1) << (2 * 6)) { + str += static_cast<char>(0xE0 | ((value >> 12) & 0x0F)); + str += static_cast<char>(0x80 | ((value >> 6) & 0x3F)); + str += static_cast<char>(0x80 | ((value >> 0) & 0x3F)); + } else if (value < (0x07 + 1) << (3 * 6)) { + str += static_cast<char>(0xF0 | ((value >> 18) & 0x07)); + str += static_cast<char>(0x80 | ((value >> 12) & 0x3F)); + str += static_cast<char>(0x80 | ((value >> 6) & 0x3F)); + str += static_cast<char>(0x80 | ((value >> 0) & 0x3F)); + } else { + rust_error_at(get_current_location(), "Invalid unicode codepoint found: '%u'", value); + }*/ + return str; + } -// Includes all allowable float digits EXCEPT _ and . as that needs lookahead -// for handling. -inline bool -is_float_digit (char number) -{ - return ISDIGIT (number) || number == 'E' || number == 'e'; -} + // Includes all allowable float digits EXCEPT _ and . as that needs lookahead for handling. 
+ inline bool is_float_digit(char number) { + return ISDIGIT(number) || number == 'E' || number == 'e'; + } -// Basically ISXDIGIT from safe-ctype but may change if Rust's encoding or -// whatever is different -inline bool -is_x_digit (char number) -{ - return ISXDIGIT (number); -} + // Basically ISXDIGIT from safe-ctype but may change if Rust's encoding or whatever is different + inline bool is_x_digit(char number) { + return ISXDIGIT(number); + } -inline bool -is_octal_digit (char number) -{ - return number >= '0' && number <= '7'; -} + inline bool is_octal_digit(char number) { + return number >= '0' && number <= '7'; + } -inline bool -is_bin_digit (char number) -{ - return number == '0' || number == '1'; -} + inline bool is_bin_digit(char number) { + return number == '0' || number == '1'; + } -inline bool -check_valid_float_dot_end (char character) -{ - return character != '.' && character != '_' && !ISALPHA (character); -} + inline bool check_valid_float_dot_end(char character) { + return character != '.' 
&& character != '_' && !ISALPHA(character); + } -// ISSPACE from safe-ctype but may change in future -inline bool -is_whitespace (char character) -{ - return ISSPACE (character); -} + // ISSPACE from safe-ctype but may change in future + inline bool is_whitespace(char character) { + return ISSPACE(character); + } -Lexer::Lexer (const char *filename, FILE *input, Linemap *linemap) - : input (input), current_line (1), current_column (1), line_map (linemap), - input_source (input), input_queue (input_source), token_source (this), - token_queue (token_source) -{ - // inform line_table that file is being entered and is in line 1 - line_map->start_file (filename, current_line); -} + Lexer::Lexer(const char* filename, FILE* input, Linemap* linemap) : + input(input), current_line(1), current_column(1), line_map(linemap), input_source(input), + input_queue(input_source), token_source(this), token_queue(token_source) { + // inform line_table that file is being entered and is in line 1 + line_map->start_file(filename, current_line); + } -Lexer::~Lexer () -{ - /* ok apparently stop (which is equivalent of original code in destructor) is - * meant to be called after all files have finished parsing, for cleanup. On - * the other hand, actual code that it calls to leave a certain line map is - * mentioned in GCC docs as being useful for "just leaving an included header" - * and stuff like that, so this line mapping functionality may need fixing. - * FIXME: find out whether this occurs. */ - // line_map->stop(); -} + Lexer::~Lexer() { + /* ok apparently stop (which is equivalent of original code in destructor) is meant to be + * called after all files have finished parsing, for cleanup. On the other hand, actual code + * that it calls to leave a certain line map is mentioned in GCC docs as being useful for + * "just leaving an included header" and stuff like that, so this line mapping functionality + * may need fixing. + * FIXME: find out whether this occurs. 
*/ + // line_map->stop(); + } -// TODO: need to optimise somehow to avoid the virtual function call in the -// tight loop. Best idea at the moment is CRTP, but that might make lexer -// implementation annoying when storing the "base class" (i.e. would need -// template parameter everywhere), although in practice it would mostly just -// look ugly and make enclosing classes like Parser also require a type -// parameter. At this point a macro might be better. OK I guess macros can be -// replaced by constexpr if or something if possible. -Location -Lexer::get_current_location () -{ - return line_map->get_location (current_column); -} + // TODO: need to optimise somehow to avoid the virtual function call in the tight loop. + // Best idea at the moment is CRTP, but that might make lexer implementation annoying when storing + // the "base class" (i.e. would need template parameter everywhere), although in practice it would + // mostly just look ugly and make enclosing classes like Parser also require a type parameter. + // At this point a macro might be better. + // OK I guess macros can be replaced by constexpr if or something if possible. 
+ Location Lexer::get_current_location() { + return line_map->get_location(current_column); + } -int -Lexer::peek_input (int n) -{ - return input_queue.peek (n); -} + int Lexer::peek_input(int n) { + return input_queue.peek(n); + } -int -Lexer::peek_input () -{ - return peek_input (0); -} + int Lexer::peek_input() { + return peek_input(0); + } -void -Lexer::skip_input (int n) -{ - input_queue.skip (n); -} + void Lexer::skip_input(int n) { + input_queue.skip(n); + } -void -Lexer::skip_input () -{ - skip_input (0); -} + void Lexer::skip_input() { + skip_input(0); + } -const_TokenPtr -Lexer::peek_token (int n) -{ - return token_queue.peek (n); -} + const_TokenPtr Lexer::peek_token(int n) { + return token_queue.peek(n); + } -const_TokenPtr -Lexer::peek_token () -{ - return peek_token (0); -} + const_TokenPtr Lexer::peek_token() { + return peek_token(0); + } -void -Lexer::skip_token (int n) -{ - token_queue.skip (n); -} + void Lexer::skip_token(int n) { + token_queue.skip(n); + } -void -Lexer::skip_token () -{ - skip_token (0); -} + void Lexer::skip_token() { + skip_token(0); + } -void -Lexer::replace_current_token (TokenPtr replacement) -{ - token_queue.replace_current_value (replacement); -} + void Lexer::replace_current_token(TokenPtr replacement) { + token_queue.replace_current_value(replacement); + } -/* shitty anonymous namespace that can only be accessed inside the compilation - * unit - used for classify_keyword Binary search in sorted array of keywords - * created with x-macros. */ -namespace { -const std::string keyword_index[] = { + /* shitty anonymous namespace that can only be accessed inside the compilation unit - used for + * classify_keyword + * Binary search in sorted array of keywords created with x-macros. 
*/ + namespace { + const std::string keyword_index[] = { #define RS_TOKEN(x, y) #define RS_TOKEN_KEYWORD(name, keyword) keyword, - RS_TOKEN_LIST + RS_TOKEN_LIST #undef RS_TOKEN_KEYWORD #undef RS_TOKEN -}; + }; -TokenId keyword_keys[] = { + TokenId keyword_keys[] = { #define RS_TOKEN(x, y) #define RS_TOKEN_KEYWORD(name, keyword) name, - RS_TOKEN_LIST + RS_TOKEN_LIST #undef RS_TOKEN_KEYWORD #undef RS_TOKEN -}; - -const int num_keywords = sizeof (keyword_index) / sizeof (*keyword_index); -} // namespace - -/* Determines whether the string passed in is a keyword or not. If it is, it - * returns the keyword name. */ -TokenId -Lexer::classify_keyword (const std::string &str) -{ - const std::string *last = keyword_index + num_keywords; - const std::string *idx = std::lower_bound (keyword_index, last, str); - - if (idx == last || str != *idx) - { - return IDENTIFIER; - } - else - { - return keyword_keys[idx - keyword_index]; - } -} + }; -TokenPtr -Lexer::build_token () -{ - // loop to go through multiple characters to build a single token - while (true) - { - Location loc = get_current_location (); - /*int */ current_char = peek_input (); - skip_input (); - - // return end of file token if end of file - if (current_char == EOF) - { - return Token::make (END_OF_FILE, loc); - } - - // detect shebang - if (loc == 1 && current_line == 1 && current_char == '#') - { - current_char = peek_input (); - - if (current_char == '!') - { - skip_input (); - current_char = peek_input (); - - switch (current_char) - { - case '/': - // shebang - - skip_input (); - - // ignore rest of line - while (current_char != '\n') - { - current_char = peek_input (); - skip_input (); - } - - // newline - current_line++; - current_column = 1; - // tell line_table that new line starts - line_map->start_line (current_line, max_column_hint); - continue; - } - } - } - - // if not end of file, start tokenising - switch (current_char) - { - // ignore whitespace characters for tokens but continue updating - // 
location - case '\n': // newline - current_line++; - current_column = 1; - // tell line_table that new line starts - linemap_line_start (::line_table, current_line, max_column_hint); - continue; - case ' ': // space - current_column++; - continue; - case '\t': // tab - // width of a tab is not well-defined, assume 8 spaces - current_column += 8; - continue; - - // punctuation - actual tokens - case '=': - if (peek_input () == '>') - { - // match arm arrow - skip_input (); - current_column += 2; - - return Token::make (MATCH_ARROW, loc); - } - else if (peek_input () == '=') - { - // equality operator - skip_input (); - current_column += 2; - - return Token::make (EQUAL_EQUAL, loc); - } - else - { - // assignment operator - current_column++; - return Token::make (EQUAL, loc); - } - case '(': - current_column++; - return Token::make (LEFT_PAREN, loc); - case '-': - if (peek_input () == '>') - { - // return type specifier - skip_input (); - current_column += 2; - - return Token::make (RETURN_TYPE, loc); - } - else if (peek_input () == '=') - { - // minus-assign - skip_input (); - current_column += 2; - - return Token::make (MINUS_EQ, loc); - } - else - { - // minus - current_column++; - return Token::make (MINUS, loc); - } - case '+': - if (peek_input () == '=') - { - // add-assign - skip_input (); - current_column += 2; - - return Token::make (PLUS_EQ, loc); - } - else - { - // add - current_column++; - return Token::make (PLUS, loc); - } - case ')': - current_column++; - return Token::make (RIGHT_PAREN, loc); - case ';': - current_column++; - return Token::make (SEMICOLON, loc); - case '*': - if (peek_input () == '=') - { - // multiplication-assign - skip_input (); - current_column += 2; - - return Token::make (ASTERISK_EQ, loc); - } - else - { - // multiplication - current_column++; - return Token::make (ASTERISK, loc); - } - case ',': - current_column++; - return Token::make (COMMA, loc); - case '/': - if (peek_input () == '=') - { - // division-assign - skip_input 
(); - current_column += 2; - - return Token::make (DIV_EQ, loc); - } - else if (peek_input () == '/') - { - // TODO: single-line doc comments - - // single line comment - skip_input (); - current_column += 2; - - // basically ignore until line finishes - while (current_char != '\n' && current_char != EOF) - { - skip_input (); - current_column++; // not used - current_char = peek_input (); - } - continue; - break; - } - else if (peek_input () == '*') - { - // block comment - skip_input (); - current_column += 2; - - // TODO: block doc comments - - current_char = peek_input (); - - int level = 1; - while (level > 0) - { - skip_input (); - current_column++; // for error-handling - current_char = peek_input (); - - // if /* found - if (current_char == '/') - { - if (peek_input (1) == '*') - { - // skip /* characters - skip_input (1); - - current_column += 2; - - level += 1; - } - } - - // ignore until */ is found - if (current_char == '*') - { - if (peek_input (1) == '/') - { - // skip */ characters - skip_input (1); - - current_column += 2; - // should only break inner loop here - seems to do so - // break; - - level -= 1; - } - } - } - - // refresh new token - continue; - break; - } - else - { - // division - current_column++; - return Token::make (DIV, loc); - } - case '%': - if (peek_input () == '=') - { - // modulo-assign - current_column += 2; - return Token::make (PERCENT_EQ, loc); - } - else - { - // modulo - current_column++; - return Token::make (PERCENT, loc); - } - case '^': - if (peek_input () == '=') - { - // xor-assign? - current_column += 2; - return Token::make (CARET_EQ, loc); - } - else - { - // xor? 
- current_column++; - return Token::make (CARET, loc); - } - case '<': - if (peek_input () == '<') - { - if (peek_input (1) == '=') - { - // left-shift assign - skip_input (1); - current_column += 3; - - return Token::make (LEFT_SHIFT_EQ, loc); - } - else - { - // left-shift - skip_input (); - current_column += 2; - - return Token::make (LEFT_SHIFT, loc); - } - } - else if (peek_input () == '=') - { - // smaller than or equal to - skip_input (); - current_column += 2; - - return Token::make (LESS_OR_EQUAL, loc); - } - else - { - // smaller than - current_column++; - return Token::make (LEFT_ANGLE, loc); - } - break; - case '>': - if (peek_input () == '>') - { - if (peek_input (1) == '=') - { - // right-shift-assign - skip_input (1); - current_column += 3; - - return Token::make (RIGHT_SHIFT_EQ, loc); - } - else - { - // right-shift - skip_input (); - current_column += 2; - - return Token::make (RIGHT_SHIFT, loc); - } - } - else if (peek_input () == '=') - { - // larger than or equal to - skip_input (); - current_column += 2; - - return Token::make (GREATER_OR_EQUAL, loc); - } - else - { - // larger than - current_column++; - return Token::make (RIGHT_ANGLE, loc); - } - case ':': - if (peek_input () == ':') - { - // scope resolution :: - skip_input (); - current_column += 2; - - return Token::make (SCOPE_RESOLUTION, loc); - } - else - { - // single colon : - current_column++; - return Token::make (COLON, loc); - } - case '!': - // no special handling for macros in lexer? 
- if (peek_input () == '=') - { - // not equal boolean operator - skip_input (); - current_column += 2; - - return Token::make (NOT_EQUAL, loc); - } - else - { - // not equal unary operator - current_column++; - - return Token::make (EXCLAM, loc); - } - case '?': - current_column++; - return Token::make (QUESTION_MARK, loc); - case '#': - current_column++; - return Token::make (HASH, loc); - case '[': - current_column++; - return Token::make (LEFT_SQUARE, loc); - case ']': - current_column++; - return Token::make (RIGHT_SQUARE, loc); - case '{': - current_column++; - return Token::make (LEFT_CURLY, loc); - case '}': - current_column++; - return Token::make (RIGHT_CURLY, loc); - case '@': - // TODO: i don't know what this does, does it need special handling? - current_column++; - return Token::make (PATTERN_BIND, loc); - case '$': - // TODO: i don't know what this does, does it need special handling? - current_column++; - return Token::make (DOLLAR_SIGN, loc); - case '~': - // TODO: i don't know what this does, does it need special handling? - current_column++; - return Token::make (TILDE, loc); - case '\\': - // TODO: i don't know what this does, does it need special handling? - current_column++; - return Token::make (BACKSLASH, loc); - case '`': - // TODO: i don't know what this does, does it need special handling? - current_column++; - return Token::make (BACKTICK, loc); - case '|': - if (peek_input () == '=') - { - // bitwise or-assign? - skip_input (); - current_column += 2; - - return Token::make (PIPE_EQ, loc); - } - else if (peek_input () == '|') - { - // logical or - skip_input (); - current_column += 2; - - return Token::make (OR, loc); - } - else - { - // bitwise or - current_column++; - - return Token::make (PIPE, loc); - } - case '&': - if (peek_input () == '=') - { - // bitwise and-assign? 
- skip_input (); - current_column += 2; - - return Token::make (AMP_EQ, loc); - } - else if (peek_input () == '&') - { - // logical and - skip_input (); - current_column += 2; - - return Token::make (LOGICAL_AND, loc); - } - else - { - // bitwise and/reference - current_column++; - - return Token::make (AMP, loc); - } - case '.': - if (peek_input () == '.') - { - if (peek_input (1) == '.') - { - // ellipsis - skip_input (1); - current_column += 3; - - return Token::make (ELLIPSIS, loc); - } - else if (peek_input (1) == '=') - { - // ..= - skip_input (1); - current_column += 3; - - return Token::make (DOT_DOT_EQ, loc); - } - else - { - // .. - skip_input (); - current_column += 2; - - return Token::make (DOT_DOT, loc); - } - } - else if (!ISDIGIT (peek_input ())) - { - // single dot . - // Only if followed by a non-number - current_column++; - return Token::make (DOT, loc); - } - } - // TODO: special handling of _ in the lexer? instead of being identifier - - // byte and byte string test - if (current_char == 'b') - { - if (peek_input () == '\'') - { - // byte - allows any ascii or escapes - // would also have to take into account escapes: \x hex_digit - // hex_digit, \n, \r, \t, \\, \0 - - int length = 1; - - // char to save - char byte_char; - - skip_input (); - // make current char the next character - current_char = peek_input (); - - // detect escapes - if (current_char == '\\') - { - /*skip_input(); - - // make current_char next character (letter) - current_char = peek_input();*/ - - parse_escape (length, byte_char, '\''); - - if (byte_char > 127) - { - rust_error_at (get_current_location (), - "byte char '%c' out of range", byte_char); - byte_char = 0; - } - - // skip_input(); - current_char = peek_input (); - length++; - - if (current_char != '\'') - { - rust_error_at (get_current_location (), - "unclosed byte char"); - } - - // TODO: ensure skipping is needed here - skip_input (); - current_char = peek_input (); - length++; // go to next char - } - else if 
(current_char != '\'') - { - // otherwise, get character from direct input character - byte_char = current_char; - - skip_input (); - current_char = peek_input (); - - if (current_char != '\'') - { - rust_error_at (get_current_location (), - "unclosed byte char"); - } - - // TODO: ensure skipping is needed here - skip_input (); - current_char = peek_input (); - length++; // go to next char - } - else - { - rust_error_at (get_current_location (), - "no character inside '' for byte char"); - } - - current_column += length; - - return Token::make_byte_char (loc, byte_char); - } - else if (peek_input () == '"') - { - // byte string - - // skip quote character - skip_input (); - - std::string str; - str.reserve (16); // some sensible default - - int length = 1; - current_char = peek_input (); - // TODO: handle escapes properly - - while (current_char != '"' && current_char != '\n') - { - if (current_char == '\\') - { - char output_char = 0; - parse_escape (length, output_char, '"'); - - if (output_char > 127) - { - rust_error_at ( - get_current_location (), - "char '%c' in byte string out of range", - output_char); - output_char = 0; - } - - str += output_char; - - continue; - } - - length++; - - str += current_char; - skip_input (); - current_char = peek_input (); - } - - current_column += length; - - if (current_char == '\n') - { - rust_error_at (get_current_location (), - "unended byte string literal"); - } - else if (current_char == '"') - { - skip_input (); - current_char = peek_input (); - } - else - { - gcc_unreachable (); - } - - return Token::make_byte_string (loc, str); - // TODO: ensure escapes and string continue work properly - } - else if (peek_input () == 'r' - && (peek_input (1) == '#' || peek_input (1) == '"')) - { - // raw byte string literals - std::string str; - str.reserve (16); // some sensible default - - int length = 1; - int hash_count = 0; - - // get hash count at beginnning - skip_input (); - current_char = peek_input (); - while (current_char 
== '#') - { - hash_count++; - length++; - - skip_input (); - current_char = peek_input (); - } - - if (current_char != '"') - { - rust_error_at (get_current_location (), - "raw byte string has no opening '\"'"); - } - - skip_input (); - current_char = peek_input (); - - while (true) - { - if (current_char == '"') - { - bool enough_hashes = true; - - for (int i = 0; i < hash_count; i++) - { - if (peek_input (i + 1) != '#') - { - enough_hashes = false; // could continue here - - // improve performance - } - } - - if (enough_hashes) - { - // skip enough input and peek enough input - skip_input (hash_count); // is this enough? - current_char = peek_input (); - length += hash_count + 1; - break; - } - } - - length++; - - str += current_char; - skip_input (); - current_char = peek_input (); - } - - current_column += length; - - // TODO: does this work properly - return Token::make_byte_string (loc, str); - } - } - - // raw stuff - if (current_char == 'r') - { - int peek = peek_input (); - int peek1 = peek_input (1); - - if (peek == '#' && (ISALPHA (peek1) || peek1 == '_')) - { - // raw identifier - std::string str; - str.reserve (16); // default - - skip_input (); - current_char = peek_input (); - - current_column += 2; - - str += current_char; - - bool first_is_underscore = current_char == '_'; - - int length = 1; - current_char = peek_input (); - // loop through entire name - while (ISALPHA (current_char) || ISDIGIT (current_char) - || current_char == '_') - { - length++; - - str += current_char; - skip_input (); - current_char = peek_input (); - } - - current_column += length; - - // if just a single underscore, not an identifier - if (first_is_underscore && length == 1) - { - rust_error_at (get_current_location (), - "'_' is not a valid raw identifier"); - } - - if (str == "crate" || str == "extern" || str == "self" - || str == "super" || str == "Self") - { - rust_error_at (get_current_location (), - "'%s' is a forbidden raw identifier", - str.c_str ()); - } - else - 
{ - return Token::make_identifier (loc, str); - } - } - else if (peek == '"' - || (peek == '#' && (ISALPHA (peek1) || peek1 == '_'))) - { - // raw string literals - std::string str; - str.reserve (16); // some sensible default - - int length = 1; - int hash_count = 0; - - // get hash count at beginnning - current_char = peek; - while (current_char == '#') - { - hash_count++; - length++; - - skip_input (); - current_char = peek_input (); - } - - if (current_char != '"') - { - rust_error_at (get_current_location (), - "raw string has no opening '\"'"); - } - - skip_input (); - Codepoint current_char32 = test_peek_codepoint_input (); - - while (true) - { - if (current_char32.value == '"') - { - bool enough_hashes = true; - - for (int i = 0; i < hash_count; i++) - { - // if (test_peek_codepoint_input(i + 1) != '#') { - // TODO: ensure this is a good enough replacement - if (peek_input (i + 1) != '#') - { - enough_hashes = false; // could continue here - - // improve performance - } - } - - if (enough_hashes) - { - // skip enough input and peek enough input - skip_input (hash_count); // is this enough? 
- current_char = peek_input (); - length += hash_count + 1; - break; - } - } - - length++; - - str += current_char32; - test_skip_codepoint_input (); - current_char32 = test_peek_codepoint_input (); - } - - current_column += length; - - // TODO: does this work properly - return Token::make_string (loc, str); - } - } - - // find identifiers and keywords - if (ISALPHA (current_char) || current_char == '_') - { - std::string str; - str.reserve (16); // default - str += current_char; - - bool first_is_underscore = current_char == '_'; - - int length = 1; - current_char = peek_input (); - // loop through entire name - while (ISALPHA (current_char) || ISDIGIT (current_char) - || current_char == '_') - { - length++; - - str += current_char; - skip_input (); - current_char = peek_input (); - } - - current_column += length; - - // if just a single underscore, not an identifier - if (first_is_underscore && length == 1) - { - return Token::make (UNDERSCORE, loc); - } - - TokenId keyword = classify_keyword (str); - if (keyword == IDENTIFIER) - { - return Token::make_identifier (loc, str); - } - else - { - return Token::make (keyword, loc); - } - } - - // identify literals - // int or float literals - not processed properly - if (ISDIGIT (current_char) || current_char == '.') - { // _ not allowed as first char - std::string str; - str.reserve (16); // some sensible default - str += current_char; - - PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; - - bool is_real = (current_char == '.'); - - int length = 1; - - // handle binary, octal, hex literals - if (current_char == '0' && !ISDIGIT (peek_input ())) - { - current_char = peek_input (); - - if (current_char == 'x') - { - // hex (integer only) - - skip_input (); - current_char = peek_input (); - - length++; - - // add 'x' to string after 0 so it is 0xFFAA or whatever - str += 'x'; - - // loop through to add entire hex number to string - while (is_x_digit (current_char) || current_char == '_') - { - if (current_char == '_') - { 
- // don't add _ to number - skip_input (); - current_char = peek_input (); - - length++; - - continue; - } - - length++; - - // add raw hex numbers - str += current_char; - skip_input (); - current_char = peek_input (); - } - - current_column += length; - - // convert hex value to decimal representation - long hex_num = ::std::strtol (str.c_str (), NULL, 16); - - // create output string stream for hex value to be converted - // to string again - // TODO: if too slow, use sprintf - ::std::ostringstream ostr; - ostr << hex_num; - - // reassign string representation to converted value - str = ostr.str (); - - // parse in type suffix if it exists - parse_in_type_suffix (/*current_char, */ type_hint, length); - - if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) - { - rust_error_at ( - get_current_location (), - "invalid type suffix '%s' for integer (hex) literal", - get_type_hint_string (type_hint)); - } - } - else if (current_char == 'o') - { - // octal (integer only) - - skip_input (); - current_char = peek_input (); - - length++; - - // don't add any characters as C octals are just 0124 or - // whatever - - // loop through to add entire octal number to string - while (is_octal_digit (current_char) || current_char == '_') - { - if (current_char == '_') - { - // don't add _ to number - skip_input (); - current_char = peek_input (); - - length++; - - continue; - } - - length++; - - // add raw octal numbers - str += current_char; - skip_input (); - current_char = peek_input (); - } - - current_column += length; - - // convert octal value to decimal representation - long octal_num = ::std::strtol (str.c_str (), NULL, 8); - - // create output string stream for octal value to be converted - // to string again - // TODO: if too slow, use sprintf - ::std::ostringstream ostr; - ostr << octal_num; - - // reassign string representation to converted value - str = ostr.str (); - - // parse in type suffix if it exists - parse_in_type_suffix (/*current_char, */ 
type_hint, length); - - if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) - { - rust_error_at ( - get_current_location (), - "invalid type suffix '%s' for integer (octal) literal", - get_type_hint_string (type_hint)); - } - } - else if (current_char == 'b') - { - // binary (integer only) - - skip_input (); - current_char = peek_input (); - - length++; - - // don't add any characters as C binary numbers are not really - // supported - - // loop through to add entire binary number to string - while (is_bin_digit (current_char) || current_char == '_') - { - if (current_char == '_') - { - // don't add _ to number - skip_input (); - current_char = peek_input (); - - length++; - - continue; - } - - length++; - - // add raw binary numbers - str += current_char; - skip_input (); - current_char = peek_input (); - } - - current_column += length; - - // convert binary value to decimal representation - long bin_num = ::std::strtol (str.c_str (), NULL, 2); - - // create output string stream for binary value to be - // converted to string again - // TODO: if too slow, use sprintf - ::std::ostringstream ostr; - ostr << bin_num; - - // reassign string representation to converted value - str = ostr.str (); - - // parse in type suffix if it exists - parse_in_type_suffix (/*current_char, */ type_hint, length); - - if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) - { - rust_error_at ( - get_current_location (), - "invalid type suffix '%s' for integer (binary) literal", - get_type_hint_string (type_hint)); - } - } - } - else - { - // handle decimals (integer or float) - - current_char = peek_input (); - - // parse initial decimal literal - assuming integer - // TODO: test if works - parse_in_decimal (/*current_char, */ str, length); - - // detect float literal - TODO: fix: "242." is not recognised as a - // float literal - if (current_char == '.' 
&& is_float_digit (peek_input (1))) - { - // float with a '.', parse another decimal into it - - is_real = true; - - // add . to str - str += current_char; - skip_input (); - current_char = peek_input (); - - length++; - - // parse another decimal number for float - // TODO: test if works - parse_in_decimal (/*current_char, */ str, length); - - // parse in exponent part if it exists - // test to see if this works: - parse_in_exponent_part (/*current_char, */ str, length); - - // parse in type suffix if it exists - // TODO: see if works: - parse_in_type_suffix (/*current_char, */ type_hint, length); - - if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 - && type_hint != CORETYPE_UNKNOWN) - { - rust_error_at ( - get_current_location (), - "invalid type suffix '%s' for float literal", - get_type_hint_string (type_hint)); - } - } - else if (current_char == '.' - && check_valid_float_dot_end (peek_input (1))) - { - is_real = true; - - // add . to str - str += current_char; - skip_input (); - current_char = peek_input (); - length++; - - // add a '0' after the . to stop ambiguity - str += '0'; - - // don't parse another decimal number for float - - // parse in exponent part if it exists - shouldn't exist? - // parse_in_exponent_part(/*current_char, */ str, length); - - // parse in type suffix if it exists - shouldn't exist? 
- // TODO: see if works: - // parse_in_type_suffix(/*current_char, */ type_hint, length); - - if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 - && type_hint != CORETYPE_UNKNOWN) - { - rust_error_at ( - get_current_location (), - "invalid type suffix '%s' for float literal", - get_type_hint_string (type_hint)); - } - } - else if (current_char == 'E' || current_char == 'e') - { - is_real = true; - - // parse exponent part - parse_in_exponent_part (/*current_char, */ str, length); - - // parse in type suffix if it exists - parse_in_type_suffix (/*current_char, */ type_hint, length); - - if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 - && type_hint != CORETYPE_UNKNOWN) - { - rust_error_at ( - get_current_location (), - "invalid type suffix '%s' for float literal", - get_type_hint_string (type_hint)); - } - } - else - { - // is an integer - - // parse in type suffix if it exists - parse_in_type_suffix (/*current_char, */ type_hint, length); - - if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) - { - rust_error_at (get_current_location (), - "invalid type suffix '%s' for integer " - "(decimal) literal", - get_type_hint_string (type_hint)); - } - } - - current_column += length; - } - - // actually make the tokens - if (is_real) - { - return Token::make_float (loc, str, type_hint); - } - else - { - return Token::make_int (loc, str, type_hint); - } - } - - // string literals - not processed properly - if (current_char == '"') - { - Codepoint current_char32; - - std::string str; - str.reserve (16); // some sensible default - - int length = 1; - current_char32 = test_peek_codepoint_input (); - - // ok initial peek_codepoint seems to work without "too long" - - while (current_char32.value != '\n' && current_char32.value != '"') - { - // TODO: handle escapes and string continue - if (current_char32.value == '\\') - { - // parse escape - parse_utf8_escape (length, current_char32, '\''); - - // TODO: find a way to parse additional characters after 
the - // escape? return after parsing escape? - - str += current_char32; - - // required as parsing utf8 escape only changes current_char - // or something - current_char32 = test_peek_codepoint_input (); - - continue; - } - - length += test_get_input_codepoint_length (); - - // does this work? not technically a char. maybe have to convert - // to char series - str += current_char32; - test_skip_codepoint_input (); - current_char32 = test_peek_codepoint_input (); - } - - current_column += length; - - if (current_char32.value == '\n') - { - rust_error_at (get_current_location (), "unended string literal"); - } - else if (current_char32.value == '"') - { - skip_input (); - - current_char = peek_input (); - } - else - { - gcc_unreachable (); - } - - return Token::make_string (loc, str); - // TODO: account for escapes and string continue - // also, in rust a string is a series of unicode characters (4 bytes) - } - - // char literal attempt - if (current_char == '\'') - { - // rust chars are 4 bytes and have some weird unicode representation - // thing - Codepoint current_char32; - - int length = 1; - - current_char32 = test_peek_codepoint_input (); - - // parse escaped char literal - if (current_char32.value == '\\') - { - // parse escape - parse_utf8_escape (length, current_char32, '\''); - - // TODO - this skip may not be needed? 
- // test_skip_codepoint_input(); - - if (test_peek_codepoint_input ().value != '\'') - { - rust_error_at (get_current_location (), - "unended char literal"); - } - else - { - test_skip_codepoint_input (); - current_char = peek_input (); - length++; - } - - current_column += length; - - // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to - // unicode - return Token::make_char (loc, current_char32); - } - else - { - // current_char32 = test_peek_codepoint_input(); - test_skip_codepoint_input (); - - if (test_peek_codepoint_input ().value == '\'') - { - // parse normal char literal - // TODO: FIX - char is actually 4 bytes in Rust (uint32) due - // to unicode - - // skip the ' character - skip_input (); - current_char = peek_input (); - - // TODO fix due to different widths of utf-8 chars - current_column += 3; - - return Token::make_char (loc, current_char32); - } - else if (ISDIGIT (current_char32.value) - || ISALPHA (current_char32.value) - || current_char32.value == '_') - { - // parse lifetime name - ::std::string str; - // TODO: does this work properly? - str += current_char32; - - // TODO: fix lifetime name thing - actually, why am I even - // using utf-8 here? - - int length = 1; - - current_char32 = test_peek_codepoint_input (); - - while (ISDIGIT (current_char32.value) - || ISALPHA (current_char32.value) - || current_char32.value == '_') - { - length += test_get_input_codepoint_length (); - - str += current_char32; - test_skip_codepoint_input (); - current_char32 = test_peek_codepoint_input (); - } - - current_column += length; - - return Token::make_lifetime (loc, str); - } - else - { - rust_error_at (get_current_location (), - "expected ' after character constant"); - } - } - } - - // didn't match anything so error - rust_error_at (loc, "unexpected character '%x'", current_char); - current_column++; + const int num_keywords = sizeof(keyword_index) / sizeof(*keyword_index); } -} -// Shitty pass-by-reference way of parsing in type suffix. 
-bool -Lexer::parse_in_type_suffix ( - /*char& current_char, */ PrimitiveCoreType &type_hint, int &length) -{ - ::std::string suffix; - suffix.reserve (5); - - // get suffix - while (ISALPHA (current_char) || ISDIGIT (current_char) - || current_char == '_') - { - if (current_char == '_') - { - // don't add _ to suffix - skip_input (); - current_char = peek_input (); - - length++; - - continue; - } - - length++; - - suffix += current_char; - skip_input (); - current_char = peek_input (); - } + /* Determines whether the string passed in is a keyword or not. If it is, it returns the keyword + * name. */ + TokenId Lexer::classify_keyword(const std::string& str) { + const std::string* last = keyword_index + num_keywords; + const std::string* idx = std::lower_bound(keyword_index, last, str); - if (suffix.empty ()) - { - // no type suffix: do nothing but also no error - return false; - } - else if (suffix == "f32") - { - type_hint = CORETYPE_F32; - } - else if (suffix == "f64") - { - type_hint = CORETYPE_F64; - } - else if (suffix == "i8") - { - type_hint = CORETYPE_I8; - } - else if (suffix == "i16") - { - type_hint = CORETYPE_I16; - } - else if (suffix == "i32") - { - type_hint = CORETYPE_I32; - } - else if (suffix == "i64") - { - type_hint = CORETYPE_I64; - } - else if (suffix == "i128") - { - type_hint = CORETYPE_I128; - } - else if (suffix == "isize") - { - type_hint = CORETYPE_ISIZE; - } - else if (suffix == "u8") - { - type_hint = CORETYPE_U8; - } - else if (suffix == "u16") - { - type_hint = CORETYPE_U16; - } - else if (suffix == "u32") - { - type_hint = CORETYPE_U32; - } - else if (suffix == "u64") - { - type_hint = CORETYPE_U64; - } - else if (suffix == "u128") - { - type_hint = CORETYPE_U128; + if (idx == last || str != *idx) { + return IDENTIFIER; + } else { + return keyword_keys[idx - keyword_index]; + } } - else if (suffix == "usize") - { - type_hint = CORETYPE_USIZE; + + TokenPtr Lexer::build_token() { + // loop to go through multiple characters to build a 
single token + while (true) { + Location loc = get_current_location(); + /*int */ current_char = peek_input(); + skip_input(); + + // return end of file token if end of file + if (current_char == EOF) { + return Token::make(END_OF_FILE, loc); + } + + // detect shebang + if (loc == 1 && current_line == 1 && current_char == '#') { + current_char = peek_input(); + + if (current_char == '!') { + skip_input(); + current_char = peek_input(); + + switch (current_char) { + case '/': + // shebang + + skip_input(); + + // ignore rest of line + while (current_char != '\n') { + current_char = peek_input(); + skip_input(); + } + + // newline + current_line++; + current_column = 1; + // tell line_table that new line starts + line_map->start_line(current_line, max_column_hint); + continue; + } + } + } + + // if not end of file, start tokenising + switch (current_char) { + // ignore whitespace characters for tokens but continue updating location + case '\n': // newline + current_line++; + current_column = 1; + // tell line_table that new line starts + linemap_line_start(::line_table, current_line, max_column_hint); + continue; + case ' ': // space + current_column++; + continue; + case '\t': // tab + // width of a tab is not well-defined, assume 8 spaces + current_column += 8; + continue; + + // punctuation - actual tokens + case '=': + if (peek_input() == '>') { + // match arm arrow + skip_input(); + current_column += 2; + + return Token::make(MATCH_ARROW, loc); + } else if (peek_input() == '=') { + // equality operator + skip_input(); + current_column += 2; + + return Token::make(EQUAL_EQUAL, loc); + } else { + // assignment operator + current_column++; + return Token::make(EQUAL, loc); + } + case '(': + current_column++; + return Token::make(LEFT_PAREN, loc); + case '-': + if (peek_input() == '>') { + // return type specifier + skip_input(); + current_column += 2; + + return Token::make(RETURN_TYPE, loc); + } else if (peek_input() == '=') { + // minus-assign + skip_input(); + 
current_column += 2; + + return Token::make(MINUS_EQ, loc); + } else { + // minus + current_column++; + return Token::make(MINUS, loc); + } + case '+': + if (peek_input() == '=') { + // add-assign + skip_input(); + current_column += 2; + + return Token::make(PLUS_EQ, loc); + } else { + // add + current_column++; + return Token::make(PLUS, loc); + } + case ')': + current_column++; + return Token::make(RIGHT_PAREN, loc); + case ';': + current_column++; + return Token::make(SEMICOLON, loc); + case '*': + if (peek_input() == '=') { + // multiplication-assign + skip_input(); + current_column += 2; + + return Token::make(ASTERISK_EQ, loc); + } else { + // multiplication + current_column++; + return Token::make(ASTERISK, loc); + } + case ',': + current_column++; + return Token::make(COMMA, loc); + case '/': + if (peek_input() == '=') { + // division-assign + skip_input(); + current_column += 2; + + return Token::make(DIV_EQ, loc); + } else if (peek_input() == '/') { + // TODO: single-line doc comments + + // single line comment + skip_input(); + current_column += 2; + + // basically ignore until line finishes + while (current_char != '\n' && current_char != EOF) { + skip_input(); + current_column++; // not used + current_char = peek_input(); + } + continue; + break; + } else if (peek_input() == '*') { + // block comment + skip_input(); + current_column += 2; + + // TODO: block doc comments + + current_char = peek_input(); + + int level = 1; + while (level > 0) { + skip_input(); + current_column++; // for error-handling + current_char = peek_input(); + + // if /* found + if (current_char == '/') { + if (peek_input(1) == '*') { + // skip /* characters + skip_input(1); + + current_column += 2; + + level += 1; + } + } + + // ignore until */ is found + if (current_char == '*') { + if (peek_input(1) == '/') { + // skip */ characters + skip_input(1); + + current_column += 2; + // should only break inner loop here - seems to do so + // break; + + level -= 1; + } + } + } + + // 
refresh new token + continue; + break; + } else { + // division + current_column++; + return Token::make(DIV, loc); + } + case '%': + if (peek_input() == '=') { + // modulo-assign + current_column += 2; + return Token::make(PERCENT_EQ, loc); + } else { + // modulo + current_column++; + return Token::make(PERCENT, loc); + } + case '^': + if (peek_input() == '=') { + // xor-assign? + current_column += 2; + return Token::make(CARET_EQ, loc); + } else { + // xor? + current_column++; + return Token::make(CARET, loc); + } + case '<': + if (peek_input() == '<') { + if (peek_input(1) == '=') { + // left-shift assign + skip_input(1); + current_column += 3; + + return Token::make(LEFT_SHIFT_EQ, loc); + } else { + // left-shift + skip_input(); + current_column += 2; + + return Token::make(LEFT_SHIFT, loc); + } + } else if (peek_input() == '=') { + // smaller than or equal to + skip_input(); + current_column += 2; + + return Token::make(LESS_OR_EQUAL, loc); + } else { + // smaller than + current_column++; + return Token::make(LEFT_ANGLE, loc); + } + break; + case '>': + if (peek_input() == '>') { + if (peek_input(1) == '=') { + // right-shift-assign + skip_input(1); + current_column += 3; + + return Token::make(RIGHT_SHIFT_EQ, loc); + } else { + // right-shift + skip_input(); + current_column += 2; + + return Token::make(RIGHT_SHIFT, loc); + } + } else if (peek_input() == '=') { + // larger than or equal to + skip_input(); + current_column += 2; + + return Token::make(GREATER_OR_EQUAL, loc); + } else { + // larger than + current_column++; + return Token::make(RIGHT_ANGLE, loc); + } + case ':': + if (peek_input() == ':') { + // scope resolution :: + skip_input(); + current_column += 2; + + return Token::make(SCOPE_RESOLUTION, loc); + } else { + // single colon : + current_column++; + return Token::make(COLON, loc); + } + case '!': + // no special handling for macros in lexer? 
+ if (peek_input() == '=') { + // not equal boolean operator + skip_input(); + current_column += 2; + + return Token::make(NOT_EQUAL, loc); + } else { + // not equal unary operator + current_column++; + + return Token::make(EXCLAM, loc); + } + case '?': + current_column++; + return Token::make(QUESTION_MARK, loc); + case '#': + current_column++; + return Token::make(HASH, loc); + case '[': + current_column++; + return Token::make(LEFT_SQUARE, loc); + case ']': + current_column++; + return Token::make(RIGHT_SQUARE, loc); + case '{': + current_column++; + return Token::make(LEFT_CURLY, loc); + case '}': + current_column++; + return Token::make(RIGHT_CURLY, loc); + case '@': + // TODO: i don't know what this does, does it need special handling? + current_column++; + return Token::make(PATTERN_BIND, loc); + case '$': + // TODO: i don't know what this does, does it need special handling? + current_column++; + return Token::make(DOLLAR_SIGN, loc); + case '~': + // TODO: i don't know what this does, does it need special handling? + current_column++; + return Token::make(TILDE, loc); + case '\\': + // TODO: i don't know what this does, does it need special handling? + current_column++; + return Token::make(BACKSLASH, loc); + case '`': + // TODO: i don't know what this does, does it need special handling? + current_column++; + return Token::make(BACKTICK, loc); + case '|': + if (peek_input() == '=') { + // bitwise or-assign? + skip_input(); + current_column += 2; + + return Token::make(PIPE_EQ, loc); + } else if (peek_input() == '|') { + // logical or + skip_input(); + current_column += 2; + + return Token::make(OR, loc); + } else { + // bitwise or + current_column++; + + return Token::make(PIPE, loc); + } + case '&': + if (peek_input() == '=') { + // bitwise and-assign? 
+ skip_input(); + current_column += 2; + + return Token::make(AMP_EQ, loc); + } else if (peek_input() == '&') { + // logical and + skip_input(); + current_column += 2; + + return Token::make(LOGICAL_AND, loc); + } else { + // bitwise and/reference + current_column++; + + return Token::make(AMP, loc); + } + case '.': + if (peek_input() == '.') { + if (peek_input(1) == '.') { + // ellipsis + skip_input(1); + current_column += 3; + + return Token::make(ELLIPSIS, loc); + } else if (peek_input(1) == '=') { + // ..= + skip_input(1); + current_column += 3; + + return Token::make(DOT_DOT_EQ, loc); + } else { + // .. + skip_input(); + current_column += 2; + + return Token::make(DOT_DOT, loc); + } + } else if (!ISDIGIT(peek_input())) { + // single dot . + // Only if followed by a non-number + current_column++; + return Token::make(DOT, loc); + } + } + // TODO: special handling of _ in the lexer? instead of being identifier + + // byte and byte string test + if (current_char == 'b') { + if (peek_input() == '\'') { + // byte - allows any ascii or escapes + // would also have to take into account escapes: \x hex_digit hex_digit, + // \n, \r, \t, \\, \0 + + int length = 1; + + // char to save + char byte_char; + + skip_input(); + // make current char the next character + current_char = peek_input(); + + // detect escapes + if (current_char == '\\') { + /*skip_input(); + + // make current_char next character (letter) + current_char = peek_input();*/ + + parse_escape(length, byte_char, '\''); + + if (byte_char > 127) { + rust_error_at( + get_current_location(), "byte char '%c' out of range", byte_char); + byte_char = 0; + } + + // skip_input(); + current_char = peek_input(); + length++; + + if (current_char != '\'') { + rust_error_at(get_current_location(), "unclosed byte char"); + } + + // TODO: ensure skipping is needed here + skip_input(); + current_char = peek_input(); + length++; // go to next char + } else if (current_char != '\'') { + // otherwise, get character from direct 
input character + byte_char = current_char; + + skip_input(); + current_char = peek_input(); + + if (current_char != '\'') { + rust_error_at(get_current_location(), "unclosed byte char"); + } + + // TODO: ensure skipping is needed here + skip_input(); + current_char = peek_input(); + length++; // go to next char + } else { + rust_error_at(get_current_location(), "no character inside '' for byte char"); + } + + current_column += length; + + return Token::make_byte_char(loc, byte_char); + } else if (peek_input() == '"') { + // byte string + + // skip quote character + skip_input(); + + std::string str; + str.reserve(16); // some sensible default + + int length = 1; + current_char = peek_input(); + // TODO: handle escapes properly + + while (current_char != '"' && current_char != '\n') { + if (current_char == '\\') { + char output_char = 0; + parse_escape(length, output_char, '"'); + + if (output_char > 127) { + rust_error_at(get_current_location(), + "char '%c' in byte string out of range", output_char); + output_char = 0; + } + + str += output_char; + + continue; + } + + length++; + + str += current_char; + skip_input(); + current_char = peek_input(); + } + + current_column += length; + + if (current_char == '\n') { + rust_error_at(get_current_location(), "unended byte string literal"); + } else if (current_char == '"') { + skip_input(); + current_char = peek_input(); + } else { + rust_unreachable(); + } + + return Token::make_byte_string(loc, str); + // TODO: ensure escapes and string continue work properly + } else if (peek_input() == 'r' && (peek_input(1) == '#' || peek_input(1) == '"')) { + // raw byte string literals + std::string str; + str.reserve(16); // some sensible default + + int length = 1; + int hash_count = 0; + + // get hash count at beginnning + skip_input(); + current_char = peek_input(); + while (current_char == '#') { + hash_count++; + length++; + + skip_input(); + current_char = peek_input(); + } + + if (current_char != '"') { + 
rust_error_at(get_current_location(), "raw byte string has no opening '\"'"); + } + + skip_input(); + current_char = peek_input(); + + while (true) { + if (current_char == '"') { + bool enough_hashes = true; + + for (int i = 0; i < hash_count; i++) { + if (peek_input(i + 1) != '#') { + enough_hashes + = false; // could continue here - improve performance + } + } + + if (enough_hashes) { + // skip enough input and peek enough input + skip_input(hash_count); // is this enough? + current_char = peek_input(); + length += hash_count + 1; + break; + } + } + + length++; + + str += current_char; + skip_input(); + current_char = peek_input(); + } + + current_column += length; + + return Token::make_byte_string(loc, str); // TODO: does this work properly + } + } + + // raw stuff + if (current_char == 'r') { + int peek = peek_input(); + int peek1 = peek_input(1); + + if (peek == '#' && (ISALPHA(peek1) || peek1 == '_')) { + // raw identifier + std::string str; + str.reserve(16); // default + + skip_input(); + current_char = peek_input(); + + current_column += 2; + + str += current_char; + + bool first_is_underscore = current_char == '_'; + + int length = 1; + current_char = peek_input(); + // loop through entire name + while (ISALPHA(current_char) || ISDIGIT(current_char) || current_char == '_') { + length++; + + str += current_char; + skip_input(); + current_char = peek_input(); + } + + current_column += length; + + // if just a single underscore, not an identifier + if (first_is_underscore && length == 1) { + rust_error_at(get_current_location(), "'_' is not a valid raw identifier"); + } + + if (str == "crate" || str == "extern" || str == "self" || str == "super" + || str == "Self") { + rust_error_at( + get_current_location(), "'%s' is a forbidden raw identifier", str.c_str()); + } else { + return Token::make_identifier(loc, str); + } + } else if (peek == '"' || (peek == '#' && (ISALPHA(peek1) || peek1 == '_'))) { + // raw string literals + std::string str; + 
str.reserve(16); // some sensible default + + int length = 1; + int hash_count = 0; + + // get hash count at beginnning + current_char = peek; + while (current_char == '#') { + hash_count++; + length++; + + skip_input(); + current_char = peek_input(); + } + + if (current_char != '"') { + rust_error_at(get_current_location(), "raw string has no opening '\"'"); + } + + skip_input(); + Codepoint current_char32 = test_peek_codepoint_input(); + + while (true) { + if (current_char32.value == '"') { + bool enough_hashes = true; + + for (int i = 0; i < hash_count; i++) { + // if (test_peek_codepoint_input(i + 1) != '#') { + // TODO: ensure this is a good enough replacement + if (peek_input(i + 1) != '#') { + enough_hashes + = false; // could continue here - improve performance + } + } + + if (enough_hashes) { + // skip enough input and peek enough input + skip_input(hash_count); // is this enough? + current_char = peek_input(); + length += hash_count + 1; + break; + } + } + + length++; + + str += current_char32; + test_skip_codepoint_input(); + current_char32 = test_peek_codepoint_input(); + } + + current_column += length; + + return Token::make_string(loc, str); // TODO: does this work properly + } + } + + // find identifiers and keywords + if (ISALPHA(current_char) || current_char == '_') { + std::string str; + str.reserve(16); // default + str += current_char; + + bool first_is_underscore = current_char == '_'; + + int length = 1; + current_char = peek_input(); + // loop through entire name + while (ISALPHA(current_char) || ISDIGIT(current_char) || current_char == '_') { + length++; + + str += current_char; + skip_input(); + current_char = peek_input(); + } + + current_column += length; + + // if just a single underscore, not an identifier + if (first_is_underscore && length == 1) { + return Token::make(UNDERSCORE, loc); + } + + TokenId keyword = classify_keyword(str); + if (keyword == IDENTIFIER) { + return Token::make_identifier(loc, str); + } else { + return 
Token::make(keyword, loc); + } + } + + // identify literals + // int or float literals - not processed properly + if (ISDIGIT(current_char) || current_char == '.') { // _ not allowed as first char + std::string str; + str.reserve(16); // some sensible default + str += current_char; + + PrimitiveCoreType type_hint = CORETYPE_UNKNOWN; + + bool is_real = (current_char == '.'); + + int length = 1; + + // handle binary, octal, hex literals + if (current_char == '0' && !ISDIGIT(peek_input())) { + current_char = peek_input(); + + if (current_char == 'x') { + // hex (integer only) + + skip_input(); + current_char = peek_input(); + + length++; + + // add 'x' to string after 0 so it is 0xFFAA or whatever + str += 'x'; + + // loop through to add entire hex number to string + while (is_x_digit(current_char) || current_char == '_') { + if (current_char == '_') { + // don't add _ to number + skip_input(); + current_char = peek_input(); + + length++; + + continue; + } + + length++; + + // add raw hex numbers + str += current_char; + skip_input(); + current_char = peek_input(); + } + + current_column += length; + + // convert hex value to decimal representation + long hex_num = ::std::strtol(str.c_str(), NULL, 16); + + // create output string stream for hex value to be converted to string + // again + // TODO: if too slow, use sprintf + ::std::ostringstream ostr; + ostr << hex_num; + + // reassign string representation to converted value + str = ostr.str(); + + // parse in type suffix if it exists + parse_in_type_suffix(/*current_char, */ type_hint, length); + + if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) { + rust_error_at(get_current_location(), + "invalid type suffix '%s' for integer (hex) literal", + get_type_hint_string(type_hint)); + } + } else if (current_char == 'o') { + // octal (integer only) + + skip_input(); + current_char = peek_input(); + + length++; + + // don't add any characters as C octals are just 0124 or whatever + + // loop through to add entire 
octal number to string + while (is_octal_digit(current_char) || current_char == '_') { + if (current_char == '_') { + // don't add _ to number + skip_input(); + current_char = peek_input(); + + length++; + + continue; + } + + length++; + + // add raw octal numbers + str += current_char; + skip_input(); + current_char = peek_input(); + } + + current_column += length; + + // convert octal value to decimal representation + long octal_num = ::std::strtol(str.c_str(), NULL, 8); + + // create output string stream for octal value to be converted to + // string again + // TODO: if too slow, use sprintf + ::std::ostringstream ostr; + ostr << octal_num; + + // reassign string representation to converted value + str = ostr.str(); + + // parse in type suffix if it exists + parse_in_type_suffix(/*current_char, */ type_hint, length); + + if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) { + rust_error_at(get_current_location(), + "invalid type suffix '%s' for integer (octal) literal", + get_type_hint_string(type_hint)); + } + } else if (current_char == 'b') { + // binary (integer only) + + skip_input(); + current_char = peek_input(); + + length++; + + // don't add any characters as C binary numbers are not really + // supported + + // loop through to add entire binary number to string + while (is_bin_digit(current_char) || current_char == '_') { + if (current_char == '_') { + // don't add _ to number + skip_input(); + current_char = peek_input(); + + length++; + + continue; + } + + length++; + + // add raw binary numbers + str += current_char; + skip_input(); + current_char = peek_input(); + } + + current_column += length; + + // convert binary value to decimal representation + long bin_num = ::std::strtol(str.c_str(), NULL, 2); + + // create output string stream for binary value to be converted to + // string again + // TODO: if too slow, use sprintf + ::std::ostringstream ostr; + ostr << bin_num; + + // reassign string representation to converted value + str = 
ostr.str(); + + // parse in type suffix if it exists + parse_in_type_suffix(/*current_char, */ type_hint, length); + + if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) { + rust_error_at(get_current_location(), + "invalid type suffix '%s' for integer (binary) literal", + get_type_hint_string(type_hint)); + } + } + } else { + // handle decimals (integer or float) + + current_char = peek_input(); + + // parse initial decimal literal - assuming integer + // TODO: test if works + parse_in_decimal(/*current_char, */ str, length); + + // detect float literal - TODO: fix: "242." is not recognised as a float literal + if (current_char == '.' && is_float_digit(peek_input(1))) { + // float with a '.', parse another decimal into it + + is_real = true; + + // add . to str + str += current_char; + skip_input(); + current_char = peek_input(); + + length++; + + // parse another decimal number for float + // TODO: test if works + parse_in_decimal(/*current_char, */ str, length); + + // parse in exponent part if it exists + // test to see if this works: + parse_in_exponent_part(/*current_char, */ str, length); + + // parse in type suffix if it exists + // TODO: see if works: + parse_in_type_suffix(/*current_char, */ type_hint, length); + + if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 + && type_hint != CORETYPE_UNKNOWN) { + rust_error_at(get_current_location(), + "invalid type suffix '%s' for float literal", + get_type_hint_string(type_hint)); + } + + } else if (current_char == '.' && check_valid_float_dot_end(peek_input(1))) { + is_real = true; + + // add . to str + str += current_char; + skip_input(); + current_char = peek_input(); + length++; + + // add a '0' after the . to stop ambiguity + str += '0'; + + // don't parse another decimal number for float + + // parse in exponent part if it exists - shouldn't exist? + // parse_in_exponent_part(/*current_char, */ str, length); + + // parse in type suffix if it exists - shouldn't exist? 
+ // TODO: see if works: + // parse_in_type_suffix(/*current_char, */ type_hint, length); + + if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 + && type_hint != CORETYPE_UNKNOWN) { + rust_error_at(get_current_location(), + "invalid type suffix '%s' for float literal", + get_type_hint_string(type_hint)); + } + } else if (current_char == 'E' || current_char == 'e') { + is_real = true; + + // parse exponent part + parse_in_exponent_part(/*current_char, */ str, length); + + // parse in type suffix if it exists + parse_in_type_suffix(/*current_char, */ type_hint, length); + + if (type_hint != CORETYPE_F32 && type_hint != CORETYPE_F64 + && type_hint != CORETYPE_UNKNOWN) { + rust_error_at(get_current_location(), + "invalid type suffix '%s' for float literal", + get_type_hint_string(type_hint)); + } + } else { + // is an integer + + // parse in type suffix if it exists + parse_in_type_suffix(/*current_char, */ type_hint, length); + + if (type_hint == CORETYPE_F32 || type_hint == CORETYPE_F64) { + rust_error_at(get_current_location(), + "invalid type suffix '%s' for integer (decimal) literal", + get_type_hint_string(type_hint)); + } + } + + current_column += length; + } + + // actually make the tokens + if (is_real) { + return Token::make_float(loc, str, type_hint); + } else { + return Token::make_int(loc, str, type_hint); + } + } + + // string literals - not processed properly + if (current_char == '"') { + Codepoint current_char32; + + std::string str; + str.reserve(16); // some sensible default + + int length = 1; + current_char32 = test_peek_codepoint_input(); + + // ok initial peek_codepoint seems to work without "too long" + + while (current_char32.value != '\n' && current_char32.value != '"') { + // TODO: handle escapes and string continue + if (current_char32.value == '\\') { + // parse escape + parse_utf8_escape(length, current_char32, '\''); + + // TODO: find a way to parse additional characters after the escape? + // return after parsing escape? 
+ + str += current_char32; + + // required as parsing utf8 escape only changes current_char or something + current_char32 = test_peek_codepoint_input(); + + continue; + } + + length += test_get_input_codepoint_length(); + + // does this work? not technically a char. maybe have to convert to char series + str += current_char32; + test_skip_codepoint_input(); + current_char32 = test_peek_codepoint_input(); + } + + current_column += length; + + if (current_char32.value == '\n') { + rust_error_at(get_current_location(), "unended string literal"); + } else if (current_char32.value == '"') { + skip_input(); + + current_char = peek_input(); + } else { + rust_unreachable(); + } + + return Token::make_string(loc, str); + // TODO: account for escapes and string continue + // also, in rust a string is a series of unicode characters (4 bytes) + } + + // char literal attempt + if (current_char == '\'') { + // rust chars are 4 bytes and have some weird unicode representation thing + Codepoint current_char32; + + int length = 1; + + current_char32 = test_peek_codepoint_input(); + + // parse escaped char literal + if (current_char32.value == '\\') { + // parse escape + parse_utf8_escape(length, current_char32, '\''); + + // TODO - this skip may not be needed? 
+ // test_skip_codepoint_input(); + + if (test_peek_codepoint_input().value != '\'') { + rust_error_at(get_current_location(), "unended char literal"); + } else { + test_skip_codepoint_input(); + current_char = peek_input(); + length++; + } + + current_column += length; + + // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to unicode + return Token::make_char(loc, current_char32); + } else { + // current_char32 = test_peek_codepoint_input(); + test_skip_codepoint_input(); + + if (test_peek_codepoint_input().value == '\'') { + // parse normal char literal + // TODO: FIX - char is actually 4 bytes in Rust (uint32) due to unicode + + // skip the ' character + skip_input(); + current_char = peek_input(); + + // TODO fix due to different widths of utf-8 chars + current_column += 3; + + return Token::make_char(loc, current_char32); + } else if (ISDIGIT(current_char32.value) || ISALPHA(current_char32.value) + || current_char32.value == '_') { + // parse lifetime name + ::std::string str; + // TODO: does this work properly? + str += current_char32; + + // TODO: fix lifetime name thing - actually, why am I even using utf-8 here? + + int length = 1; + + current_char32 = test_peek_codepoint_input(); + + while (ISDIGIT(current_char32.value) || ISALPHA(current_char32.value) + || current_char32.value == '_') { + length += test_get_input_codepoint_length(); + + str += current_char32; + test_skip_codepoint_input(); + current_char32 = test_peek_codepoint_input(); + } + + current_column += length; + + return Token::make_lifetime(loc, str); + } else { + rust_error_at(get_current_location(), "expected ' after character constant"); + } + } + } + + // didn't match anything so error + rust_error_at(loc, "unexpected character '%x'", current_char); + current_column++; + } } - else - { - rust_error_at (get_current_location (), "unknown number suffix '%s'", - suffix.c_str ()); - return false; + // Shitty pass-by-reference way of parsing in type suffix. 
+ bool Lexer::parse_in_type_suffix( + /*char& current_char, */ PrimitiveCoreType& type_hint, int& length) { + ::std::string suffix; + suffix.reserve(5); + + // get suffix + while (ISALPHA(current_char) || ISDIGIT(current_char) || current_char == '_') { + if (current_char == '_') { + // don't add _ to suffix + skip_input(); + current_char = peek_input(); + + length++; + + continue; + } + + length++; + + suffix += current_char; + skip_input(); + current_char = peek_input(); + } + + if (suffix.empty()) { + // no type suffix: do nothing but also no error + return false; + } else if (suffix == "f32") { + type_hint = CORETYPE_F32; + } else if (suffix == "f64") { + type_hint = CORETYPE_F64; + } else if (suffix == "i8") { + type_hint = CORETYPE_I8; + } else if (suffix == "i16") { + type_hint = CORETYPE_I16; + } else if (suffix == "i32") { + type_hint = CORETYPE_I32; + } else if (suffix == "i64") { + type_hint = CORETYPE_I64; + } else if (suffix == "i128") { + type_hint = CORETYPE_I128; + } else if (suffix == "isize") { + type_hint = CORETYPE_ISIZE; + } else if (suffix == "u8") { + type_hint = CORETYPE_U8; + } else if (suffix == "u16") { + type_hint = CORETYPE_U16; + } else if (suffix == "u32") { + type_hint = CORETYPE_U32; + } else if (suffix == "u64") { + type_hint = CORETYPE_U64; + } else if (suffix == "u128") { + type_hint = CORETYPE_U128; + } else if (suffix == "usize") { + type_hint = CORETYPE_USIZE; + } else { + rust_error_at(get_current_location(), "unknown number suffix '%s'", suffix.c_str()); + + return false; + } + + return true; } - return true; -} + void Lexer::parse_in_exponent_part(/*char& current_char, */ std::string& str, int& length) { + if (current_char == 'E' || current_char == 'e') { + // add exponent to string as strtod works with it + str += current_char; + skip_input(); + current_char = peek_input(); + + length++; + + // special - and + handling + if (current_char == '-') { + str += '-'; + + skip_input(); + current_char = peek_input(); + + length++; 
+ } else if (current_char == '+') { + // don't add + but still skip input + skip_input(); + current_char = peek_input(); + + length++; + } -void -Lexer::parse_in_exponent_part (/*char& current_char, */ std::string &str, - int &length) -{ - if (current_char == 'E' || current_char == 'e') - { - // add exponent to string as strtod works with it - str += current_char; - skip_input (); - current_char = peek_input (); - - length++; - - // special - and + handling - if (current_char == '-') - { - str += '-'; - - skip_input (); - current_char = peek_input (); - - length++; - } - else if (current_char == '+') - { - // don't add + but still skip input - skip_input (); - current_char = peek_input (); - - length++; - } - - // parse another decimal number for exponent - parse_in_decimal (/*current_char, */ str, length); + // parse another decimal number for exponent + parse_in_decimal(/*current_char, */ str, length); + } } -} -void -Lexer::parse_in_decimal (/*char& current_char, */ std::string &str, int &length) -{ - while (ISDIGIT (current_char) || current_char == '_') - { - if (current_char == '_') - { - // don't add _ to number - skip_input (); - current_char = peek_input (); + void Lexer::parse_in_decimal(/*char& current_char, */ std::string& str, int& length) { + while (ISDIGIT(current_char) || current_char == '_') { + if (current_char == '_') { + // don't add _ to number + skip_input(); + current_char = peek_input(); - length++; + length++; - continue; - } + continue; + } - length++; + length++; - str += current_char; - skip_input (); - current_char = peek_input (); + str += current_char; + skip_input(); + current_char = peek_input(); + } } -} -// Replace all assorted parse_x_escape with this? Avoids the backwards/peek -// issue. 
-bool -Lexer::parse_escape (int &length, char &output_char, char opening_char) -{ - // skip to actual letter - skip_input (); - current_char = peek_input (); - length++; - - switch (current_char) - { - case 'x': - { - // hex char string (null-terminated) - char hexNum[3] = {0, 0, 0}; - - // first hex char - skip_input (); - current_char = peek_input (); - length++; - - if (!ISXDIGIT (current_char)) - { - rust_error_at (get_current_location (), - "invalid character '\\x%c' in \\x sequence", - current_char); - } - hexNum[0] = current_char; - - // second hex char - skip_input (); - current_char = peek_input (); - length++; - - if (!ISXDIGIT (current_char)) - { - rust_error_at (get_current_location (), - "invalid character '\\x%c' in \\x sequence", - current_char); - } - hexNum[1] = current_char; - - long hexLong = ::std::strtol (hexNum, NULL, 16); - - if (hexLong > 127) - rust_error_at ( - get_current_location (), - "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'", - hexNum); - // gcc_assert(hexLong < 128); // as ascii - char hexChar = static_cast<char> (hexLong); - - // TODO: fix - does this actually give the right character? - output_char = hexChar; - } - break; - case 'n': - output_char = '\n'; - break; - case 'r': - output_char = '\r'; - break; - case 't': - output_char = '\t'; - break; - case '\\': - output_char = '\\'; - break; - case '0': - output_char = '\0'; - break; - case '\'': - output_char = '\''; - break; - case '"': - output_char = '"'; - break; - case 'u': - { - // TODO: shouldn't be used with this - use parse_utf8_escape - - skip_input (); - current_char = peek_input (); - length++; - - bool need_close_brace = false; - - // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer - // does? look at spec? - if (current_char == '{') - { - need_close_brace = true; - - skip_input (); - current_char = peek_input (); - length++; - } - - // parse unicode escape - // 1-6 hex digits? 
- ::std::string num_str; - num_str.reserve (6); - - // test adding number directly - uint32_t test_val; - - // loop through to add entire hex number to string - while (is_x_digit (current_char) || current_char == '_') - { - if (current_char == '_') - { - // don't add _ to number - skip_input (); - current_char = peek_input (); - - length++; - - continue; - } - - length++; - - // add raw hex numbers - num_str += current_char; - - // test adding number directly - char tmp[2] = {current_char, 0}; - test_val *= 16; - test_val += ::std::strtol (tmp, NULL, 16); - - skip_input (); - current_char = peek_input (); - } - - // ensure closing brace - if (need_close_brace && current_char != '}') - { - // actually an error - rust_error_at (get_current_location (), - "expected terminating '}' in unicode escape"); - return false; - } - - // ensure 1-6 hex characters - if (num_str.length () > 6 || num_str.length () < 1) - { - rust_error_at (get_current_location (), - "unicode escape should be between 1 and 6 hex " - "characters; it is %lu", - num_str.length ()); - return false; - } - - long hex_num = ::std::strtol (num_str.c_str (), NULL, 16); - - // as debug, check hex_num = test_val - if (hex_num > 255) - { - rust_error_at ( - get_current_location (), - "non-ascii chars not implemented yet, defaulting to 0"); - hex_num = 0; - } - - // make output_char the value - UTF-8? - // TODO: actually make this work - output char must be 4 bytes, do I - // need a string for this? 
- output_char = static_cast</*uint32_t*/ char> (hex_num); - - return true; - } - break; - case '\r': - case '\n': - // string continue - while (is_whitespace (current_char)) - { - if (current_char == '\n') - { - current_line++; - current_column = 1; - // tell line_table that new line starts - linemap_line_start (::line_table, current_line, max_column_hint); - - // reset "length" - length = 1; - - // get next char - skip_input (); - current_char = peek_input (); - - continue; - } - - skip_input (); - current_char = peek_input (); - length++; - } - - if (current_char == '\\') - { - parse_escape (length, output_char, opening_char); - return true; - } - else if (current_char == opening_char) - { - // TODO: does this skip the ' or " character? It shouldn't. - output_char = 0; - return true; - } - else - { - output_char = current_char; - - // TODO: test has right result - /*skip_input(); - current_char = peek_input();*/ - - return true; - } - default: - rust_error_at (get_current_location (), "unknown escape sequence '\\%c'", - current_char); - // returns false if no parsing could be done - return false; - break; + // Replace all assorted parse_x_escape with this? Avoids the backwards/peek issue. 
+ bool Lexer::parse_escape(int& length, char& output_char, char opening_char) { + // skip to actual letter + skip_input(); + current_char = peek_input(); + length++; + + switch (current_char) { + case 'x': { + // hex char string (null-terminated) + char hexNum[3] = { 0, 0, 0 }; + + // first hex char + skip_input(); + current_char = peek_input(); + length++; + + if (!ISXDIGIT(current_char)) { + rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence", + current_char); + } + hexNum[0] = current_char; + + // second hex char + skip_input(); + current_char = peek_input(); + length++; + + if (!ISXDIGIT(current_char)) { + rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence", + current_char); + } + hexNum[1] = current_char; + + long hexLong = ::std::strtol(hexNum, NULL, 16); + + if (hexLong > 127) + rust_error_at(get_current_location(), + "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'", hexNum); + // gcc_assert(hexLong < 128); // as ascii + char hexChar = static_cast<char>(hexLong); + + // TODO: fix - does this actually give the right character? + output_char = hexChar; + } break; + case 'n': + output_char = '\n'; + break; + case 'r': + output_char = '\r'; + break; + case 't': + output_char = '\t'; + break; + case '\\': + output_char = '\\'; + break; + case '0': + output_char = '\0'; + break; + case '\'': + output_char = '\''; + break; + case '"': + output_char = '"'; + break; + case 'u': { + // TODO: shouldn't be used with this - use parse_utf8_escape + + skip_input(); + current_char = peek_input(); + length++; + + bool need_close_brace = false; + + // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer does? look at + // spec? + if (current_char == '{') { + need_close_brace = true; + + skip_input(); + current_char = peek_input(); + length++; + } + + // parse unicode escape + // 1-6 hex digits? 
+ ::std::string num_str; + num_str.reserve(6); + + // test adding number directly + uint32_t test_val; + + // loop through to add entire hex number to string + while (is_x_digit(current_char) || current_char == '_') { + if (current_char == '_') { + // don't add _ to number + skip_input(); + current_char = peek_input(); + + length++; + + continue; + } + + length++; + + // add raw hex numbers + num_str += current_char; + + // test adding number directly + char tmp[2] = { current_char, 0 }; + test_val *= 16; + test_val += ::std::strtol(tmp, NULL, 16); + + skip_input(); + current_char = peek_input(); + } + + // ensure closing brace + if (need_close_brace && current_char != '}') { + // actually an error + rust_error_at( + get_current_location(), "expected terminating '}' in unicode escape"); + return false; + } + + // ensure 1-6 hex characters + if (num_str.length() > 6 || num_str.length() < 1) { + rust_error_at(get_current_location(), + "unicode escape should be between 1 and 6 hex characters; it is %lu", + num_str.length()); + return false; + } + + long hex_num = ::std::strtol(num_str.c_str(), NULL, 16); + + // as debug, check hex_num = test_val + if (hex_num > 255) { + rust_error_at( + get_current_location(), "non-ascii chars not implemented yet, defaulting to 0"); + hex_num = 0; + } + + // make output_char the value - UTF-8? + // TODO: actually make this work - output char must be 4 bytes, do I need a string for + // this? 
+ output_char = static_cast</*uint32_t*/ char>(hex_num); + + return true; + } break; + case '\r': + case '\n': + // string continue + while (is_whitespace(current_char)) { + if (current_char == '\n') { + current_line++; + current_column = 1; + // tell line_table that new line starts + linemap_line_start(::line_table, current_line, max_column_hint); + + // reset "length" + length = 1; + + // get next char + skip_input(); + current_char = peek_input(); + + continue; + } + + skip_input(); + current_char = peek_input(); + length++; + } + + if (current_char == '\\') { + parse_escape(length, output_char, opening_char); + return true; + } else if (current_char == opening_char) { + // TODO: does this skip the ' or " character? It shouldn't. + output_char = 0; + return true; + } else { + output_char = current_char; + + // TODO: test has right result + /*skip_input(); + current_char = peek_input();*/ + + return true; + } + default: + rust_error_at(get_current_location(), "unknown escape sequence '\\%c'", current_char); + // returns false if no parsing could be done + return false; + break; + } + // all non-special cases (unicode, string continue) should skip their used char + skip_input(); + current_char = peek_input(); + length++; + + // returns true if parsing was successful + return true; } - // all non-special cases (unicode, string continue) should skip their used - // char - skip_input (); - current_char = peek_input (); - length++; - - // returns true if parsing was successful - return true; -} -bool -Lexer::parse_utf8_escape (int &length, Codepoint &output_char, - char opening_char) -{ - // skip to actual letter - skip_input (); - current_char = peek_input (); - length++; - - switch (current_char) - { - case 'x': - { - // hex char string (null-terminated) - char hexNum[3] = {0, 0, 0}; - - // first hex char - skip_input (); - current_char = peek_input (); - length++; - - if (!ISXDIGIT (current_char)) - { - rust_error_at (get_current_location (), - "invalid character 
'\\x%c' in \\x sequence", - current_char); - } - hexNum[0] = current_char; - - // second hex char - skip_input (); - current_char = peek_input (); - length++; - - if (!ISXDIGIT (current_char)) - { - rust_error_at (get_current_location (), - "invalid character '\\x%c' in \\x sequence", - current_char); - } - hexNum[1] = current_char; - - long hexLong = ::std::strtol (hexNum, NULL, 16); - - if (hexLong > 127) - rust_error_at ( - get_current_location (), - "ascii \\x escape '\\x%s' out of range - allows up to '\\x7F'", - hexNum); - // gcc_assert(hexLong < 128); // as ascii - char hexChar = static_cast<char> (hexLong); - - // TODO: fix - does this actually give the right character? - output_char = hexChar; - } - break; - case 'n': - output_char = '\n'; - break; - case 'r': - output_char = '\r'; - break; - case 't': - output_char = '\t'; - break; - case '\\': - output_char = '\\'; - break; - case '0': - output_char = '\0'; - break; - case '\'': - output_char = '\''; - break; - case '"': - output_char = '"'; - break; - case 'u': - { - skip_input (); - current_char = peek_input (); - length++; - - bool need_close_brace = false; - - // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer - // does? look at spec? - if (current_char == '{') - { - need_close_brace = true; - - skip_input (); - current_char = peek_input (); - length++; - } - - // parse unicode escape - // 1-6 hex digits? 
- ::std::string num_str; - num_str.reserve (6); - - // test adding number directly - uint32_t test_val; - - // loop through to add entire hex number to string - while (is_x_digit (current_char) || current_char == '_') - { - if (current_char == '_') - { - // don't add _ to number - skip_input (); - current_char = peek_input (); - - length++; - - continue; - } - - length++; - - // add raw hex numbers - num_str += current_char; - - // test adding number directly - char tmp[2] = {current_char, 0}; - test_val *= 16; - test_val += ::std::strtol (tmp, NULL, 16); - - skip_input (); - current_char = peek_input (); - } - - // ensure closing brace if required - if (need_close_brace) - { - if (current_char == '}') - { - skip_input (); - current_char = peek_input (); - length++; - } - else - { - // actually an error - rust_error_at (get_current_location (), - "expected terminating '}' in unicode escape"); - return false; - } - } - - // ensure 1-6 hex characters - if (num_str.length () > 6 || num_str.length () < 1) - { - rust_error_at (get_current_location (), - "unicode escape should be between 1 and 6 hex " - "characters; it is %lu", - num_str.length ()); - return false; - } - - long hex_num = ::std::strtol (num_str.c_str (), NULL, 16); - - // assert fits a uint32_t - gcc_assert (hex_num < 4294967296); - - // ok can't figure out how to just convert to codepoint or use "this" so - // create new one - output_char = Codepoint (static_cast<uint32_t> (hex_num)); - - // TODO: what is being outputted? the escape code for the unicode char - // (unicode number) or the character number? 
- - return true; - } - break; - case '\r': - case '\n': - // string continue - while (is_whitespace (current_char)) - { - if (current_char == '\n') - { - current_line++; - current_column = 1; - // tell line_table that new line starts - linemap_line_start (::line_table, current_line, max_column_hint); - - // reset "length" - length = 1; - - // get next char - skip_input (); - current_char = peek_input (); - - continue; - } - - skip_input (); - current_char = peek_input (); - length++; - } - - if (current_char == '\\') - { - parse_utf8_escape (length, output_char, opening_char); - return true; - } - else if (current_char == opening_char) - { - // TODO: does this skip the ' or " character? It shouldn't. - output_char = 0; - return true; - } - else - { - output_char = current_char; - - // TODO: test has right result - /*skip_input(); - current_char = peek_input();*/ - - return true; - } - default: - rust_error_at (get_current_location (), "unknown escape sequence '\\%c'", - current_char); - // returns false if no parsing could be done - return false; - break; + bool Lexer::parse_utf8_escape(int& length, Codepoint& output_char, char opening_char) { + // skip to actual letter + skip_input(); + current_char = peek_input(); + length++; + + switch (current_char) { + case 'x': { + // hex char string (null-terminated) + char hexNum[3] = { 0, 0, 0 }; + + // first hex char + skip_input(); + current_char = peek_input(); + length++; + + if (!ISXDIGIT(current_char)) { + rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence", + current_char); + } + hexNum[0] = current_char; + + // second hex char + skip_input(); + current_char = peek_input(); + length++; + + if (!ISXDIGIT(current_char)) { + rust_error_at(get_current_location(), "invalid character '\\x%c' in \\x sequence", + current_char); + } + hexNum[1] = current_char; + + long hexLong = ::std::strtol(hexNum, NULL, 16); + + if (hexLong > 127) + rust_error_at(get_current_location(), + "ascii \\x escape 
'\\x%s' out of range - allows up to '\\x7F'", hexNum); + // gcc_assert(hexLong < 128); // as ascii + char hexChar = static_cast<char>(hexLong); + + // TODO: fix - does this actually give the right character? + output_char = hexChar; + } break; + case 'n': + output_char = '\n'; + break; + case 'r': + output_char = '\r'; + break; + case 't': + output_char = '\t'; + break; + case '\\': + output_char = '\\'; + break; + case '0': + output_char = '\0'; + break; + case '\'': + output_char = '\''; + break; + case '"': + output_char = '"'; + break; + case 'u': { + skip_input(); + current_char = peek_input(); + length++; + + bool need_close_brace = false; + + // TODO: rustc lexer doesn't seem to allow not having { but mrustc lexer does? look at + // spec? + if (current_char == '{') { + need_close_brace = true; + + skip_input(); + current_char = peek_input(); + length++; + } + + // parse unicode escape + // 1-6 hex digits? + ::std::string num_str; + num_str.reserve(6); + + // test adding number directly + uint32_t test_val; + + // loop through to add entire hex number to string + while (is_x_digit(current_char) || current_char == '_') { + if (current_char == '_') { + // don't add _ to number + skip_input(); + current_char = peek_input(); + + length++; + + continue; + } + + length++; + + // add raw hex numbers + num_str += current_char; + + // test adding number directly + char tmp[2] = { current_char, 0 }; + test_val *= 16; + test_val += ::std::strtol(tmp, NULL, 16); + + skip_input(); + current_char = peek_input(); + } + + // ensure closing brace if required + if (need_close_brace) { + if (current_char == '}') { + skip_input(); + current_char = peek_input(); + length++; + } else { + // actually an error + rust_error_at( + get_current_location(), "expected terminating '}' in unicode escape"); + return false; + } + } + + // ensure 1-6 hex characters + if (num_str.length() > 6 || num_str.length() < 1) { + rust_error_at(get_current_location(), + "unicode escape should be between 
1 and 6 hex characters; it is %lu", + num_str.length()); + return false; + } + + long hex_num = ::std::strtol(num_str.c_str(), NULL, 16); + + // assert fits a uint32_t + rust_assert(hex_num < 4294967296); + + // ok can't figure out how to just convert to codepoint or use "this" so create new + // one + output_char = Codepoint(static_cast<uint32_t>(hex_num)); + + // TODO: what is being outputted? the escape code for the unicode char (unicode + // number) or the character number? + + return true; + } break; + case '\r': + case '\n': + // string continue + while (is_whitespace(current_char)) { + if (current_char == '\n') { + current_line++; + current_column = 1; + // tell line_table that new line starts + linemap_line_start(::line_table, current_line, max_column_hint); + + // reset "length" + length = 1; + + // get next char + skip_input(); + current_char = peek_input(); + + continue; + } + + skip_input(); + current_char = peek_input(); + length++; + } + + if (current_char == '\\') { + parse_utf8_escape(length, output_char, opening_char); + return true; + } else if (current_char == opening_char) { + // TODO: does this skip the ' or " character? It shouldn't. 
+ output_char = 0; + return true; + } else { + output_char = current_char; + + // TODO: test has right result + /*skip_input(); + current_char = peek_input();*/ + + return true; + } + default: + rust_error_at(get_current_location(), "unknown escape sequence '\\%c'", current_char); + // returns false if no parsing could be done + return false; + break; + } + // all non-special cases (unicode, string continue) should skip their used char + skip_input(); + current_char = peek_input(); + length++; + + // returns true if parsing was successful + return true; } - // all non-special cases (unicode, string continue) should skip their used - // char - skip_input (); - current_char = peek_input (); - length++; - - // returns true if parsing was successful - return true; -} #if 0 bool Lexer::parse_ascii_escape(/*char& current_char, */ int& length, char& output_char) { @@ -2495,305 +2102,262 @@ Lexer::parse_utf8_escape (int &length, Codepoint &output_char, } #endif -int -Lexer::test_get_input_codepoint_length () -{ - uint8_t input = peek_input (); - - if (input < 128) - { - // ascii -- 1 byte - // return input; - - return 1; + int Lexer::test_get_input_codepoint_length() { + uint8_t input = peek_input(); + + if (input < 128) { + // ascii -- 1 byte + // return input; + + return 1; + } else if ((input & 0xC0) == 0x80) { + // invalid (continuation; can't be first char) + // return 0xFFFE; + + return 0; + } else if ((input & 0xE0) == 0xC0) { + // 2 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return 0; + // return 0xFFFE; + + // uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); + // return output; + return 2; + } else if ((input & 0xF0) == 0xE0) { + // 3 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return 0; + // return 0xFFFE; + + uint8_t input3 = peek_input(2); + if ((input3 & 0xC0) != 0x80) + return 0; + // return 0xFFFE; + + /*uint32_t output + = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 
0x3F) << 0); + return output;*/ + return 3; + } else if ((input & 0xF8) == 0xF0) { + // 4 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return 0; + // return 0xFFFE; + + uint8_t input3 = peek_input(2); + if ((input3 & 0xC0) != 0x80) + return 0; + // return 0xFFFE; + + uint8_t input4 = peek_input(3); + if ((input4 & 0xC0) != 0x80) + return 0; + // return 0xFFFE; + + /*uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) + | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0); + return output;*/ + return 4; + } else { + rust_error_at(get_current_location(), "invalid UTF-8 (too long)"); + return 0; + } } - else if ((input & 0xC0) == 0x80) - { - // invalid (continuation; can't be first char) - // return 0xFFFE; - return 0; - } - else if ((input & 0xE0) == 0xC0) - { - // 2 bytes - uint8_t input2 = peek_input (1); - if ((input2 & 0xC0) != 0x80) - return 0; - // return 0xFFFE; - - // uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); - // return output; - return 2; - } - else if ((input & 0xF0) == 0xE0) - { - // 3 bytes - uint8_t input2 = peek_input (1); - if ((input2 & 0xC0) != 0x80) - return 0; - // return 0xFFFE; - - uint8_t input3 = peek_input (2); - if ((input3 & 0xC0) != 0x80) - return 0; - // return 0xFFFE; - - /*uint32_t output - = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << - 0); return output;*/ - return 3; - } - else if ((input & 0xF8) == 0xF0) - { - // 4 bytes - uint8_t input2 = peek_input (1); - if ((input2 & 0xC0) != 0x80) - return 0; - // return 0xFFFE; - - uint8_t input3 = peek_input (2); - if ((input3 & 0xC0) != 0x80) - return 0; - // return 0xFFFE; - - uint8_t input4 = peek_input (3); - if ((input4 & 0xC0) != 0x80) - return 0; - // return 0xFFFE; - - /*uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) - | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0); - return output;*/ - return 4; - } - else - { - rust_error_at (get_current_location (), "invalid UTF-8 (too 
long)"); - return 0; + // TODO: rewrite lexing system to use utf-8 "codepoints" rather than bytes? + Codepoint Lexer::test_peek_codepoint_input() { + uint8_t input = peek_input(); + + if (input < 128) { + // ascii -- 1 byte + return { input }; + } else if ((input & 0xC0) == 0x80) { + // invalid (continuation; can't be first char) + return { 0xFFFE }; + } else if ((input & 0xE0) == 0xC0) { + // 2 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return { 0xFFFE }; + + uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); + return { output }; + } else if ((input & 0xF0) == 0xE0) { + // 3 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return { 0xFFFE }; + + uint8_t input3 = peek_input(2); + if ((input3 & 0xC0) != 0x80) + return { 0xFFFE }; + + uint32_t output + = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << 0); + return { output }; + } else if ((input & 0xF8) == 0xF0) { + // 4 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return { 0xFFFE }; + + uint8_t input3 = peek_input(2); + if ((input3 & 0xC0) != 0x80) + return { 0xFFFE }; + + uint8_t input4 = peek_input(3); + if ((input4 & 0xC0) != 0x80) + return { 0xFFFE }; + + uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) + | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0); + return { output }; + } else { + rust_error_at(get_current_location(), "invalid UTF-8 (too long)"); + return { 0xFFFE }; + } } -} -// TODO: rewrite lexing system to use utf-8 "codepoints" rather than bytes? 
-Codepoint -Lexer::test_peek_codepoint_input () -{ - uint8_t input = peek_input (); + void Lexer::test_skip_codepoint_input() { + int toSkip = test_get_input_codepoint_length(); + rust_assert(toSkip >= 1); - if (input < 128) - { - // ascii -- 1 byte - return {input}; - } - else if ((input & 0xC0) == 0x80) - { - // invalid (continuation; can't be first char) - return {0xFFFE}; - } - else if ((input & 0xE0) == 0xC0) - { - // 2 bytes - uint8_t input2 = peek_input (1); - if ((input2 & 0xC0) != 0x80) - return {0xFFFE}; - - uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); - return {output}; - } - else if ((input & 0xF0) == 0xE0) - { - // 3 bytes - uint8_t input2 = peek_input (1); - if ((input2 & 0xC0) != 0x80) - return {0xFFFE}; - - uint8_t input3 = peek_input (2); - if ((input3 & 0xC0) != 0x80) - return {0xFFFE}; - - uint32_t output = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) - | ((input3 & 0x3F) << 0); - return {output}; - } - else if ((input & 0xF8) == 0xF0) - { - // 4 bytes - uint8_t input2 = peek_input (1); - if ((input2 & 0xC0) != 0x80) - return {0xFFFE}; - - uint8_t input3 = peek_input (2); - if ((input3 & 0xC0) != 0x80) - return {0xFFFE}; - - uint8_t input4 = peek_input (3); - if ((input4 & 0xC0) != 0x80) - return {0xFFFE}; - - uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) - | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0); - return {output}; + skip_input(toSkip - 1); } - else - { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); - return {0xFFFE}; - } -} - -void -Lexer::test_skip_codepoint_input () -{ - int toSkip = test_get_input_codepoint_length (); - gcc_assert (toSkip >= 1); - - skip_input (toSkip - 1); -} - -int -Lexer::test_get_input_codepoint_n_length (int n_start_offset) -{ - uint8_t input = peek_input (n_start_offset); - if (input < 128) - { - // ascii -- 1 byte - // return input; - return 1; - } - else if ((input & 0xC0) == 0x80) - { - // invalid (continuation; can't be first char) - // 
return 0xFFFE; - return 0; - } - else if ((input & 0xE0) == 0xC0) - { - // 2 bytes - uint8_t input2 = peek_input (n_start_offset + 1); - if ((input2 & 0xC0) != 0x80) - // return 0xFFFE; - return 0; - - // uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); - // return output; - return 2; - } - else if ((input & 0xF0) == 0xE0) - { - // 3 bytes - uint8_t input2 = peek_input (n_start_offset + 1); - if ((input2 & 0xC0) != 0x80) - // return 0xFFFE; - return 0; - - uint8_t input3 = peek_input (n_start_offset + 2); - if ((input3 & 0xC0) != 0x80) - // return 0xFFFE; - return 0; - - /*uint32_t output - = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << - 0); return output;*/ - return 3; - } - else if ((input & 0xF8) == 0xF0) - { - // 4 bytes - uint8_t input2 = peek_input (n_start_offset + 1); - if ((input2 & 0xC0) != 0x80) - // return 0xFFFE; - return 0; - - uint8_t input3 = peek_input (n_start_offset + 2); - if ((input3 & 0xC0) != 0x80) - // return 0xFFFE; - return 0; - - uint8_t input4 = peek_input (n_start_offset + 3); - if ((input4 & 0xC0) != 0x80) - // return 0xFFFE; - return 0; - - /*uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) - | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0); - return output;*/ - return 4; - } - else - { - rust_error_at (get_current_location (), "invalid UTF-8 (too long)"); - return 0; + int Lexer::test_get_input_codepoint_n_length(int n_start_offset) { + uint8_t input = peek_input(n_start_offset); + + if (input < 128) { + // ascii -- 1 byte + // return input; + return 1; + } else if ((input & 0xC0) == 0x80) { + // invalid (continuation; can't be first char) + // return 0xFFFE; + return 0; + } else if ((input & 0xE0) == 0xC0) { + // 2 bytes + uint8_t input2 = peek_input(n_start_offset + 1); + if ((input2 & 0xC0) != 0x80) + // return 0xFFFE; + return 0; + + // uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); + // return output; + return 2; + } else if ((input & 0xF0) == 0xE0) 
{ + // 3 bytes + uint8_t input2 = peek_input(n_start_offset + 1); + if ((input2 & 0xC0) != 0x80) + // return 0xFFFE; + return 0; + + uint8_t input3 = peek_input(n_start_offset + 2); + if ((input3 & 0xC0) != 0x80) + // return 0xFFFE; + return 0; + + /*uint32_t output + = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << 0); + return output;*/ + return 3; + } else if ((input & 0xF8) == 0xF0) { + // 4 bytes + uint8_t input2 = peek_input(n_start_offset + 1); + if ((input2 & 0xC0) != 0x80) + // return 0xFFFE; + return 0; + + uint8_t input3 = peek_input(n_start_offset + 2); + if ((input3 & 0xC0) != 0x80) + // return 0xFFFE; + return 0; + + uint8_t input4 = peek_input(n_start_offset + 3); + if ((input4 & 0xC0) != 0x80) + // return 0xFFFE; + return 0; + + /*uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) + | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0); + return output;*/ + return 4; + } else { + rust_error_at(get_current_location(), "invalid UTF-8 (too long)"); + return 0; + } } -} -// peeks the codepoint input at n codepoints ahead of current codepoint - try -// not to use -Codepoint -Lexer::test_peek_codepoint_input (int n) -{ - int totalOffset = 0; - - // add up all offsets into total offset? does this do what I want? - for (int i = 0; i < n; i++) - { - totalOffset += test_get_input_codepoint_n_length (totalOffset); + // peeks the codepoint input at n codepoints ahead of current codepoint - try not to use + Codepoint Lexer::test_peek_codepoint_input(int n) { + int totalOffset = 0; + + // add up all offsets into total offset? does this do what I want? + for (int i = 0; i < n; i++) { + totalOffset += test_get_input_codepoint_n_length(totalOffset); + } + // issues: this would have (at least) O(n) lookup time, not O(1) like the rest? 
+ + // TODO: implement if still needed + + // error out of function as it is not implemented + rust_assert(1 == 0); + return { 0 }; + /* + uint8_t input = peek_input(); + + if (input < 128) { + // ascii -- 1 byte + return input; + } else if ((input & 0xC0) == 0x80) { + // invalid (continuation; can't be first char) + return 0xFFFE; + } else if ((input & 0xE0) == 0xC0) { + // 2 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return 0xFFFE; + + uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); + return output; + } else if ((input & 0xF0) == 0xE0) { + // 3 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return 0xFFFE; + + uint8_t input3 = peek_input(2); + if ((input3 & 0xC0) != 0x80) + return 0xFFFE; + + uint32_t output + = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & 0x3F) << 0); + return output; + } else if ((input & 0xF8) == 0xF0) { + // 4 bytes + uint8_t input2 = peek_input(1); + if ((input2 & 0xC0) != 0x80) + return 0xFFFE; + + uint8_t input3 = peek_input(2); + if ((input3 & 0xC0) != 0x80) + return 0xFFFE; + + uint8_t input4 = peek_input(3); + if ((input4 & 0xC0) != 0x80) + return 0xFFFE; + + uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) + | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << 0); + return output; + } else { + rust_error_at(get_current_location(), "invalid UTF-8 (too long)"); + return 0xFFFE; + }*/ } - // issues: this would have (at least) O(n) lookup time, not O(1) like the - // rest? 
- - // TODO: implement if still needed - - // error out of function as it is not implemented - gcc_assert (1 == 0); - return {0}; - /* - uint8_t input = peek_input(); - - if (input < 128) { - // ascii -- 1 byte - return input; - } else if ((input & 0xC0) == 0x80) { - // invalid (continuation; can't be first char) - return 0xFFFE; - } else if ((input & 0xE0) == 0xC0) { - // 2 bytes - uint8_t input2 = peek_input(1); - if ((input2 & 0xC0) != 0x80) - return 0xFFFE; - - uint32_t output = ((input & 0x1F) << 6) | ((input2 & 0x3F) << 0); - return output; - } else if ((input & 0xF0) == 0xE0) { - // 3 bytes - uint8_t input2 = peek_input(1); - if ((input2 & 0xC0) != 0x80) - return 0xFFFE; - - uint8_t input3 = peek_input(2); - if ((input3 & 0xC0) != 0x80) - return 0xFFFE; - - uint32_t output - = ((input & 0x0F) << 12) | ((input2 & 0x3F) << 6) | ((input3 & - 0x3F) << 0); return output; } else if ((input & 0xF8) == 0xF0) { - // 4 bytes - uint8_t input2 = peek_input(1); - if ((input2 & 0xC0) != 0x80) - return 0xFFFE; - - uint8_t input3 = peek_input(2); - if ((input3 & 0xC0) != 0x80) - return 0xFFFE; - - uint8_t input4 = peek_input(3); - if ((input4 & 0xC0) != 0x80) - return 0xFFFE; - - uint32_t output = ((input & 0x07) << 18) | ((input2 & 0x3F) << 12) - | ((input3 & 0x3F) << 6) | ((input4 & 0x3F) << - 0); return output; } else { rust_error_at(get_current_location(), "invalid - UTF-8 (too long)"); return 0xFFFE; - }*/ } -} // namespace Rust diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 2247519..8e9e4a3 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -5,6 +5,7 @@ #include "target.h" #include "tm.h" +#include "tm_p.h" #include "rust-lex.h" #include "rust-parse.h" @@ -13,799 +14,764 @@ #include <algorithm> -extern Linemap * -rust_get_linemap (); +extern Linemap* rust_get_linemap(); namespace Rust { -// Simple wrapper for FILE* that simplifies destruction. 
-struct RAIIFile -{ - FILE *file; - - RAIIFile (const char *filename) : file (fopen (filename, "r")) {} - - ~RAIIFile () { fclose (file); } -}; - -// Implicitly enable a target_feature (and recursively enable dependencies). -void -Session::implicitly_enable_feature (::std::string feature_name) -{ - // TODO: is this really required since features added would be complete via - // target spec? - - if (!options.target_data.has_key_value_pair ("target_data", feature_name)) - { - // if feature has dependencies, enable them - if (feature_name == "aes") - { - implicitly_enable_feature ("sse2"); - } - else if (feature_name == "avx") - { - implicitly_enable_feature ("sse4.2"); - } - else if (feature_name == "avx2") - { - implicitly_enable_feature ("avx"); - } - else if (feature_name == "fma") - { - implicitly_enable_feature ("avx"); - } - else if (feature_name == "pclmulqdq") - { - implicitly_enable_feature ("sse2"); - } - else if (feature_name == "sha") - { - implicitly_enable_feature ("sse2"); - } - else if (feature_name == "sse2") - { - implicitly_enable_feature ("sse"); - } - else if (feature_name == "sse3") - { - implicitly_enable_feature ("sse2"); - } - else if (feature_name == "sse4.1") - { - implicitly_enable_feature ("sse3"); - } - else if (feature_name == "sse4.2") - { - implicitly_enable_feature ("sse4.1"); - } - else if (feature_name == "ssse3") - { - implicitly_enable_feature ("sse3"); - } - - options.target_data.insert_key_value_pair ("target_feature", - ::std::move (feature_name)); + // Simple wrapper for FILE* that simplifies destruction. + struct RAIIFile { + FILE* file; + + RAIIFile(const char* filename) : file(fopen(filename, "r")) {} + + ~RAIIFile() { + fclose(file); + } + }; + + // Implicitly enable a target_feature (and recursively enable dependencies). + void Session::implicitly_enable_feature(::std::string feature_name) { + // TODO: is this really required since features added would be complete via target spec? 
+ + if (!options.target_data.has_key_value_pair("target_data", feature_name)) { + // if feature has dependencies, enable them + if (feature_name == "aes") { + implicitly_enable_feature("sse2"); + } else if (feature_name == "avx") { + implicitly_enable_feature("sse4.2"); + } else if (feature_name == "avx2") { + implicitly_enable_feature("avx"); + } else if (feature_name == "fma") { + implicitly_enable_feature("avx"); + } else if (feature_name == "pclmulqdq") { + implicitly_enable_feature("sse2"); + } else if (feature_name == "sha") { + implicitly_enable_feature("sse2"); + } else if (feature_name == "sse2") { + implicitly_enable_feature("sse"); + } else if (feature_name == "sse3") { + implicitly_enable_feature("sse2"); + } else if (feature_name == "sse4.1") { + implicitly_enable_feature("sse3"); + } else if (feature_name == "sse4.2") { + implicitly_enable_feature("sse4.1"); + } else if (feature_name == "ssse3") { + implicitly_enable_feature("sse3"); + } + + options.target_data.insert_key_value_pair("target_feature", ::std::move(feature_name)); + } } -} -// Meant to enable all target features. As this will be done by target hook, -// this method's deprecated. -void -Session::enable_features () -{ - bool has_target_crt_static = false; - const char *target = "PLACEHOLDER"; - - fprintf ( - stderr, - "ERROR: Somewhere in call chain Session::enable_features is called.\n"); - - if (has_target_crt_static) - { - // enable "crt-static" attribute + // Meant to enable all target features. As this will be done by target hook, this method's + // deprecated. + void Session::enable_features() { + bool has_target_crt_static = false; + const char* target = "PLACEHOLDER"; + + fprintf(stderr, "ERROR: Somewhere in call chain Session::enable_features is called.\n"); + + if (has_target_crt_static) { + // enable "crt-static" attribute + } + + /* TODO: do this via target hook. have one for each target that implicitly enables the + * features for that platform. 
Would probably have to make custom target hook. */ + + /* + if (target == "x86" || target == "x86_64") { + if (TARGET_ISA_AES) { + // enable aes, implicitly enable sse2 + implicitly_enable_feature("aes"); + } + + if (TARGET_ISA_AVX) { + // enable avx, implicitly enable sse4.2 + implicitly_enable_feature("sse4.2"); + } + + if (TARGET_ISA_AVX2) { + // enable avx2, implicitly enable avx + implicitly_enable_feature("avx"); + } + + if (TARGET_ISA_BMI) { + // enable bmi1 + implicitly_enable_feature("bmi1"); + } + + if (TARGET_ISA_BMI2) { + // enable bmi2 + implicitly_enable_feature("bmi2"); + } + + if (TARGET_ISA_FMA) { + // enable fma, implicitly enable avx + implicitly_enable_feature("fma"); + } + + if (TARGET_ISA_FXSR) { + // enable fxsr + implicitly_enable_feature("fxsr"); + } + + if (TARGET_ISA_LZCNT) { + // enable lzcnt + implicitly_enable_feature("lzcnt"); + } + + if (TARGET_ISA_VPCLMULQDQ) { + // enable pclmulqdq, implicitly enable sse2 + implicitly_enable_feature("pclmulqdq"); + } + + if (TARGET_ISA_POPCNT) { + // enable popcnt + implicitly_enable_feature("popcnt"); + } + + if (TARGET_ISA_RDRND) { + // enable rdrand + implicitly_enable_feature("rdrand"); + } + + if (TARGET_ISA_RDSEED) { + // enable rdseed + implicitly_enable_feature("rdseed"); + } + + if (TARGET_ISA_SHA) { + // enable sha, implicitly enable sse2 + implicitly_enable_feature("sha"); + } + + if (TARGET_ISA_SSE) { + // enable sse + implicitly_enable_feature("sse"); + } + + if (TARGET_ISA_SSE2) { + // enable sse2, implicitly enable sse + implicitly_enable_feature("sse2"); + } + + if (TARGET_ISA_SSE3) { + // enable sse3, implicitly enable sse2 + implicitly_enable_feature("sse3"); + } + + if (TARGET_ISA_SSE4_1) { + // enable sse4.1, implicitly enable sse3 + implicitly_enable_feature("sse4.1"); + } + + if (TARGET_ISA_SSE4_2) { + // enable sse4.2, implicitly enable sse4.1 + implicitly_enable_feature("sse4.2"); + } + + if (TARGET_ISA_SSSE3) { + // enable ssse3, implicitly enable sse3 + 
implicitly_enable_feature("ssse3"); + } + + if (TARGET_ISA_XSAVE) { + // enable xsave + implicitly_enable_feature("xsave"); + } + + if (TARGET_ISA_XSAVEC) { + // enable xsavec + implicitly_enable_feature("xsavec"); + } + + if (TARGET_ISA_XSAVEOPT) { + // enable xsaveopt + implicitly_enable_feature("xsaveopt"); + } + + if (TARGET_ISA_XSAVES) { + // enable xsaves + implicitly_enable_feature("xsaves"); + } + } + options.target_data.features.shrink_to_fit(); + ::std::sort(options.target_data.features.begin(), options.target_data.features.end());*/ } - /* TODO: do this via target hook. have one for each target that implicitly - * enables the - * features for that platform. Would probably have to make custom target hook. - */ - - /* - if (target == "x86" || target == "x86_64") { - if (TARGET_ISA_AES) { - // enable aes, implicitly enable sse2 - implicitly_enable_feature("aes"); - } - - if (TARGET_ISA_AVX) { - // enable avx, implicitly enable sse4.2 - implicitly_enable_feature("sse4.2"); - } - - if (TARGET_ISA_AVX2) { - // enable avx2, implicitly enable avx - implicitly_enable_feature("avx"); - } - - if (TARGET_ISA_BMI) { - // enable bmi1 - implicitly_enable_feature("bmi1"); - } - - if (TARGET_ISA_BMI2) { - // enable bmi2 - implicitly_enable_feature("bmi2"); - } - - if (TARGET_ISA_FMA) { - // enable fma, implicitly enable avx - implicitly_enable_feature("fma"); - } - - if (TARGET_ISA_FXSR) { - // enable fxsr - implicitly_enable_feature("fxsr"); - } - - if (TARGET_ISA_LZCNT) { - // enable lzcnt - implicitly_enable_feature("lzcnt"); - } - - if (TARGET_ISA_VPCLMULQDQ) { - // enable pclmulqdq, implicitly enable sse2 - implicitly_enable_feature("pclmulqdq"); - } - - if (TARGET_ISA_POPCNT) { - // enable popcnt - implicitly_enable_feature("popcnt"); - } - - if (TARGET_ISA_RDRND) { - // enable rdrand - implicitly_enable_feature("rdrand"); - } - - if (TARGET_ISA_RDSEED) { - // enable rdseed - implicitly_enable_feature("rdseed"); - } - - if (TARGET_ISA_SHA) { - // enable sha, 
implicitly enable sse2 - implicitly_enable_feature("sha"); - } - - if (TARGET_ISA_SSE) { - // enable sse - implicitly_enable_feature("sse"); - } - - if (TARGET_ISA_SSE2) { - // enable sse2, implicitly enable sse - implicitly_enable_feature("sse2"); - } - - if (TARGET_ISA_SSE3) { - // enable sse3, implicitly enable sse2 - implicitly_enable_feature("sse3"); - } - - if (TARGET_ISA_SSE4_1) { - // enable sse4.1, implicitly enable sse3 - implicitly_enable_feature("sse4.1"); - } - - if (TARGET_ISA_SSE4_2) { - // enable sse4.2, implicitly enable sse4.1 - implicitly_enable_feature("sse4.2"); - } - - if (TARGET_ISA_SSSE3) { - // enable ssse3, implicitly enable sse3 - implicitly_enable_feature("ssse3"); - } - - if (TARGET_ISA_XSAVE) { - // enable xsave - implicitly_enable_feature("xsave"); - } - - if (TARGET_ISA_XSAVEC) { - // enable xsavec - implicitly_enable_feature("xsavec"); - } - - if (TARGET_ISA_XSAVEOPT) { - // enable xsaveopt - implicitly_enable_feature("xsaveopt"); - } - - if (TARGET_ISA_XSAVES) { - // enable xsaves - implicitly_enable_feature("xsaves"); - } - } - options.target_data.features.shrink_to_fit(); - ::std::sort(options.target_data.features.begin(), - options.target_data.features.end());*/ -} - -void -Session::init () -{ - // nothing yet -} - -// Initialise default options. Actually called before handle_option, unlike init -// itself. -void -Session::init_options () -{ - options.dump_option = CompileOptions::NO_DUMP; -} - -// Handle option selection. -bool -Session::handle_option ( - enum opt_code code, const char *arg, HOST_WIDE_INT value ATTRIBUTE_UNUSED, - int kind ATTRIBUTE_UNUSED, location_t loc ATTRIBUTE_UNUSED, - const struct cl_option_handlers *handlers ATTRIBUTE_UNUSED) -{ - // used to store whether results of various stuff are successful - bool ret = true; - - // Handles options as listed in lang.opt. 
- switch (code) - { - case OPT_I: - // TODO: add search path - break; - case OPT_L: - // TODO: add library link path or something - break; - case OPT_frust_dump_: - // enable dump and return whether this was successful - if (arg != NULL) - { - ret = enable_dump (::std::string (arg)); - } - else - { - ret = false; - } - break; - // no option handling for -o - default: - // return 1 to indicate option is valid - break; + void Session::init() { +#ifndef TARGET_RUST_OS_INFO +# define TARGET_RUST_OS_INFO() +#endif +//#define builtin_rust_info(KEY, VALUE) rust_add_target_info (KEY, VALUE) +// might as well use c++ stuff +#define builtin_rust_info(KEY, VALUE) options.target_data.insert_key_value_pair(KEY, VALUE) + + // initialise target hooks + //targetrustm.rust_cpu_info(); + //targetrustm.rust_os_info(); + // ok, that's not working too well TODO - see if can salvage old implementation + TARGET_RUST_CPU_INFO(); + TARGET_RUST_OS_INFO(); + +#undef builtin_rust_info + + // target-independent values that should exist in all targets + options.target_data.insert_key_value_pair("target_pointer_width", std::to_string(POINTER_SIZE)); + options.target_data.insert_key_value_pair("target_endian", BYTES_BIG_ENDIAN ? "big" : "little"); + + // TODO: find min atomic width and max atomic width + // from it, add atomic-related stuff for sizes 8, 16, 32, 64, and 128 (if inside bounds) + // in rustc, min atomic width is a known quantity (or 8 if not known), and max is also a known quantity (or is pointer size if not known) + // TODO: add atomic pointer if some criteria is satisfied + + // TODO: find whether target has "atomic cas" + + // add debug_assertions if enabled and proc_macro if crate type has it or whatever + + // derived values from hook + options.target_data.init_derived_values(); } - return ret; -} - -/* Enables a certain dump depending on the name passed in. Returns true if name - * is valid, false otherwise. 
*/ -bool -Session::enable_dump (::std::string arg) -{ - // FIXME: change dumping algorithm when new non-inhibiting dump system is - // created - if (arg == "all") - { - error_at ( - UNKNOWN_LOCATION, - "dumping all is not supported as of now. choose 'lex' or 'parse'"); - return false; - } - else if (arg == "lex") - { - options.dump_option = CompileOptions::LEXER_DUMP; - } - else if (arg == "parse") - { - options.dump_option = CompileOptions::PARSER_AST_DUMP; - } - else if (arg == "register_plugins") - { - options.dump_option = CompileOptions::REGISTER_PLUGINS_DUMP; + // Initialise default options. Actually called before handle_option, unlike init itself. + void Session::init_options() { + options.dump_option = CompileOptions::NO_DUMP; } - else if (arg == "injection") - { - options.dump_option = CompileOptions::INJECTION_DUMP; - } - else if (arg == "expansion") - { - options.dump_option = CompileOptions::EXPANSION_DUMP; - } - else if (arg == "name_resolution") - { - options.dump_option = CompileOptions::NAME_RESOLUTION_DUMP; - } - else if (arg == "") - { - error_at (UNKNOWN_LOCATION, - "dump option was not given a name. choose 'lex' or 'parse'"); - return false; - } - else - { - error_at (UNKNOWN_LOCATION, - "dump option '%s' was unrecognised. choose 'lex' or 'parse'", - arg.c_str ()); - return false; - } - return true; -} -/* Actual main entry point for front-end. Called from langhook to parse files. - */ -void -Session::parse_files (int num_files, const char **files) -{ - for (int i = 0; i < num_files; i++) - { - parse_file (files[i]); + // Handle option selection. + bool Session::handle_option(enum opt_code code, const char* arg, + HOST_WIDE_INT value ATTRIBUTE_UNUSED, int kind ATTRIBUTE_UNUSED, + location_t loc ATTRIBUTE_UNUSED, const struct cl_option_handlers* handlers ATTRIBUTE_UNUSED) { + // used to store whether results of various stuff are successful + bool ret = true; + + // Handles options as listed in lang.opt. 
+ switch (code) { + case OPT_I: + // TODO: add search path + break; + case OPT_L: + // TODO: add library link path or something + break; + case OPT_frust_dump_: + // enable dump and return whether this was successful + if (arg != NULL) { + ret = enable_dump(::std::string(arg)); + } else { + ret = false; + } + break; + // no option handling for -o + default: + // return 1 to indicate option is valid + break; + } + + return ret; } - // TODO: should semantic analysis be dealed with here? or per file? for now, - // per-file. -} - -// Parses a single file with filename filename. -void -Session::parse_file (const char *filename) -{ - RAIIFile file_wrap (filename); - if (file_wrap.file == NULL) - { - fatal_error (UNKNOWN_LOCATION, "cannot open filename %s: %m", filename); + /* Enables a certain dump depending on the name passed in. Returns true if name is valid, false + * otherwise. */ + bool Session::enable_dump(::std::string arg) { + // FIXME: change dumping algorithm when new non-inhibiting dump system is created + if (arg == "all") { + error_at( + UNKNOWN_LOCATION, "dumping all is not supported as of now. 
choose 'lex' or 'parse'"); + return false; + } else if (arg == "lex") { + options.dump_option = CompileOptions::LEXER_DUMP; + } else if (arg == "parse") { + options.dump_option = CompileOptions::PARSER_AST_DUMP; + } else if (arg == "register_plugins") { + options.dump_option = CompileOptions::REGISTER_PLUGINS_DUMP; + } else if (arg == "injection") { + options.dump_option = CompileOptions::INJECTION_DUMP; + } else if (arg == "expansion") { + options.dump_option = CompileOptions::EXPANSION_DUMP; + } else if (arg == "name_resolution") { + options.dump_option = CompileOptions::NAME_RESOLUTION_DUMP; + } else if (arg == "target_options") { + // special case - dump all target options, and then quit compilation + // nope, option handling called before init, so have to make this an actual compile option + //options.target_data.dump_target_options(); + //return false; + options.dump_option = CompileOptions::TARGET_OPTION_DUMP; + } else if (arg == "") { + error_at(UNKNOWN_LOCATION, "dump option was not given a name. choose 'lex' or 'parse'"); + return false; + } else { + error_at(UNKNOWN_LOCATION, "dump option '%s' was unrecognised. choose 'lex' or 'parse'", + arg.c_str()); + return false; + } + return true; } - // parse file here - // create lexer and parser - these are file-specific and so aren't instance - // variables - Rust::Lexer lex (filename, file_wrap.file, rust_get_linemap ()); - Rust::Parser parser (lex); - - // determine parsing method from options - /* FIXME: currently, the dump means that full compilation will not occur as of - * present. In future, dumps should not inhibit full compilation. 
*/ - switch (options.dump_option) - { - case CompileOptions::NO_DUMP: - fatal_error (UNKNOWN_LOCATION, - "no-dump parsing has not been enabled yet"); - return; - case CompileOptions::LEXER_DUMP: - parser.debug_dump_lex_output (); - return; - case CompileOptions::PARSER_AST_DUMP: - parser.debug_dump_ast_output (); - return; - case CompileOptions::REGISTER_PLUGINS_DUMP: - case CompileOptions::INJECTION_DUMP: - case CompileOptions::EXPANSION_DUMP: - case CompileOptions::NAME_RESOLUTION_DUMP: - // will break later after more stages - break; - // semantic analysis when completed - default: - fatal_error (UNKNOWN_LOCATION, "unrecognised dump option: '%u'", - options.dump_option); - return; + /* Actual main entry point for front-end. Called from langhook to parse files. */ + void Session::parse_files(int num_files, const char** files) { + for (int i = 0; i < num_files; i++) { + parse_file(files[i]); + } + // TODO: should semantic analysis be dealed with here? or per file? for now, per-file. } - /* basic pipeline: - * - lex - * - parse - * - register plugins (dummy stage for now) - attribute injection? what is - * this? (attribute injection is injecting attributes specified in command - * line into crate root) - * - injection (some lint checks or dummy, register builtin macros, crate - * injection) - * - expansion (expands all macros, maybe build test harness, AST validation, - * maybe macro crate) - * - name resolution (name resolution, maybe feature checking, maybe buffered - * lints) - * TODO not done */ - - // generate crate from parser - AST::Crate parsed_crate = parser.parse_crate (); - - fprintf (stderr, "\033[0;31mSUCCESSFULLY PARSED CRATE \n\033[0m"); - - // register plugins pipeline stage - register_plugins (parsed_crate); - fprintf (stderr, "\033[0;31mSUCCESSFULLY REGISTERED PLUGINS \n\033[0m"); - - if (options.dump_option == CompileOptions::REGISTER_PLUGINS_DUMP) - { - // TODO: what do I dump here? 
- return; - } - - // injection pipeline stage - injection (parsed_crate); - fprintf (stderr, "\033[0;31mSUCCESSFULLY FINISHED INJECTION \n\033[0m"); - - if (options.dump_option == CompileOptions::INJECTION_DUMP) - { - // TODO: what do I dump here? injected crate names? - return; + // Parses a single file with filename filename. + void Session::parse_file(const char* filename) { + RAIIFile file_wrap(filename); + + if (file_wrap.file == NULL) { + fatal_error(UNKNOWN_LOCATION, "cannot open filename %s: %m", filename); + } + + // parse file here + // create lexer and parser - these are file-specific and so aren't instance variables + Rust::Lexer lex(filename, file_wrap.file, rust_get_linemap()); + Rust::Parser parser(lex); + + // determine parsing method from options + /* FIXME: currently, the dump means that full compilation will not occur as of present. In + * future, dumps should not inhibit full compilation. */ + switch (options.dump_option) { + case CompileOptions::NO_DUMP: + fatal_error(UNKNOWN_LOCATION, "no-dump parsing has not been enabled yet"); + return; + case CompileOptions::LEXER_DUMP: + parser.debug_dump_lex_output(); + return; + case CompileOptions::PARSER_AST_DUMP: + parser.debug_dump_ast_output(); + return; + case CompileOptions::REGISTER_PLUGINS_DUMP: + case CompileOptions::INJECTION_DUMP: + case CompileOptions::EXPANSION_DUMP: + case CompileOptions::NAME_RESOLUTION_DUMP: + // will break later after more stages + break; + // semantic analysis when completed + case CompileOptions::TARGET_OPTION_DUMP: + options.target_data.dump_target_options(); + return; + default: + fatal_error(UNKNOWN_LOCATION, "unrecognised dump option: '%u'", options.dump_option); + return; + } + + /* basic pipeline: + * - lex + * - parse + * - register plugins (dummy stage for now) - attribute injection? what is this? 
+ * (attribute injection is injecting attributes specified in command line into crate root) + * - injection (some lint checks or dummy, register builtin macros, crate injection) + * - expansion (expands all macros, maybe build test harness, AST validation, maybe macro + * crate) + * - name resolution (name resolution, maybe feature checking, maybe buffered lints) + * TODO not done */ + + // generate crate from parser + AST::Crate parsed_crate = parser.parse_crate(); + + fprintf(stderr, "\033[0;31mSUCCESSFULLY PARSED CRATE \n\033[0m"); + + // register plugins pipeline stage + register_plugins(parsed_crate); + fprintf(stderr, "\033[0;31mSUCCESSFULLY REGISTERED PLUGINS \n\033[0m"); + + if (options.dump_option == CompileOptions::REGISTER_PLUGINS_DUMP) { + // TODO: what do I dump here? + return; + } + + // injection pipeline stage + injection(parsed_crate); + fprintf(stderr, "\033[0;31mSUCCESSFULLY FINISHED INJECTION \n\033[0m"); + + if (options.dump_option == CompileOptions::INJECTION_DUMP) { + // TODO: what do I dump here? injected crate names? + return; + } + + // expansion pipeline stage + expansion(parsed_crate); + fprintf(stderr, "\033[0;31mSUCCESSFULLY FINISHED EXPANSION \n\033[0m"); + + if (options.dump_option == CompileOptions::EXPANSION_DUMP) { + // TODO: what do I dump here? expanded macros? AST with expanded macros? + return; + } + + // name resolution pipeline stage + name_resolution(parsed_crate); + fprintf(stderr, "\033[0;31mSUCCESSFULLY FINISHED NAME RESOLUTION \n\033[0m"); + + if (options.dump_option == CompileOptions::NAME_RESOLUTION_DUMP) { + // TODO: what do I dump here? resolved names? AST with resolved names? + return; + } } - // expansion pipeline stage - expansion (parsed_crate); - fprintf (stderr, "\033[0;31mSUCCESSFULLY FINISHED EXPANSION \n\033[0m"); + // Checks whether 'cfg' attribute prevents compilation. 
+ bool check_cfg(const AST::Attribute& attr ATTRIBUTE_UNUSED) { + // if "has sub items", and if 'cfg' attr, recursively call this on sub items? - if (options.dump_option == CompileOptions::EXPANSION_DUMP) - { - // TODO: what do I dump here? expanded macros? AST with expanded macros? - return; - } - - // name resolution pipeline stage - name_resolution (parsed_crate); - fprintf (stderr, "\033[0;31mSUCCESSFULLY FINISHED NAME RESOLUTION \n\033[0m"); + // TODO: actually implement. assume true for now - if (options.dump_option == CompileOptions::NAME_RESOLUTION_DUMP) - { - // TODO: what do I dump here? resolved names? AST with resolved names? - return; + return true; } -} + // TODO: deprecated - don't use -// Checks whether 'cfg' attribute prevents compilation. -bool -check_cfg (const AST::Attribute &attr ATTRIBUTE_UNUSED) -{ - // if "has sub items", and if 'cfg' attr, recursively call this on sub items? + // Checks whether any 'cfg' attribute on the item prevents compilation of that item. + bool check_item_cfg(::std::vector<AST::Attribute> attrs) { + for (const auto& attr : attrs) { + if (attr.get_path() == "cfg" && !check_cfg(attr)) { + return false; + } + } - // TODO: actually implement. assume true for now - - return true; -} -// TODO: deprecated - don't use - -// Checks whether any 'cfg' attribute on the item prevents compilation of that -// item. -bool -check_item_cfg (::std::vector<AST::Attribute> attrs) -{ - for (const auto &attr : attrs) - { - if (attr.get_path () == "cfg" && !check_cfg (attr)) - { - return false; - } + return true; } - - return true; -} -// TODO: deprecated - don't use - -// TODO: actually implement method -void -load_extern_crate (::std::string crate_name ATTRIBUTE_UNUSED) -{} -// TODO: deprecated - don't use - -// Parses up to the "load (external) crates" part of the frontend. 
-// TODO: lots of this code is probably actually useful outside of dumping, so -// maybe split off function -void -Session::debug_dump_load_crates (Parser &parser) -{ - // parse crate as AST - AST::Crate crate = parser.parse_crate (); - - /* TODO: search through inner attrs and see whether any of those attr paths - * contain "no_core", "no_std", "compiler_builtins". If so/not, save certain - * crate names. In these names, insert items at beginning of crate items. This - * is crate injection. Also, inject prelude use decl at beginning (first name - * is assumed to be prelude - prelude is a use decl automatically generated to - * enable using Option and Copy without qualifying it or importing it via - * 'use' manually) */ - - ::std::vector< ::std::string> crate_names; - for (const auto &item : crate.items) - { - // if item is extern crate, add name? to list of stuff ONLY IF config is - // checked if item is module, iterate this loop inside it as well - // (recursive?) ONLY IF config is checked - - // TODO: actually do the checks somewhere - probably in the items - - item->add_crate_name (crate_names); + // TODO: deprecated - don't use + + // TODO: actually implement method + void load_extern_crate(::std::string crate_name ATTRIBUTE_UNUSED) {} + // TODO: deprecated - don't use + + // Parses up to the "load (external) crates" part of the frontend. + // TODO: lots of this code is probably actually useful outside of dumping, so maybe split off + // function + void Session::debug_dump_load_crates(Parser& parser) { + // parse crate as AST + AST::Crate crate = parser.parse_crate(); + + /* TODO: search through inner attrs and see whether any of those attr paths contain "no_core", + * "no_std", "compiler_builtins". If so/not, save certain crate names. In these names, insert + * items at beginning of crate items. This is crate injection. 
Also, inject prelude use decl + * at beginning (first name is assumed to be prelude - prelude is a use decl automatically + * generated to enable using Option and Copy without qualifying it or importing it via 'use' + * manually) */ + + ::std::vector< ::std::string> crate_names; + for (const auto& item : crate.items) { + // if item is extern crate, add name? to list of stuff ONLY IF config is checked + // if item is module, iterate this loop inside it as well (recursive?) ONLY IF config is + // checked + + // TODO: actually do the checks somewhere - probably in the items + + item->add_crate_name(crate_names); + } + + /* loop through list of crate names/paths/whatever, attempting to load each one. save loaded + * crates to a Session variable? Or save to current AST::Crate? */ + for (const auto& name : crate_names) { + load_extern_crate(name /*, basename = ""?*/); + } + // for each loaded crate, load dependencies of it as well } + // TODO: deprecated - don't use - /* loop through list of crate names/paths/whatever, attempting to load each - * one. save loaded crates to a Session variable? Or save to current - * AST::Crate? 
*/ - for (const auto &name : crate_names) - { - load_extern_crate (name /*, basename = ""?*/); + void Session::register_plugins(AST::Crate& crate ATTRIBUTE_UNUSED) { + fprintf(stderr, "ran register_plugins (with no body)\n"); } - // for each loaded crate, load dependencies of it as well -} -// TODO: deprecated - don't use -void -Session::register_plugins (AST::Crate &crate ATTRIBUTE_UNUSED) -{ - fprintf (stderr, "ran register_plugins (with no body)\n"); -} + // TODO: move somewhere else + bool contains_name(const std::vector<AST::Attribute>& attrs, std::string name) { + for (const auto& attr : attrs) { + if (attr.get_path() == name) { + return true; + } + } -// TODO: move somewhere else -bool -contains_name (::std::vector<AST::Attribute> attrs, ::std::string name) -{ - for (const auto &attr : attrs) - { - if (attr.get_path () == name) - { - return true; - } + return false; } - return false; -} - -void -Session::injection (AST::Crate &crate) -{ - fprintf (stderr, "started injection\n"); - - // lint checks in future maybe? - - // register builtin macros - /* In rustc, builtin macros are divided into 3 categories depending on use - - * "bang" macros, "attr" macros, and "derive" macros. I think the meanings of - * these categories should be fairly obvious to anyone who has used rust. 
- * Builtin macro list by category: Bang - * - asm - * - assert - * - cfg - * - column - * - compile_error - * - concat_idents - * - concat - * - env - * - file - * - format_args_nl - * - format_args - * - global_asm - * - include_bytes - * - include_str - * - include - * - line - * - log_syntax - * - module_path - * - option_env - * - stringify - * - trace_macros - * Attr - * - bench - * - global_allocator - * - test - * - test_case - * Derive - * - Clone - * - Copy - * - Debug - * - Default - * - Eq - * - Hash - * - Ord - * - PartialEq - * - PartialOrd - * - RustcDecodable - * - RustcEncodable - * rustc also has a "quote" macro that is defined differently and is - * supposedly not stable so eh. */ - /* TODO: actually implement injection of these macros. In particular, derive - * macros, cfg, and - * test should be prioritised since they seem to be used the most. */ - - // crate injection - ::std::vector< ::std::string> names; - if (contains_name (crate.inner_attrs, "no_core")) - { - // no prelude - injected_crate_name = ""; + void Session::injection(AST::Crate& crate) { + fprintf(stderr, "started injection\n"); + + // lint checks in future maybe? + + // register builtin macros + /* In rustc, builtin macros are divided into 3 categories depending on use - "bang" macros, + * "attr" macros, and "derive" macros. I think the meanings of these categories should be + * fairly obvious to anyone who has used rust. 
Builtin macro list by category: Bang + * - asm + * - assert + * - cfg + * - column + * - compile_error + * - concat_idents + * - concat + * - env + * - file + * - format_args_nl + * - format_args + * - global_asm + * - include_bytes + * - include_str + * - include + * - line + * - log_syntax + * - module_path + * - option_env + * - stringify + * - trace_macros + * Attr + * - bench + * - global_allocator + * - test + * - test_case + * Derive + * - Clone + * - Copy + * - Debug + * - Default + * - Eq + * - Hash + * - Ord + * - PartialEq + * - PartialOrd + * - RustcDecodable + * - RustcEncodable + * rustc also has a "quote" macro that is defined differently and is supposedly not stable so + * eh. */ + /* TODO: actually implement injection of these macros. In particular, derive macros, cfg, and + * test should be prioritised since they seem to be used the most. */ + + // crate injection + ::std::vector< ::std::string> names; + if (contains_name(crate.inner_attrs, "no_core")) { + // no prelude + injected_crate_name = ""; + } else if (contains_name(crate.inner_attrs, "no_std")) { + names.push_back("core"); + + if (!contains_name(crate.inner_attrs, "compiler_builtins")) { + names.push_back("compiler_builtins"); + } + + injected_crate_name = "core"; + } else { + names.push_back("std"); + + injected_crate_name = "std"; + } + + // reverse iterate through names to insert crate items in "forward" order at beginning of + // crate + for (auto it = names.rbegin(); it != names.rend(); ++it) { + // create "macro use" attribute for use on extern crate item to enable loading macros from + // it + AST::Attribute attr(AST::SimplePath::from_str("macro_use"), NULL); + + // create "extern crate" item with the name + ::std::unique_ptr<AST::ExternCrate> extern_crate( + new AST::ExternCrate(*it, AST::Visibility::create_error(), { ::std::move(attr) }, + Linemap::unknown_location())); + + // insert at beginning + crate.items.insert(crate.items.begin(), ::std::move(extern_crate)); + } + + // 
create use tree path + // prelude is injected_crate_name + ::std::vector<AST::SimplePathSegment> segments + = { AST::SimplePathSegment(injected_crate_name), AST::SimplePathSegment("prelude"), + AST::SimplePathSegment("v1") }; + // create use tree and decl + ::std::unique_ptr<AST::UseTreeGlob> use_tree(new AST::UseTreeGlob( + AST::UseTreeGlob::PATH_PREFIXED, AST::SimplePath(::std::move(segments)), Location())); + AST::Attribute prelude_attr(AST::SimplePath::from_str("prelude_import"), NULL); + ::std::unique_ptr<AST::UseDeclaration> use_decl(new AST::UseDeclaration(::std::move(use_tree), + AST::Visibility::create_error(), { ::std::move(prelude_attr) }, Location())); + + crate.items.insert(crate.items.begin(), ::std::move(use_decl)); + + /* TODO: potentially add checking attribute crate type? I can't figure out what this does + * currently comment says "Unconditionally collect crate types from attributes to make them + * used", which presumably refers to checking the linkage info by "crate_type". It also seems + * to ensure that an invalid crate type is not specified, so maybe just do that. 
Valid crate + * types: bin lib dylib staticlib cdylib rlib proc-macro */ + + fprintf(stderr, "finished injection\n"); } - else if (contains_name (crate.inner_attrs, "no_std")) - { - names.push_back ("core"); - if (!contains_name (crate.inner_attrs, "compiler_builtins")) - { - names.push_back ("compiler_builtins"); - } + void Session::expansion(AST::Crate& crate ATTRIBUTE_UNUSED) { + fprintf(stderr, "started expansion\n"); - injected_crate_name = "core"; - } - else - { - names.push_back ("std"); + // rustc has a modification to windows PATH temporarily here, which may end up being required - injected_crate_name = "std"; - } - - // reverse iterate through names to insert crate items in "forward" order - // at beginning of crate - for (auto it = names.rbegin (); it != names.rend (); ++it) - { - // create "macro use" attribute for use on extern crate item to enable - // loading macros from it - AST::Attribute attr (AST::SimplePath::from_str ("macro_use"), NULL); - - // create "extern crate" item with the name - ::std::unique_ptr<AST::ExternCrate> extern_crate ( - new AST::ExternCrate (*it, AST::Visibility::create_error (), - {::std::move (attr)}, - Linemap::unknown_location ())); - - // insert at beginning - crate.items.insert (crate.items.begin (), ::std::move (extern_crate)); - } + // create macro expansion config? 
+ // if not, would at least have to configure recursion_limit - // create use tree path - // prelude is injected_crate_name - ::std::vector<AST::SimplePathSegment> segments - = {AST::SimplePathSegment (injected_crate_name), - AST::SimplePathSegment ("prelude"), AST::SimplePathSegment ("v1")}; - // create use tree and decl - ::std::unique_ptr<AST::UseTreeGlob> use_tree ( - new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED, - AST::SimplePath (::std::move (segments)), - Location ())); - AST::Attribute prelude_attr (AST::SimplePath::from_str ("prelude_import"), - NULL); - ::std::unique_ptr<AST::UseDeclaration> use_decl ( - new AST::UseDeclaration (::std::move (use_tree), - AST::Visibility::create_error (), - {::std::move (prelude_attr)}, Location ())); - - crate.items.insert (crate.items.begin (), ::std::move (use_decl)); - - /* TODO: potentially add checking attribute crate type? I can't figure out - * what this does currently comment says "Unconditionally collect crate - * types from attributes to make them used", which presumably refers to - * checking the linkage info by "crate_type". It also seems to ensure that - * an invalid crate type is not specified, so maybe just do that. Valid - * crate types: bin lib dylib staticlib cdylib rlib proc-macro */ - - fprintf (stderr, "finished injection\n"); -} + // create extctxt? from parse session, cfg, and resolver? + // expand by calling cxtctxt object's monotonic_expander's expand_crate method. -void -Session::expansion (AST::Crate &crate ATTRIBUTE_UNUSED) -{ - fprintf (stderr, "started expansion\n"); + // error reporting - check unused macros, get missing fragment specifiers - // rustc has a modification to windows PATH temporarily here, which may end - // up being required + // build test harness - // create macro expansion config? - // if not, would at least have to configure recursion_limit + // ast validation (also with proc macro decls) - // create extctxt? from parse session, cfg, and resolver? 
- // expand by calling cxtctxt object's monotonic_expander's expand_crate - // method. + // maybe create macro crate if not rustdoc - // error reporting - check unused macros, get missing fragment specifiers + fprintf(stderr, "finished expansion\n"); + } - // build test harness + void Session::name_resolution(AST::Crate& crate ATTRIBUTE_UNUSED) { + fprintf(stderr, "started name resolution\n"); - // ast validation (also with proc macro decls) + fprintf(stderr, "finished name resolution\n"); + } - // maybe create macro crate if not rustdoc + void TargetOptions::dump_target_options() const { + fprintf(stderr, "\033[0;31m--PREPARING TO DUMP ALL TARGET OPTIONS--\n\033[0m"); + for (const auto& pairs : features) { + for (const auto& value : pairs.second) { + fprintf(stderr, "%s: \"%s\"\n", pairs.first.c_str(), value.c_str()); + } + if (pairs.second.empty()) { + fprintf(stderr, "%s\n", pairs.first.c_str()); + } + } + if (features.empty()) { + fprintf(stderr, "No target options available!\n"); + } + + fprintf(stderr, "\033[0;31m--END OF TARGET OPTION DUMP--\n\033[0m"); + } - fprintf (stderr, "finished expansion\n"); -} + void TargetOptions::init_derived_values() { + // enable derived values based on target families + if (has_key_value_pair("target_family", "unix")) + insert_key("unix"); + if (has_key_value_pair("target_family", "windows")) + insert_key("windows"); + + // implicitly enable features + if (has_key_value_pair("target_feature", "aes")) + enable_implicit_feature_reqs("aes"); + if (has_key_value_pair("target_feature", "avx")) + enable_implicit_feature_reqs("sse4.2"); + if (has_key_value_pair("target_feature", "avx2")) + enable_implicit_feature_reqs("avx"); + if (has_key_value_pair("target_feature", "pclmulqdq")) + enable_implicit_feature_reqs("sse2"); + if (has_key_value_pair("target_feature", "sha")) + enable_implicit_feature_reqs("sse2"); + if (has_key_value_pair("target_feature", "sse2")) + enable_implicit_feature_reqs("sse"); + if 
(has_key_value_pair("target_feature", "sse3")) + enable_implicit_feature_reqs("sse2"); + if (has_key_value_pair("target_feature", "sse4.1")) + enable_implicit_feature_reqs("sse3"); + if (has_key_value_pair("target_feature", "sse4.2")) + enable_implicit_feature_reqs("sse4.1"); + if (has_key_value_pair("target_feature", "ssse3")) + enable_implicit_feature_reqs("sse3"); + } -void -Session::name_resolution (AST::Crate &crate ATTRIBUTE_UNUSED) -{ - fprintf (stderr, "started name resolution\n"); + void TargetOptions::enable_implicit_feature_reqs(std::string feature) { + if (feature == "aes") + enable_implicit_feature_reqs("sse2"); + else if (feature == "avx") + enable_implicit_feature_reqs("sse4.2"); + else if (feature == "avx2") + enable_implicit_feature_reqs("avx"); + else if (feature == "fma") + enable_implicit_feature_reqs("avx"); + else if (feature == "pclmulqdq") + enable_implicit_feature_reqs("sse2"); + else if (feature == "sha") + enable_implicit_feature_reqs("sse2"); + else if (feature == "sse2") + enable_implicit_feature_reqs("sse"); + else if (feature == "sse3") + enable_implicit_feature_reqs("sse2"); + else if (feature == "sse4.1") + enable_implicit_feature_reqs("sse3"); + else if (feature == "sse4.2") + enable_implicit_feature_reqs("sse4.1"); + else if (feature == "ssse3") + enable_implicit_feature_reqs("sse3"); + + if (!has_key_value_pair("target_feature", feature)) + insert_key_value_pair("target_feature", feature); + } - fprintf (stderr, "finished name resolution\n"); + // NOTEs: + /* mrustc compile pipeline: + * - target load (pass target spec to parser?) 
+ * - parse (convert source to AST) + * - load crates (load any explicitly mentioned extern crates [not all of them]) + * - expand (AST transformations from attributes and macros, loads remaining extern crates + * [std/core and any triggered by macro expansion]) + * - implicit crates (test harness, allocator crate, panic crate) + * - resolve use (annotate every 'use' item with source [supposedly handles nasty recursion]) + * - resolve index (generate index of visible items for every module [avoids recursion in next + * pass]) + * - resolve absolute (resolve all paths into either variable names [types/values] or absolute + * paths) + * - HIR lower (convert modified AST to simpler HIR [both expressions and module tree]) + * - resolve type aliases (replace any usages of type aliases with actual type [except associated + * types]) + * - resolve bind (iterate HIR tree and set binding annotations on all concrete types [avoids + * path lookups later]) + * - resolve HIR markings (generate "markings" [e.g. for Copy/Send/Sync/...] for all types) + * - sort impls (small pass - sort impls into groups) + * - resolve UFCS outer (determine source trait for all top-level <T>::Type [qualified] paths) + * - resolve UFCS paths (do the same, but include for exprs this time.
also normalises results of + * previous pass [expanding known associated types]) + * - constant evaluate (evaluate all constants) + * - typecheck outer (checks impls are sane) + * - typecheck expressions (resolve and check types for all exprs) + * - expand HIR annotate (annotate how exprs are used - used for closure extractions and + * reborrows) + * - expand HIR closures (extract closures into structs implementing Fn* traits) + * - expand HIR vtables (generate vtables for types with dyn dispatch) + * - expand HIR calls (converts method and callable calls into explicit function calls) + * - expand HIR reborrows (apply reborrow rules [taking '&mut *v' instead of 'v']) + * - expand HIR erasedtype (replace all erased types 'impl Trait' with the true type) + * - typecheck expressions (validate - double check that previous passes haven't broken type + * system rules) + * - lower MIR (convert HIR exprs into a control-flow graph [MIR]) + * - MIR validate (check that the generated MIR is consistent) + * - MIR cleanup (perform various transformations on MIR - replace reads of const items with the + * item itself; convert casts to unsized types into 'MakeDst' operations) + * - MIR optimise (perform various simple optimisations on the MIR - constant propagation, dead + * code elimination, borrow elimination, some inlining) + * - MIR validate PO (re-validate the MIR) + * - MIR validate full (optionally: perform expensive state-tracking validation on MIR) + * - trans enumerate (enumerate all items needed for code generation, primarily types used for + * generics) + * - trans auto impls (create magic trait impls as enumerated in previous pass) + * - trans monomorph (generate monomorphised copies of all functions [with generics replaced with + * real types]) + * - MIR optimise inline (run optimisation again, this time with full type info [primarily for + * inlining]) + * - HIR serialise (write out HIR dump [module tree and generic/inline MIR]) + * - trans codegen (generate final
output file: emit C source file and call C compiler) */ + + /* rustc compile pipeline (basic, in way less detail): + * - parse input (parse .rs to AST) + * - name resolution, macro expansion, and configuration (process AST recursively, resolving + * paths, expanding macros, processing #[cfg] nodes [i.e. maybe stripping stuff from AST]) + * - lower to HIR + * - type check and other analyses (e.g. privacy checking) + * - lower to MIR and post-processing (and do stuff like borrow checking) + * - translation to LLVM IR and LLVM optimisations (produce the .o files) + * - linking (link together .o files) */ + + /* Pieced-together rustc compile pipeline (from source): + * - parse input (parse file to crate) + * - register plugins (attributes injection, set various options, register lints, load plugins) + * - expansion/configure and expand (initial 'cfg' processing, 'loading compiler plugins', + * syntax expansion, secondary 'cfg' expansion, synthesis of a test harness if required, + * injection of any std lib dependency and prelude, and name resolution) - actually documented + * inline + * - seeming pieced-together order: pre-AST expansion lint checks, registering builtin + * macros, crate injection, then expand all macros, then maybe build test harness, AST validation, + * maybe create a macro crate (if not rustdoc), name resolution, complete gated feature + * checking, add all buffered lints + * - create global context (lower to HIR) + * - analysis on global context (HIR optimisations? create MIR?) + * - code generation + * - link */ } - -// NOTEs: -/* mrustc compile pipeline: - * - target load (pass target spec to parser?)
- * - parse (convert source to AST) - * - load crates (load any explicitly mentioned extern crates [not all of - * them]) - * - expand (AST transformations from attributes and macros, loads remaining - * extern crates [std/core and any triggered by macro expansion]) - * - implicit crates (test harness, allocator crate, panic crate) - * - resolve use (annotate every 'use' item with source [supposedly handles - * nasty recursion]) - * - resolve index (generate index of visible items for every module [avoids - * recursion in next pass]) - * - resolve absolute (resolve all paths into either variable names - * [types/values] or absolute paths) - * - HIR lower (convert modified AST to simpler HIR [both expressions and - * module tree]) - * - resolve type aliases (replace any usages of type aliases with actual - * type [except associated types]) - * - resolve bind (iterate HIR tree and set binding annotations on all - * concrete types [avoids path lookups later]) - * - resolve HIR markings (generate "markings" [e.g. for Copy/Send/Sync/...] - * for all types) - * - sort impls (small pass - sort impls into groups) - * - resolve UFCS outer (determine source trait for all top-level <T>::Type - * [qualified] paths) - * - resolve UFCS paths (do the same, but include for exprs this time.
also - * normalises results of previous pass [expanding known associated types]) - * - constant evaluate (evaluate all constants) - * - typecheck outer (checks impls are sane) - * - typecheck expressions (resolve and check types for all exprs) - * - expand HIR annotate (annotate how exprs are used - used for closure - * extractions and reborrows) - * - expand HIR closures (extract closures into structs implementing Fn* - * traits) - * - expand HIR vtables (generate vtables for types with dyn dispatch) - * - expand HIR calls (converts method and callable calls into explicit - * function calls) - * - expand HIR reborrows (apply reborrow rules [taking '&mut *v' instead of - * 'v']) - * - expand HIR erasedtype (replace all erased types 'impl Trait' with the - * true type) - * - typecheck expressions (validate - double check that previous passes - * haven't broken type system rules) - * - lower MIR (convert HIR exprs into a control-flow graph [MIR]) - * - MIR validate (check that the generated MIR is consistent) - * - MIR cleanup (perform various transformations on MIR - replace reads of - * const items with the item itself; convert casts to unsized types into - * 'MakeDst' operations) - * - MIR optimise (perform various simple optimisations on the MIR - constant - * propagation, dead code elimination, borrow elimination, some inlining) - * - MIR validate PO (re-validate the MIR) - * - MIR validate full (optionally: perform expensive state-tracking - * validation on MIR) - * - trans enumerate (enumerate all items needed for code generation, - * primarily types used for generics) - * - trans auto impls (create magic trait impls as enumerated in previous - * pass) - * - trans monomorph (generate monomorphised copies of all functions [with - * generics replaced with real types]) - * - MIR optimise inline (run optimisation again, this time with full type - * info [primarily for inlining]) - * - HIR serialise (write out HIR dump [module tree and generic/inline MIR]) - * -
trans codegen (generate final output file: emit C source file and call C - * compiler) */ - -/* rustc compile pipeline (basic, in way less detail): - * - parse input (parse .rs to AST) - * - name resolution, macro expansion, and configuration (process AST - * recursively, resolving paths, expanding macros, processing #[cfg] nodes - * [i.e. maybe stripping stuff from AST]) - * - lower to HIR - * - type check and other analyses (e.g. privacy checking) - * - lower to MIR and post-processing (and do stuff like borrow checking) - * - translation to LLVM IR and LLVM optimisations (produce the .o files) - * - linking (link together .o files) */ - -/* Pieced-together rustc compile pipeline (from source): - * - parse input (parse file to crate) - * - register plugins (attributes injection, set various options, register - * lints, load plugins) - * - expansion/configure and expand (initial 'cfg' processing, 'loading - * compiler plugins', syntax expansion, secondary 'cfg' expansion, synthesis - * of a test harness if required, injection of any std lib dependency and - * prelude, and name resolution) - actually documented inline - * - seeming pieced-together order: pre-AST expansion lint checks, - * registering builtin macros, crate injection, then expand all macros, then - * maybe build test harness, AST validation, maybe create a macro crate (if - * not rustdoc), name resolution, complete gated feature checking, add all - * buffered lints - * - create global context (lower to HIR) - * - analysis on global context (HIR optimisations? create MIR?)
- * - code generation - * - link */ -} // namespace Rust diff --git a/gcc/rust/rust-session-manager.h b/gcc/rust/rust-session-manager.h index 1ce3a92..ee43232 100644 --- a/gcc/rust/rust-session-manager.h +++ b/gcc/rust/rust-session-manager.h @@ -18,206 +18,184 @@ #include <utility> namespace Rust { -// parser forward decl -class Parser; -// crate forward decl -namespace AST { -struct Crate; + // parser forward decl + class Parser; + // crate forward decl + namespace AST { + struct Crate; + } + + // Data related to target, most useful for conditional compilation and whatever. + struct TargetOptions { + // TODO: maybe make private and access through helpers to allow changes to impl + std::unordered_map<std::string, std::unordered_set<std::string>> features; + + public: + // Returns whether a key is defined in the feature set. + bool has_key(std::string key) const { + return features.find(key) != features.end(); + } + + // Returns whether a key exists with the given value in the feature set. + bool has_key_value_pair(std::string key, std::string value) const { + auto it = features.find(key); + if (it != features.end()) { + auto set = it->second; + auto it2 = set.find(value); + if (it2 != set.end()) + return true; + } + return false; + } + + // Returns the singular value from the key, or if the key has multiple, an empty string. + std::string get_singular_value(std::string key) const { + auto it = features.find(key); + if (it != features.end()) { + auto set = it->second; + if (set.size() == 1) + return *set.begin(); + } + return ""; + } + + // Returns all values associated with a key (including none), or an empty set if no key is found. + std::unordered_set< ::std::string> get_values_for_key(std::string key) const { + auto it = features.find(key); + if (it != features.end()) { + return it->second; + } + return {}; + } + + /* Inserts a key (no value) into the feature set. This will do nothing if the key already exists. 
+ * This returns whether the insertion was successful (i.e. whether key already existed). */ + bool insert_key(std::string key) { + return features.insert(std::make_pair(key, std::unordered_set<std::string>())).second; + } + + // Inserts a key-value pair into the feature set. + void insert_key_value_pair(std::string key, std::string value) { + auto existing_set = get_values_for_key(key); + existing_set.insert(std::move(value)); + features[std::move(key)] = std::move(existing_set); + } + + // Dump all target options to stderr. + void dump_target_options() const; + + // Creates derived values and implicit enables after all target info is added (e.g. "unix"). + void init_derived_values(); + + // Enables all requirements for the feature given, and will enable feature itself if not enabled. + void enable_implicit_feature_reqs(std::string feature); + + /* According to reference, Rust uses either multi-map key-values or just values (although + * values may be aliases for a key-value value). This seems like overkill. Thus, depending on + * whether the attributes used in cfg are fixed or not, I think I'll either put each + * non-multimap "key-value" as a separate field and have the multimap "key-values" in a + * regular map for that one key, or actually use a multimap. + * + * rustc itself uses a set of key-value tuples where the second tuple element is optional. + * This gets rid of the requirement to make a multi-map, I guess, but seems like it might make + * search slow (unless all "is defined"-only ones have empty string as second element). */ + /* cfg attributes: + * - target_arch: single value + * - target_feature: multiple values possible + * - target_os: single value + * - target_family: single value (or no value?) 
+ * - unix: set when target_family = "unix" + * - windows: set when target_family = "windows" + * - if these are just syntactic sugar, then maybe have a separate set or map for this kind + * of stuff + * - target_env: set when needed for disambiguation about ABI - usually empty string for GNU, + * complicated + * - seems to be a single value (if any) + * - target_endian: single value; "little" or "big" + * - target_pointer_width: single value, "32" for 32-bit pointers, etc. + * - target_vendor, single value + * - test: set when testing is being done + * - again, seems similar to a "is defined" rather than "is equal to" like unix + * - debug_assertions: seems to "is defined" + * - proc_macro: no idea, bad docs. seems to be boolean, so maybe "is defined" */ + }; + + // Defines compiler options (e.g. dump, etc.). + struct CompileOptions { + // TODO: use bitfield for smaller memory requirements? + + // FIXME: this is set up for "instead of" dumping - in future, dumps should not inhibit + // compilation + enum DumpOptions { + NO_DUMP, + LEXER_DUMP, + PARSER_AST_DUMP, + REGISTER_PLUGINS_DUMP, + INJECTION_DUMP, + EXPANSION_DUMP, + NAME_RESOLUTION_DUMP, + TARGET_OPTION_DUMP, + // TODO: add more? + } dump_option; + + // configuration options - actually useful for conditional compilation and whatever + // data related to target arch, features, os, family, env, endian, pointer width, vendor + TargetOptions target_data; + bool enable_test = false; + bool debug_assertions = false; + bool proc_macro = false; + }; + + /* Defines a compiler session. This is for a single compiler invocation, so potentially includes + * parsing multiple crates. */ + struct Session { + CompileOptions options; + // This should really be in a per-crate storage area but it is wiped with every file so eh. + ::std::string injected_crate_name; + + // backend wrapper to GCC GENERIC + Backend* backend; + + // backend linemap + Linemap* linemap; + + // TODO: replace raw pointers with smart pointers? 
+ + public: + /* Initialise compiler session. Corresponds to langhook grs_langhook_init(). Note that this is + * called after option handling. */ + void init(); + bool handle_option(enum opt_code code, const char* arg, HOST_WIDE_INT value, int kind, + location_t loc, const struct cl_option_handlers* handlers); + void parse_files(int num_files, const char** files); + void init_options(); + + private: + // TODO: should this be private or public? + void parse_file(const char* filename); + bool enable_dump(::std::string arg); + + void debug_dump_load_crates(Parser& parser); + + void implicitly_enable_feature(::std::string feature_name); + void enable_features(); + + // pipeline stages - TODO maybe move? + /* Register plugins pipeline stage. TODO maybe move to another object? Currently dummy stage. + * In future will handle attribute injection (top-level inner attribute creation from command + * line arguments), setting options maybe, registering lints maybe, loading plugins maybe. */ + void register_plugins(AST::Crate& crate); + /* Injection pipeline stage. TODO maybe move to another object? Maybe have some lint checks + * (in future, obviously), register builtin macros, crate injection. */ + void injection(AST::Crate& crate); + /* Expansion pipeline stage. TODO maybe move to another object? Expands all macros, maybe + * build test harness in future, AST validation, maybe create macro crate (if not rustdoc).*/ + void expansion(AST::Crate& crate); + /* Name resolution pipeline stage. TODO maybe move to another object. Performs name + * resolution, maybe complete gated feature checking, maybe create buffered lints in future. + */ + void name_resolution(AST::Crate& crate); + }; } -// Data related to target, most useful for conditional compilation and whatever. 
-struct TargetOptions -{ - // TODO: maybe make private and access through helpers to allow changes to - // impl - std::unordered_map<std::string, std::unordered_set<std::string> > features; - -public: - // Returns whether a key is defined in the feature set. - bool has_key (std::string key) const - { - return features.find (key) != features.end (); - } - - // Returns whether a key exists with the given value in the feature set. - bool has_key_value_pair (std::string key, std::string value) const - { - auto it = features.find (key); - if (it != features.end ()) - { - auto set = it->second; - auto it2 = set.find (value); - if (it2 != set.end ()) - return true; - } - return false; - } - - // Returns the singular value from the key, or if the key has multiple, an - // empty string. - std::string get_singular_value (std::string key) const - { - auto it = features.find (key); - if (it != features.end ()) - { - auto set = it->second; - if (set.size () == 1) - return *set.begin (); - } - return ""; - } - - // Returns all values associated with a key (including none), or an empty set - // if no key is found. - std::unordered_set< ::std::string> get_values_for_key (std::string key) const - { - auto it = features.find (key); - if (it != features.end ()) - { - return it->second; - } - return {}; - } - - /* Inserts a key (no value) into the feature set. This will do nothing if the - * key already exists. - * This returns whether the insertion was successful (i.e. whether key already - * existed). */ - bool insert_key (std::string key) - { - return features - .insert (std::make_pair (key, std::unordered_set<std::string> ())) - .second; - } - - // Inserts a key-value pair into the feature set. 
- void insert_key_value_pair (std::string key, std::string value) - { - auto existing_set = get_values_for_key (key); - existing_set.insert (std::move (value)); - features[std::move (key)] = std::move (existing_set); - } - - /* According to reference, Rust uses either multi-map key-values or just - * values (although values may be aliases for a key-value value). This seems - * like overkill. Thus, depending on whether the attributes used in cfg are - * fixed or not, I think I'll either put each non-multimap "key-value" as a - * separate field and have the multimap "key-values" in a regular map for that - * one key, or actually use a multimap. - * - * rustc itself uses a set of key-value tuples where the second tuple element - * is optional. This gets rid of the requirement to make a multi-map, I guess, - * but seems like it might make - * search slow (unless all "is defined"-only ones have empty string as second - * element). */ - /* cfg attributes: - * - target_arch: single value - * - target_feature: multiple values possible - * - target_os: single value - * - target_family: single value (or no value?) - * - unix: set when target_family = "unix" - * - windows: set when target_family = "windows" - * - if these are just syntactic sugar, then maybe have a separate set or map - * for this kind of stuff - * - target_env: set when needed for disambiguation about ABI - usually empty - * string for GNU, complicated - * - seems to be a single value (if any) - * - target_endian: single value; "little" or "big" - * - target_pointer_width: single value, "32" for 32-bit pointers, etc. - * - target_vendor, single value - * - test: set when testing is being done - * - again, seems similar to a "is defined" rather than "is equal to" like - * unix - * - debug_assertions: seems to "is defined" - * - proc_macro: no idea, bad docs. seems to be boolean, so maybe "is defined" - */ -}; - -// Defines compiler options (e.g. dump, etc.). 
-struct CompileOptions -{ - // TODO: use bitfield for smaller memory requirements? - - // FIXME: this is set up for "instead of" dumping - in future, dumps should - // not inhibit compilation - enum DumpOptions - { - NO_DUMP, - LEXER_DUMP, - PARSER_AST_DUMP, - REGISTER_PLUGINS_DUMP, - INJECTION_DUMP, - EXPANSION_DUMP, - NAME_RESOLUTION_DUMP, - // TODO: add more? - } dump_option; - - // configuration options - actually useful for conditional compilation and - // whatever data related to target arch, features, os, family, env, endian, - // pointer width, vendor - TargetOptions target_data; - bool enable_test = false; - bool debug_assertions = false; - bool proc_macro = false; -}; - -/* Defines a compiler session. This is for a single compiler invocation, so - * potentially includes parsing multiple crates. */ -struct Session -{ - CompileOptions options; - // This should really be in a per-crate storage area but it is wiped with - // every file so eh. - ::std::string injected_crate_name; - - // backend wrapper to GCC GENERIC - Backend *backend; - - // backend linemap - Linemap *linemap; - - // TODO: replace raw pointers with smart pointers? - -public: - /* Initialise compiler session. Corresponds to langhook grs_langhook_init(). - * Note that this is called after option handling. */ - void init (); - bool handle_option (enum opt_code code, const char *arg, HOST_WIDE_INT value, - int kind, location_t loc, - const struct cl_option_handlers *handlers); - void parse_files (int num_files, const char **files); - void init_options (); - -private: - // TODO: should this be private or public? - void parse_file (const char *filename); - bool enable_dump (::std::string arg); - - void debug_dump_load_crates (Parser &parser); - - void implicitly_enable_feature (::std::string feature_name); - void enable_features (); - - // pipeline stages - TODO maybe move? - /* Register plugins pipeline stage. TODO maybe move to another object? - * Currently dummy stage. 
In future will handle attribute injection (top-level - * inner attribute creation from command line arguments), setting options - * maybe, registering lints maybe, loading plugins maybe. */ - void register_plugins (AST::Crate &crate); - /* Injection pipeline stage. TODO maybe move to another object? Maybe have - * some lint checks (in future, obviously), register builtin macros, crate - * injection. */ - void injection (AST::Crate &crate); - /* Expansion pipeline stage. TODO maybe move to another object? Expands all - * macros, maybe build test harness in future, AST validation, maybe create - * macro crate (if not rustdoc).*/ - void expansion (AST::Crate &crate); - /* Name resolution pipeline stage. TODO maybe move to another object. Performs - * name resolution, maybe complete gated feature checking, maybe create - * buffered lints in future. - */ - void name_resolution (AST::Crate &crate); -}; -} // namespace Rust - #endif diff --git a/gcc/rust/rust-target.def b/gcc/rust/rust-target.def index 3f375cb..13cf3e5 100644 --- a/gcc/rust/rust-target.def +++ b/gcc/rust/rust-target.def @@ -43,7 +43,7 @@ predefined by this hook apply to all files that are being compiled.", /* Environmental OS info relating to the target OS. */ DEFHOOK (/*d_os_versions*/rust_os_info, - "Similarly to @code{TARGET_RUST_CPU_INFO}, but is used for configuration info\n\ + "Similar to @code{TARGET_RUST_CPU_INFO}, but is used for configuration info\n\ relating to the target operating system.", void, (void), hook_void_void) |