/* Copyright (C) 2024-2025 Free Software Foundation, Inc. This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. Under Section 7 of GPL version 3, you are granted additional permissions described in the GCC Runtime Library Exception, version 3.1, as published by the Free Software Foundation. You should have received a copy of the GNU General Public License and a copy of the GCC Runtime Library Exception along with this program; see the files COPYING3 and COPYING.RUNTIME respectively. If not, see . */ #if !defined _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif #ifndef _AMXTRANSPOSEINTRIN_H_INCLUDED #define _AMXTRANSPOSEINTRIN_H_INCLUDED #if !defined(__AMX_TRANSPOSE__) #pragma GCC push_options #pragma GCC target("amx-transpose") #define __DISABLE_AMX_TRANSPOSE__ #endif /* __AMX_TRANSPOSE__ */ #if defined(__x86_64__) #define _tile_transposed_internal(dst,src) \ __asm__ volatile\ ("{ttransposed\t%%tmm"#src", %%tmm"#dst"|ttransposed\t%%tmm"#dst", %%tmm"#src"}" ::) #define _tile_2rpntlvwz0_internal(dst,base,stride) \ __asm__ volatile\ ("{t2rpntlvwz0\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz0\t%%tmm"#dst", [%0+%1*1]}" \ :: "r" ((const void*) (base)), "r" ((long) (stride))) #define _tile_2rpntlvwz0t1_internal(dst,base,stride) \ __asm__ volatile\ ("{t2rpntlvwz0t1\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz0t1\t%%tmm"#dst", [%0+%1*1]}" \ :: "r" ((const void*)(base)), "r" ((long)(stride))) #define _tile_2rpntlvwz1_internal(dst,base,stride) \ __asm__ volatile\ ("{t2rpntlvwz1\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz1\t%%tmm"#dst", [%0+%1*1]}" \ :: "r" ((const void*)(base)), "r" ((long)(stride))) #define _tile_2rpntlvwz1t1_internal(dst,base,stride) \ __asm__ volatile\ ("{t2rpntlvwz1t1\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz1t1\t%%tmm"#dst", [%0+%1*1]}" \ :: "r" ((const void*)(base)), "r" ((long)(stride))) #define _tile_transposed(dst,src) \ _tile_transposed_internal (dst, src) #define _tile_2rpntlvwz0(dst,base,stride) \ _tile_2rpntlvwz0_internal (dst, base, stride) #define _tile_2rpntlvwz0t1(dst,base,stride) \ _tile_2rpntlvwz0t1_internal (dst, base, stride) #define _tile_2rpntlvwz1(dst,base,stride) \ _tile_2rpntlvwz1_internal (dst, base, stride) #define _tile_2rpntlvwz1t1(dst,base,stride) \ _tile_2rpntlvwz1t1_internal (dst, base, stride) #if !defined(__AMX_BF16__) #pragma GCC push_options #pragma GCC target("amx-bf16") #define __DISABLE_AMX_BF16__ #endif /* __AMX_BF16__ */ #define _tile_tdpbf16ps_internal(src1_dst,src2,src3) \ __asm__ volatile\ ("{ttdpbf16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttdpbf16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) #define _tile_tdpbf16ps(src1_dst,src2,src3) \ _tile_tdpbf16ps_internal (src1_dst, src2, src3) #ifdef __DISABLE_AMX_BF16__ #undef __DISABLE_AMX_BF16__ #pragma GCC pop_options #endif /* __DISABLE_AMX_BF16__ */ #if !defined(__AMX_FP16__) #pragma GCC push_options #pragma GCC target("amx-fp16") #define __DISABLE_AMX_FP16__ #endif /* __AMX_FP16__ */ #define _tile_tdpfp16ps_internal(src1_dst,src2,src3) \ __asm__ volatile\ ("{ttdpfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttdpfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) #define _tile_tdpfp16ps(src1_dst,src2,src3) \ _tile_tdpfp16ps_internal (src1_dst, src2, src3) #ifdef __DISABLE_AMX_FP16__ #undef __DISABLE_AMX_FP16__ #pragma GCC pop_options #endif /* __DISABLE_AMX_FP16__ */ #if !defined(__AMX_COMPLEX__) #pragma GCC push_options #pragma GCC target("amx-complex") #define __DISABLE_AMX_COMPLEX__ #endif /* __AMX_COMPLEX__ */ #define _tile_conjtcmmimfp16ps_internal(src1_dst,src2,src3) \ __asm__ volatile\ ("{tconjtcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tconjtcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) #define _tile_conjtfp16_internal(dst,src) \ __asm__ volatile\ ("{tconjtfp16\t%%tmm"#src", %%tmm"#dst"|tconjtfp16\t%%tmm"#dst", %%tmm"#src"}" ::) #define _tile_tcmmimfp16ps_internal(src1_dst,src2,src3) \ __asm__ volatile\ ("{ttcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) #define _tile_tcmmrlfp16ps_internal(src1_dst,src2,src3) \ __asm__ volatile\ ("{ttcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) #define _tile_conjtcmmimfp16ps(src1_dst,src2,src3) \ _tile_conjtcmmimfp16ps_internal (src1_dst, src2, src3) #define _tile_conjtfp16(dst,src) \ _tile_conjtfp16_internal (dst, src) #define _tile_tcmmimfp16ps(src1_dst,src2,src3) \ _tile_tcmmimfp16ps_internal (src1_dst, src2, src3) #define _tile_tcmmrlfp16ps(src1_dst,src2,src3) \ _tile_tcmmrlfp16ps_internal (src1_dst, src2, src3) #ifdef __DISABLE_AMX_COMPLEX__ #undef __DISABLE_AMX_COMPLEX__ #pragma GCC pop_options #endif /* __DISABLE_AMX_COMPLEX__ */ #if !defined(__AMX_TF32__) #pragma GCC push_options #pragma GCC target("amx-tf32") #define __DISABLE_AMX_TF32__ #endif /* __AMX_TF32__ */ #define _tile_tmmultf32ps_internal(src1_dst,src2,src3) \ __asm__ volatile\ ("{ttmmultf32ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttmmultf32ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) #define _tile_tmmultf32ps(src1_dst,src2,src3) \ _tile_tmmultf32ps_internal (src1_dst, src2, src3) #ifdef __DISABLE_AMX_TF32__ #undef __DISABLE_AMX_TF32__ #pragma GCC pop_options #endif /* __DISABLE_AMX_TF32__ */ #endif /* __x86_64__ */ #ifdef __DISABLE_AMX_TRANSPOSE__ #undef __DISABLE_AMX_TRANSPOSE__ #pragma GCC pop_options #endif /* __DISABLE_AMX_TRANSPOSE__ */ #endif /* _AMXTRANSPOSEINTRIN_H_INCLUDED */