Last active
September 25, 2018 06:11
-
-
Save xiangzhai/e6fc42f6b9fc959de6baa5d793ea9263 to your computer and use it in GitHub Desktop.
Backport reorganize the loongson march and extensions instructions set to gcc-8.1.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Author: Leslie Zhai <[email protected]> | |
| Date: Tue Sep 25 14:08:58 2018 +0800 | |
| Backport reorganize the loongson march and extensions instructions set to gcc-8.1.0 | |
| diff --git a/gcc/config.gcc b/gcc/config.gcc | |
| index a5defb0..067fdad 100644 | |
| --- a/gcc/config.gcc | |
| +++ b/gcc/config.gcc | |
| @@ -439,7 +439,7 @@ microblaze*-*-*) | |
| ;; | |
| mips*-*-*) | |
| cpu_type=mips | |
| - extra_headers="loongson.h msa.h" | |
| + extra_headers="loongson.h loongson-mmiintrin.h msa.h" | |
| extra_objs="frame-header-opt.o" | |
| extra_options="${extra_options} g.opt fused-madd.opt mips/mips-tables.opt" | |
| ;; | |
| diff --git a/gcc/config/mips/gs264e.md b/gcc/config/mips/gs264e.md | |
| new file mode 100644 | |
| index 0000000..9b30bb5 | |
| --- /dev/null | |
| +++ b/gcc/config/mips/gs264e.md | |
| @@ -0,0 +1,133 @@ | |
| +;; Pipeline model for Loongson gs264e cores. | |
| + | |
| +;; Copyright (C) 2011-2018 Free Software Foundation, Inc. | |
| +;; | |
| +;; This file is part of GCC. | |
| +;; | |
| +;; GCC is free software; you can redistribute it and/or modify it | |
| +;; under the terms of the GNU General Public License as published | |
| +;; by the Free Software Foundation; either version 3, or (at your | |
| +;; option) any later version. | |
| +;; | |
| +;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
| +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
| +;; License for more details. | |
| +;; | |
| +;; You should have received a copy of the GNU General Public License | |
| +;; along with GCC; see the file COPYING3. If not see | |
| +;; <http://www.gnu.org/licenses/>. | |
| + | |
| +;; Uncomment the following line to output automata for debugging. | |
| +;; (automata_option "v") | |
| + | |
| +;; Automaton for integer instructions. | |
| +(define_automaton "gs264e_a_alu") | |
| + | |
| +;; Automaton for floating-point instructions. | |
| +(define_automaton "gs264e_a_falu") | |
| + | |
| +;; Automaton for memory operations. | |
| +(define_automaton "gs264e_a_mem") | |
| + | |
| +;; Describe the resources. | |
| + | |
| +(define_cpu_unit "gs264e_alu1" "gs264e_a_alu") | |
| +(define_cpu_unit "gs264e_mem1" "gs264e_a_mem") | |
| +(define_cpu_unit "gs264e_falu1" "gs264e_a_falu") | |
| + | |
| +;; Describe instruction reservations. | |
| + | |
| +(define_insn_reservation "gs264e_arith" 1 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "arith,clz,const,logical, | |
| + move,nop,shift,signext,slt")) | |
| + "gs264e_alu1") | |
| + | |
| +(define_insn_reservation "gs264e_branch" 1 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "branch,jump,call,condmove,trap")) | |
| + "gs264e_alu1") | |
| + | |
| +(define_insn_reservation "gs264e_mfhilo" 1 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "mfhi,mflo,mthi,mtlo")) | |
| + "gs264e_alu1") | |
| + | |
| +;; Operation imul3nc is fully pipelined. | |
| +(define_insn_reservation "gs264e_imul3nc" 7 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "imul3nc")) | |
| + "gs264e_alu1") | |
| + | |
| +(define_insn_reservation "gs264e_imul" 7 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "imul,imadd")) | |
| + "gs264e_alu1") | |
| + | |
| +(define_insn_reservation "gs264e_idiv_si" 12 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (and (eq_attr "type" "idiv") | |
| + (eq_attr "mode" "SI"))) | |
| + "gs264e_alu1") | |
| + | |
| +(define_insn_reservation "gs264e_idiv_di" 25 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (and (eq_attr "type" "idiv") | |
| + (eq_attr "mode" "DI"))) | |
| + "gs264e_alu1") | |
| + | |
| +(define_insn_reservation "gs264e_load" 4 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "load")) | |
| + "gs264e_mem1") | |
| + | |
| +(define_insn_reservation "gs264e_fpload" 4 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "load,mfc,mtc")) | |
| + "gs264e_mem1") | |
| + | |
| +(define_insn_reservation "gs264e_prefetch" 0 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "prefetch,prefetchx")) | |
| + "gs264e_mem1") | |
| + | |
| +(define_insn_reservation "gs264e_store" 0 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "store,fpstore,fpidxstore")) | |
| + "gs264e_mem1") | |
| + | |
| +(define_insn_reservation "gs264e_fadd" 4 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "fadd,fmul,fmadd")) | |
| + "gs264e_falu1") | |
| + | |
| +(define_insn_reservation "gs264e_fcmp" 2 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "fabs,fcmp,fmove,fneg")) | |
| + "gs264e_falu1") | |
| + | |
| +(define_insn_reservation "gs264e_fcvt" 4 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "fcvt")) | |
| + "gs264e_falu1") | |
| + | |
| +(define_insn_reservation "gs264e_fdiv_sf" 12 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| + (eq_attr "mode" "SF"))) | |
| + "gs264e_falu1") | |
| + | |
| +(define_insn_reservation "gs264e_fdiv_df" 19 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| + (eq_attr "mode" "DF"))) | |
| + "gs264e_falu1") | |
| + | |
| +;; Force single-dispatch for unknown or multi. | |
| +(define_insn_reservation "gs264e_unknown" 1 | |
| + (and (eq_attr "cpu" "gs264e") | |
| + (eq_attr "type" "unknown,multi,atomic,syncloop")) | |
| + "gs264e_alu1 + gs264e_falu1 + gs264e_mem1") | |
| + | |
| +;; End of DFA-based pipeline description for gs264e | |
| diff --git a/gcc/config/mips/gs464.md b/gcc/config/mips/gs464.md | |
| new file mode 100644 | |
| index 0000000..82efb66 | |
| --- /dev/null | |
| +++ b/gcc/config/mips/gs464.md | |
| @@ -0,0 +1,137 @@ | |
| +;; Pipeline model for Loongson gs464 cores. | |
| + | |
| +;; Copyright (C) 2011-2018 Free Software Foundation, Inc. | |
| +;; | |
| +;; This file is part of GCC. | |
| +;; | |
| +;; GCC is free software; you can redistribute it and/or modify it | |
| +;; under the terms of the GNU General Public License as published | |
| +;; by the Free Software Foundation; either version 3, or (at your | |
| +;; option) any later version. | |
| +;; | |
| +;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
| +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
| +;; License for more details. | |
| +;; | |
| +;; You should have received a copy of the GNU General Public License | |
| +;; along with GCC; see the file COPYING3. If not see | |
| +;; <http://www.gnu.org/licenses/>. | |
| + | |
| +;; Uncomment the following line to output automata for debugging. | |
| +;; (automata_option "v") | |
| + | |
| +;; Automaton for integer instructions. | |
| +(define_automaton "gs464_a_alu") | |
| + | |
| +;; Automaton for floating-point instructions. | |
| +(define_automaton "gs464_a_falu") | |
| + | |
| +;; Automaton for memory operations. | |
| +(define_automaton "gs464_a_mem") | |
| + | |
| +;; Describe the resources. | |
| + | |
| +(define_cpu_unit "gs464_alu1" "gs464_a_alu") | |
| +(define_cpu_unit "gs464_alu2" "gs464_a_alu") | |
| +(define_cpu_unit "gs464_mem" "gs464_a_mem") | |
| +(define_cpu_unit "gs464_falu1" "gs464_a_falu") | |
| +(define_cpu_unit "gs464_falu2" "gs464_a_falu") | |
| + | |
| +;; Describe instruction reservations. | |
| + | |
| +(define_insn_reservation "gs464_arith" 1 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "arith,clz,const,logical, | |
| + move,nop,shift,signext,slt")) | |
| + "gs464_alu1 | gs464_alu2") | |
| + | |
| +(define_insn_reservation "gs464_branch" 1 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "branch,jump,call,condmove,trap")) | |
| + "gs464_alu1") | |
| + | |
| +(define_insn_reservation "gs464_mfhilo" 1 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "mfhi,mflo,mthi,mtlo")) | |
| + "gs464_alu2") | |
| + | |
| +;; Operation imul3nc is fully pipelined. | |
| +(define_insn_reservation "gs464_imul3nc" 5 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "imul3nc")) | |
| + "gs464_alu2") | |
| + | |
| +(define_insn_reservation "gs464_imul" 7 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "imul,imadd")) | |
| + "gs464_alu2 * 7") | |
| + | |
| +(define_insn_reservation "gs464_idiv_si" 12 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (and (eq_attr "type" "idiv") | |
| + (eq_attr "mode" "SI"))) | |
| + "gs464_alu2 * 12") | |
| + | |
| +(define_insn_reservation "gs464_idiv_di" 25 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (and (eq_attr "type" "idiv") | |
| + (eq_attr "mode" "DI"))) | |
| + "gs464_alu2 * 25") | |
| + | |
| +(define_insn_reservation "gs464_load" 3 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "load")) | |
| + "gs464_mem") | |
| + | |
| +(define_insn_reservation "gs464_fpload" 4 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "load,mfc,mtc")) | |
| + "gs464_mem") | |
| + | |
| +(define_insn_reservation "gs464_prefetch" 0 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "prefetch,prefetchx")) | |
| + "gs464_mem") | |
| + | |
| +(define_insn_reservation "gs464_store" 0 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "store,fpstore,fpidxstore")) | |
| + "gs464_mem") | |
| + | |
| +;; All the fp operations can be executed in FALU1. Only fp add, | |
| +;; sub, mul, madd can be executed in FALU2. Try FALU2 first. | |
| +(define_insn_reservation "gs464_fadd" 6 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "fadd,fmul,fmadd")) | |
| + "gs464_falu2 | gs464_falu1") | |
| + | |
| +(define_insn_reservation "gs464_fcmp" 2 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "fabs,fcmp,fmove,fneg")) | |
| + "gs464_falu1") | |
| + | |
| +(define_insn_reservation "gs464_fcvt" 4 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "fcvt")) | |
| + "gs464_falu1") | |
| + | |
| +(define_insn_reservation "gs464_fdiv_sf" 12 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| + (eq_attr "mode" "SF"))) | |
| + "gs464_falu1 * 12") | |
| + | |
| +(define_insn_reservation "gs464_fdiv_df" 19 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| + (eq_attr "mode" "DF"))) | |
| + "gs464_falu1 * 19") | |
| + | |
| +;; Force single-dispatch for unknown or multi. | |
| +(define_insn_reservation "gs464_unknown" 1 | |
| + (and (eq_attr "cpu" "gs464") | |
| + (eq_attr "type" "unknown,multi,atomic,syncloop")) | |
| + "gs464_alu1 + gs464_alu2 + gs464_falu1 + gs464_falu2 + gs464_mem") | |
| + | |
| +;; End of DFA-based pipeline description for gs464 | |
| diff --git a/gcc/config/mips/gs464e.md b/gcc/config/mips/gs464e.md | |
| new file mode 100644 | |
| index 0000000..e2ef37d | |
| --- /dev/null | |
| +++ b/gcc/config/mips/gs464e.md | |
| @@ -0,0 +1,137 @@ | |
| +;; Pipeline model for Loongson gs464e cores. | |
| + | |
| +;; Copyright (C) 2011-2018 Free Software Foundation, Inc. | |
| +;; | |
| +;; This file is part of GCC. | |
| +;; | |
| +;; GCC is free software; you can redistribute it and/or modify it | |
| +;; under the terms of the GNU General Public License as published | |
| +;; by the Free Software Foundation; either version 3, or (at your | |
| +;; option) any later version. | |
| +;; | |
| +;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
| +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
| +;; License for more details. | |
| +;; | |
| +;; You should have received a copy of the GNU General Public License | |
| +;; along with GCC; see the file COPYING3. If not see | |
| +;; <http://www.gnu.org/licenses/>. | |
| + | |
| +;; Uncomment the following line to output automata for debugging. | |
| +;; (automata_option "v") | |
| + | |
| +;; Automaton for integer instructions. | |
| +(define_automaton "gs464e_a_alu") | |
| + | |
| +;; Automaton for floating-point instructions. | |
| +(define_automaton "gs464e_a_falu") | |
| + | |
| +;; Automaton for memory operations. | |
| +(define_automaton "gs464e_a_mem") | |
| + | |
| +;; Describe the resources. | |
| + | |
| +(define_cpu_unit "gs464e_alu1" "gs464e_a_alu") | |
| +(define_cpu_unit "gs464e_alu2" "gs464e_a_alu") | |
| +(define_cpu_unit "gs464e_mem1" "gs464e_a_mem") | |
| +(define_cpu_unit "gs464e_mem2" "gs464e_a_mem") | |
| +(define_cpu_unit "gs464e_falu1" "gs464e_a_falu") | |
| +(define_cpu_unit "gs464e_falu2" "gs464e_a_falu") | |
| + | |
| +;; Describe instruction reservations. | |
| + | |
| +(define_insn_reservation "gs464e_arith" 1 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "arith,clz,const,logical, | |
| + move,nop,shift,signext,slt")) | |
| + "gs464e_alu1 | gs464e_alu2") | |
| + | |
| +(define_insn_reservation "gs464e_branch" 1 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "branch,jump,call,condmove,trap")) | |
| + "gs464e_alu1 | gs464e_alu2") | |
| + | |
| +(define_insn_reservation "gs464e_mfhilo" 1 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "mfhi,mflo,mthi,mtlo")) | |
| + "gs464e_alu1 | gs464e_alu2") | |
| + | |
| +;; Operation imul3nc is fully pipelined. | |
| +(define_insn_reservation "gs464e_imul3nc" 5 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "imul3nc")) | |
| + "gs464e_alu1 | gs464e_alu2") | |
| + | |
| +(define_insn_reservation "gs464e_imul" 7 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "imul,imadd")) | |
| + "gs464e_alu1 | gs464e_alu2") | |
| + | |
| +(define_insn_reservation "gs464e_idiv_si" 12 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (and (eq_attr "type" "idiv") | |
| + (eq_attr "mode" "SI"))) | |
| + "gs464e_alu1 | gs464e_alu2") | |
| + | |
| +(define_insn_reservation "gs464e_idiv_di" 25 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (and (eq_attr "type" "idiv") | |
| + (eq_attr "mode" "DI"))) | |
| + "gs464e_alu1 | gs464e_alu2") | |
| + | |
| +(define_insn_reservation "gs464e_load" 4 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "load")) | |
| + "gs464e_mem1 | gs464e_mem2") | |
| + | |
| +(define_insn_reservation "gs464e_fpload" 5 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "load,mfc,mtc")) | |
| + "gs464e_mem1 | gs464e_mem2") | |
| + | |
| +(define_insn_reservation "gs464e_prefetch" 0 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "prefetch,prefetchx")) | |
| + "gs464e_mem1 | gs464e_mem2") | |
| + | |
| +(define_insn_reservation "gs464e_store" 0 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "store,fpstore,fpidxstore")) | |
| + "gs464e_mem1 | gs464e_mem2") | |
| + | |
| +(define_insn_reservation "gs464e_fadd" 4 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "fadd,fmul,fmadd")) | |
| + "gs464e_falu1 | gs464e_falu2") | |
| + | |
| +(define_insn_reservation "gs464e_fcmp" 2 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "fabs,fcmp,fmove,fneg")) | |
| + "gs464e_falu1 | gs464e_falu2") | |
| + | |
| +(define_insn_reservation "gs464e_fcvt" 4 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "fcvt")) | |
| + "gs464e_falu1 | gs464e_falu2") | |
| + | |
| +(define_insn_reservation "gs464e_fdiv_sf" 12 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| + (eq_attr "mode" "SF"))) | |
| + "gs464e_falu1 | gs464e_falu2") | |
| + | |
| +(define_insn_reservation "gs464e_fdiv_df" 19 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| + (eq_attr "mode" "DF"))) | |
| + "gs464e_falu1 | gs464e_falu2") | |
| + | |
| +;; Force single-dispatch for unknown or multi. | |
| +(define_insn_reservation "gs464e_unknown" 1 | |
| + (and (eq_attr "cpu" "gs464e") | |
| + (eq_attr "type" "unknown,multi,atomic,syncloop")) | |
| + "gs464e_alu1 + gs464e_alu2 + gs464e_falu1 | |
| + + gs464e_falu2 + gs464e_mem1 + gs464e_mem2") | |
| + | |
| +;; End of DFA-based pipeline description for gs464e | |
| diff --git a/gcc/config/mips/loongson-mmi.md b/gcc/config/mips/loongson-mmi.md | |
| new file mode 100644 | |
| index 0000000..ad23f67 | |
| --- /dev/null | |
| +++ b/gcc/config/mips/loongson-mmi.md | |
| @@ -0,0 +1,903 @@ | |
| +;; Machine description for Loongson MultiMedia extensions Instructions (MMI). | |
| +;; Copyright (C) 2008-2018 Free Software Foundation, Inc. | |
| +;; Contributed by CodeSourcery. | |
| +;; | |
| +;; This file is part of GCC. | |
| +;; | |
| +;; GCC is free software; you can redistribute it and/or modify | |
| +;; it under the terms of the GNU General Public License as published by | |
| +;; the Free Software Foundation; either version 3, or (at your option) | |
| +;; any later version. | |
| + | |
| +;; GCC is distributed in the hope that it will be useful, | |
| +;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| +;; GNU General Public License for more details. | |
| + | |
| +;; You should have received a copy of the GNU General Public License | |
| +;; along with GCC; see the file COPYING3. If not see | |
| +;; <http://www.gnu.org/licenses/>. | |
| + | |
| +(define_c_enum "unspec" [ | |
| + UNSPEC_LOONGSON_PAVG | |
| + UNSPEC_LOONGSON_PCMPEQ | |
| + UNSPEC_LOONGSON_PCMPGT | |
| + UNSPEC_LOONGSON_PEXTR | |
| + UNSPEC_LOONGSON_PINSRH | |
| + UNSPEC_LOONGSON_VINIT | |
| + UNSPEC_LOONGSON_PMADD | |
| + UNSPEC_LOONGSON_PMOVMSK | |
| + UNSPEC_LOONGSON_PMULHU | |
| + UNSPEC_LOONGSON_PMULH | |
| + UNSPEC_LOONGSON_PMULU | |
| + UNSPEC_LOONGSON_PASUBUB | |
| + UNSPEC_LOONGSON_BIADD | |
| + UNSPEC_LOONGSON_PSADBH | |
| + UNSPEC_LOONGSON_PSHUFH | |
| + UNSPEC_LOONGSON_PUNPCKH | |
| + UNSPEC_LOONGSON_PUNPCKL | |
| + UNSPEC_LOONGSON_PADDD | |
| + UNSPEC_LOONGSON_PSUBD | |
| + UNSPEC_LOONGSON_DSLL | |
| + UNSPEC_LOONGSON_DSRL | |
| +]) | |
| + | |
| +;; Mode iterators and attributes. | |
| + | |
| +;; 64-bit vectors of bytes. | |
| +(define_mode_iterator VB [V8QI]) | |
| + | |
| +;; 64-bit vectors of halfwords. | |
| +(define_mode_iterator VH [V4HI]) | |
| + | |
| +;; 64-bit vectors of words. | |
| +(define_mode_iterator VW [V2SI]) | |
| + | |
| +;; 64-bit vectors of halfwords and bytes. | |
| +(define_mode_iterator VHB [V4HI V8QI]) | |
| + | |
| +;; 64-bit vectors of words and halfwords. | |
| +(define_mode_iterator VWH [V2SI V4HI]) | |
| + | |
| +;; 64-bit vectors of words and bytes. | |
| +(define_mode_iterator VWB [V2SI V8QI]) | |
| + | |
| +;; 64-bit vectors of words, halfwords and bytes. | |
| +(define_mode_iterator VWHB [V2SI V4HI V8QI]) | |
| + | |
| +;; 64-bit vectors of words, halfwords and bytes; and DImode. | |
| +(define_mode_iterator VWHBDI [V2SI V4HI V8QI DI]) | |
| + | |
| +;; The Loongson instruction suffixes corresponding to the modes in the | |
| +;; VWHBDI iterator. | |
| +(define_mode_attr V_suffix [(V2SI "w") (V4HI "h") (V8QI "b") (DI "d")]) | |
| + | |
| +;; Given a vector type T, the mode of a vector half the size of T | |
| +;; and with the same number of elements. | |
| +(define_mode_attr V_squash [(V2SI "V2HI") (V4HI "V4QI")]) | |
| + | |
| +;; Given a vector type T, the mode of a vector the same size as T | |
| +;; but with half as many elements. | |
| +(define_mode_attr V_stretch_half [(V2SI "DI") (V4HI "V2SI") (V8QI "V4HI")]) | |
| + | |
| +;; The Loongson instruction suffixes corresponding to the transformation | |
| +;; expressed by V_stretch_half. | |
| +(define_mode_attr V_stretch_half_suffix [(V2SI "wd") (V4HI "hw") (V8QI "bh")]) | |
| + | |
| +;; Given a vector type T, the mode of a vector the same size as T | |
| +;; but with twice as many elements. | |
| +(define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")]) | |
| + | |
| +;; Given a vector type T, the inner mode. | |
| +(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) | |
| + | |
| +;; The Loongson instruction suffixes corresponding to the conversions | |
| +;; specified by V_squash_double. | |
| +(define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")]) | |
| + | |
| +;; Move patterns. | |
| + | |
| +;; Expander to legitimize moves involving values of vector modes. | |
| +(define_expand "mov<mode>" | |
| + [(set (match_operand:VWHB 0) | |
| + (match_operand:VWHB 1))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + if (mips_legitimize_move (<MODE>mode, operands[0], operands[1])) | |
| + DONE; | |
| +}) | |
| + | |
| +;; Handle legitimized moves between values of vector modes. | |
| +(define_insn "mov<mode>_internal" | |
| + [(set (match_operand:VWHB 0 "nonimmediate_operand" "=m,f,d,f, d, m, d") | |
| + (match_operand:VWHB 1 "move_operand" "f,m,f,dYG,dYG,dYG,m"))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + { return mips_output_move (operands[0], operands[1]); } | |
| + [(set_attr "move_type" "fpstore,fpload,mfc,mtc,move,store,load") | |
| + (set_attr "mode" "DI")]) | |
| + | |
| +;; Initialization of a vector. | |
| + | |
| +(define_expand "vec_init<mode><unitmode>" | |
| + [(set (match_operand:VWHB 0 "register_operand") | |
| + (match_operand 1 ""))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + mips_expand_vector_init (operands[0], operands[1]); | |
| + DONE; | |
| +}) | |
| + | |
| +;; Helper for vec_init. Initialize element 0 of the output from the input. | |
| +;; All other elements are undefined. | |
| +(define_insn "loongson_vec_init1_<mode>" | |
| + [(set (match_operand:VHB 0 "register_operand" "=f") | |
| + (unspec:VHB [(truncate:<V_inner> | |
| + (match_operand:DI 1 "reg_or_0_operand" "Jd"))] | |
| + UNSPEC_LOONGSON_VINIT))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "dmtc1\t%z1,%0" | |
| + [(set_attr "move_type" "mtc") | |
| + (set_attr "mode" "DI")]) | |
| + | |
| +;; Helper for vec_initv2si. | |
| +(define_insn "*vec_concatv2si" | |
| + [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| + (vec_concat:V2SI | |
| + (match_operand:SI 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpcklwd\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +;; Instruction patterns for SIMD instructions. | |
| + | |
| +;; Pack with signed saturation. | |
| +(define_insn "vec_pack_ssat_<mode>" | |
| + [(set (match_operand:<V_squash_double> 0 "register_operand" "=f") | |
| + (vec_concat:<V_squash_double> | |
| + (ss_truncate:<V_squash> | |
| + (match_operand:VWH 1 "register_operand" "f")) | |
| + (ss_truncate:<V_squash> | |
| + (match_operand:VWH 2 "register_operand" "f"))))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "packss<V_squash_double_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Pack with unsigned saturation. | |
| +(define_insn "vec_pack_usat_<mode>" | |
| + [(set (match_operand:<V_squash_double> 0 "register_operand" "=f") | |
| + (vec_concat:<V_squash_double> | |
| + (us_truncate:<V_squash> | |
| + (match_operand:VH 1 "register_operand" "f")) | |
| + (us_truncate:<V_squash> | |
| + (match_operand:VH 2 "register_operand" "f"))))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "packus<V_squash_double_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Addition, treating overflow by wraparound. | |
| +(define_insn "add<mode>3" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (plus:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| + (match_operand:VWHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "padd<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Addition of doubleword integers stored in FP registers. | |
| +;; Overflow is treated by wraparound. | |
| +;; We use 'unspec' instead of 'plus' here to avoid clash with | |
| +;; mips.md::add<mode>3. If 'plus' was used, then such instruction | |
| +;; would be recognized as adddi3 and reload would make it use | |
| +;; GPRs instead of FPRs. | |
| +(define_insn "loongson_paddd" | |
| + [(set (match_operand:DI 0 "register_operand" "=f") | |
| + (unspec:DI [(match_operand:DI 1 "register_operand" "f") | |
| + (match_operand:DI 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PADDD))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "paddd\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Addition, treating overflow by signed saturation. | |
| +(define_insn "ssadd<mode>3" | |
| + [(set (match_operand:VHB 0 "register_operand" "=f") | |
| + (ss_plus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| + (match_operand:VHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "padds<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Addition, treating overflow by unsigned saturation. | |
| +(define_insn "usadd<mode>3" | |
| + [(set (match_operand:VHB 0 "register_operand" "=f") | |
| + (us_plus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| + (match_operand:VHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "paddus<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Logical AND NOT. | |
| +(define_insn "loongson_pandn_<V_suffix>" | |
| + [(set (match_operand:VWHBDI 0 "register_operand" "=f") | |
| + (and:VWHBDI | |
| + (not:VWHBDI (match_operand:VWHBDI 1 "register_operand" "f")) | |
| + (match_operand:VWHBDI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pandn\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Logical AND. | |
| +(define_insn "and<mode>3" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (and:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| + (match_operand:VWHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "and\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Logical OR. | |
| +(define_insn "ior<mode>3" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (ior:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| + (match_operand:VWHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "or\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +;; Logical XOR. | |
| +(define_insn "xor<mode>3" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (xor:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| + (match_operand:VWHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "xor\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Logical NOR. | |
| +(define_insn "*loongson_nor" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (and:VWHB | |
| + (not:VWHB (match_operand:VWHB 1 "register_operand" "f")) | |
| + (not:VWHB (match_operand:VWHB 2 "register_operand" "f"))))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "nor\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Logical NOT. | |
| +(define_insn "one_cmpl<mode>2" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (not:VWHB (match_operand:VWHB 1 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "nor\t%0,%1,%1" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Average. | |
| +(define_insn "loongson_pavg<V_suffix>" | |
| + [(set (match_operand:VHB 0 "register_operand" "=f") | |
| + (unspec:VHB [(match_operand:VHB 1 "register_operand" "f") | |
| + (match_operand:VHB 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PAVG))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pavg<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Equality test. | |
| +(define_insn "loongson_pcmpeq<V_suffix>" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") | |
| + (match_operand:VWHB 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PCMPEQ))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pcmpeq<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Greater-than test. | |
| +(define_insn "loongson_pcmpgt<V_suffix>" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") | |
| + (match_operand:VWHB 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PCMPGT))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pcmpgt<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Extract halfword. | |
| +(define_insn "loongson_pextrh" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PEXTR))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pextrh\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +;; Insert halfword. | |
| +(define_insn "loongson_pinsrh_0" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 4) (const_int 1) | |
| + (const_int 2) (const_int 3)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pinsrh_0\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_pinsrh_1" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 4) | |
| + (const_int 2) (const_int 3)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pinsrh_1\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_pinsrh_2" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 1) | |
| + (const_int 4) (const_int 3)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pinsrh_2\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_pinsrh_3" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 1) | |
| + (const_int 2) (const_int 4)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pinsrh_3\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "*vec_setv4hi" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f") | |
| + (match_operand:SI 3 "const_0_to_3_operand" "")] | |
| + UNSPEC_LOONGSON_PINSRH))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pinsrh_%3\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_expand "vec_setv4hi" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:HI 2 "register_operand" "f") | |
| + (match_operand:SI 3 "const_0_to_3_operand" "")] | |
| + UNSPEC_LOONGSON_PINSRH))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + rtx ext = gen_reg_rtx (SImode); | |
| + emit_move_insn (ext, gen_lowpart (SImode, operands[2])); | |
| + operands[2] = ext; | |
| +}) | |
| + | |
| +;; Multiply and add packed integers. | |
| +(define_insn "loongson_pmaddhw" | |
| + [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| + (unspec:V2SI [(match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PMADD))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmaddhw\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +(define_expand "sdot_prodv4hi" | |
| + [(match_operand:V2SI 0 "register_operand" "") | |
| + (match_operand:V4HI 1 "register_operand" "") | |
| + (match_operand:V4HI 2 "register_operand" "") | |
| + (match_operand:V2SI 3 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + rtx t = gen_reg_rtx (V2SImode); | |
| + emit_insn (gen_loongson_pmaddhw (t, operands[1], operands[2])); | |
| + emit_insn (gen_addv2si3 (operands[0], t, operands[3])); | |
| + DONE; | |
| +}) | |
| + | |
| +;; Maximum of signed halfwords. | |
| +(define_insn "smaxv4hi3" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (smax:V4HI (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmaxsh\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +(define_expand "smax<mode>3" | |
| + [(match_operand:VWB 0 "register_operand" "") | |
| + (match_operand:VWB 1 "register_operand" "") | |
| + (match_operand:VWB 2 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + mips_expand_vec_minmax (operands[0], operands[1], operands[2], | |
| + gen_loongson_pcmpgt<V_suffix>, false); | |
| + DONE; | |
| +}) | |
| + | |
| +;; Maximum of unsigned bytes. | |
| +(define_insn "umaxv8qi3" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (umax:V8QI (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmaxub\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Minimum of signed halfwords. | |
| +(define_insn "sminv4hi3" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (smin:V4HI (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pminsh\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +(define_expand "smin<mode>3" | |
| + [(match_operand:VWB 0 "register_operand" "") | |
| + (match_operand:VWB 1 "register_operand" "") | |
| + (match_operand:VWB 2 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + mips_expand_vec_minmax (operands[0], operands[1], operands[2], | |
| + gen_loongson_pcmpgt<V_suffix>, true); | |
| + DONE; | |
| +}) | |
| + | |
| +;; Minimum of unsigned bytes. | |
| +(define_insn "uminv8qi3" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (umin:V8QI (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pminub\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Move byte mask. | |
| +(define_insn "loongson_pmovmsk<V_suffix>" | |
| + [(set (match_operand:VB 0 "register_operand" "=f") | |
| + (unspec:VB [(match_operand:VB 1 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PMOVMSK))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmovmsk<V_suffix>\t%0,%1" | |
| + [(set_attr "type" "fabs")]) | |
| + | |
| +;; Multiply unsigned integers and store high result. | |
| +(define_insn "umul<mode>3_highpart" | |
| + [(set (match_operand:VH 0 "register_operand" "=f") | |
| + (unspec:VH [(match_operand:VH 1 "register_operand" "f") | |
| + (match_operand:VH 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PMULHU))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmulhu<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Multiply signed integers and store high result. | |
| +(define_insn "smul<mode>3_highpart" | |
| + [(set (match_operand:VH 0 "register_operand" "=f") | |
| + (unspec:VH [(match_operand:VH 1 "register_operand" "f") | |
| + (match_operand:VH 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PMULH))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmulh<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Multiply signed integers and store low result. | |
| +(define_insn "mul<mode>3" | |
| + [(set (match_operand:VH 0 "register_operand" "=f") | |
| + (mult:VH (match_operand:VH 1 "register_operand" "f") | |
| + (match_operand:VH 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmull<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Multiply unsigned word integers. | |
| +(define_insn "loongson_pmulu<V_suffix>" | |
| + [(set (match_operand:DI 0 "register_operand" "=f") | |
| + (unspec:DI [(match_operand:VW 1 "register_operand" "f") | |
| + (match_operand:VW 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PMULU))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pmulu<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Absolute difference. | |
| +(define_insn "loongson_pasubub" | |
| + [(set (match_operand:VB 0 "register_operand" "=f") | |
| + (unspec:VB [(match_operand:VB 1 "register_operand" "f") | |
| + (match_operand:VB 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PASUBUB))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pasubub\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Sum of unsigned byte integers. | |
| +(define_insn "loongson_biadd" | |
| + [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") | |
| + (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_BIADD))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "biadd\t%0,%1" | |
| + [(set_attr "type" "fabs")]) | |
| + | |
| +(define_insn "reduc_uplus_v8qi" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_BIADD))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "biadd\t%0,%1" | |
| + [(set_attr "type" "fabs")]) | |
| + | |
| +;; Sum of absolute differences. | |
| +(define_insn "loongson_psadbh" | |
| + [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") | |
| + (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f") | |
| + (match_operand:VB 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PSADBH))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pasubub\t%0,%1,%2;biadd\t%0,%0" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Shuffle halfwords. | |
| +(define_insn "loongson_pshufh" | |
| + [(set (match_operand:VH 0 "register_operand" "=f") | |
| + (unspec:VH [(match_operand:VH 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PSHUFH))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "pshufh\t%0,%1,%2" | |
| + [(set_attr "type" "fmul")]) | |
| + | |
| +;; Shift left logical. | |
| +(define_insn "ashl<mode>3" | |
| + [(set (match_operand:VWH 0 "register_operand" "=f") | |
| + (ashift:VWH (match_operand:VWH 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "psll<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +;; Shift right arithmetic. | |
| +(define_insn "ashr<mode>3" | |
| + [(set (match_operand:VWH 0 "register_operand" "=f") | |
| + (ashiftrt:VWH (match_operand:VWH 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "psra<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +;; Shift right logical. | |
| +(define_insn "lshr<mode>3" | |
| + [(set (match_operand:VWH 0 "register_operand" "=f") | |
| + (lshiftrt:VWH (match_operand:VWH 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "psrl<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +;; Subtraction, treating overflow by wraparound. | |
| +(define_insn "sub<mode>3" | |
| + [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| + (minus:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| + (match_operand:VWHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "psub<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Subtraction of doubleword integers stored in FP registers. | |
| +;; Overflow is treated by wraparound. | |
| +;; See loongson_paddd for the reason we use 'unspec' rather than | |
| +;; 'minus' here. | |
| +(define_insn "loongson_psubd" | |
| + [(set (match_operand:DI 0 "register_operand" "=f") | |
| + (unspec:DI [(match_operand:DI 1 "register_operand" "f") | |
| + (match_operand:DI 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_PSUBD))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "psubd\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Subtraction, treating overflow by signed saturation. | |
| +(define_insn "sssub<mode>3" | |
| + [(set (match_operand:VHB 0 "register_operand" "=f") | |
| + (ss_minus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| + (match_operand:VHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "psubs<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Subtraction, treating overflow by unsigned saturation. | |
| +(define_insn "ussub<mode>3" | |
| + [(set (match_operand:VHB 0 "register_operand" "=f") | |
| + (us_minus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| + (match_operand:VHB 2 "register_operand" "f")))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "psubus<V_suffix>\t%0,%1,%2" | |
| + [(set_attr "type" "fadd")]) | |
| + | |
| +;; Unpack high data. Recall that Loongson only runs in little-endian. | |
| +(define_insn "loongson_punpckhbh" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (vec_select:V8QI | |
| + (vec_concat:V16QI | |
| + (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")) | |
| + (parallel [(const_int 4) (const_int 12) | |
| + (const_int 5) (const_int 13) | |
| + (const_int 6) (const_int 14) | |
| + (const_int 7) (const_int 15)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpckhbh\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_punpckhhw" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 2) (const_int 6) | |
| + (const_int 3) (const_int 7)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpckhhw\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_punpckhhw_qi" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (vec_select:V8QI | |
| + (vec_concat:V16QI | |
| + (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")) | |
| + (parallel [(const_int 4) (const_int 5) | |
| + (const_int 12) (const_int 13) | |
| + (const_int 6) (const_int 7) | |
| + (const_int 14) (const_int 15)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpckhhw\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_punpckhwd" | |
| + [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| + (vec_select:V2SI | |
| + (vec_concat:V4SI | |
| + (match_operand:V2SI 1 "register_operand" "f") | |
| + (match_operand:V2SI 2 "register_operand" "f")) | |
| + (parallel [(const_int 1) (const_int 3)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpckhwd\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +(define_insn "loongson_punpckhwd_qi" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (vec_select:V8QI | |
| + (vec_concat:V16QI | |
| + (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")) | |
| + (parallel [(const_int 4) (const_int 5) | |
| + (const_int 6) (const_int 7) | |
| + (const_int 12) (const_int 13) | |
| + (const_int 14) (const_int 15)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpckhwd\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +(define_insn "loongson_punpckhwd_hi" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 2) (const_int 3) | |
| + (const_int 6) (const_int 7)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpckhwd\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +;; Unpack low data. | |
| +(define_insn "loongson_punpcklbh" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (vec_select:V8QI | |
| + (vec_concat:V16QI | |
| + (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 8) | |
| + (const_int 1) (const_int 9) | |
| + (const_int 2) (const_int 10) | |
| + (const_int 3) (const_int 11)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpcklbh\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_punpcklhw" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 4) | |
| + (const_int 1) (const_int 5)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpcklhw\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "*loongson_punpcklhw_qi" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (vec_select:V8QI | |
| + (vec_concat:V16QI | |
| + (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 1) | |
| + (const_int 8) (const_int 9) | |
| + (const_int 2) (const_int 3) | |
| + (const_int 10) (const_int 11)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpcklhw\t%0,%1,%2" | |
| + [(set_attr "type" "fdiv")]) | |
| + | |
| +(define_insn "loongson_punpcklwd" | |
| + [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| + (vec_select:V2SI | |
| + (vec_concat:V4SI | |
| + (match_operand:V2SI 1 "register_operand" "f") | |
| + (match_operand:V2SI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 2)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpcklwd\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +(define_insn "*loongson_punpcklwd_qi" | |
| + [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| + (vec_select:V8QI | |
| + (vec_concat:V16QI | |
| + (match_operand:V8QI 1 "register_operand" "f") | |
| + (match_operand:V8QI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 1) | |
| + (const_int 2) (const_int 3) | |
| + (const_int 8) (const_int 9) | |
| + (const_int 10) (const_int 11)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpcklwd\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +(define_insn "*loongson_punpcklwd_hi" | |
| + [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| + (vec_select:V4HI | |
| + (vec_concat:V8HI | |
| + (match_operand:V4HI 1 "register_operand" "f") | |
| + (match_operand:V4HI 2 "register_operand" "f")) | |
| + (parallel [(const_int 0) (const_int 1) | |
| + (const_int 4) (const_int 5)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "punpcklwd\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +(define_expand "vec_unpacks_lo_<mode>" | |
| + [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| + (match_operand:VHB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + mips_expand_vec_unpack (operands, false, false); | |
| + DONE; | |
| +}) | |
| + | |
| +(define_expand "vec_unpacks_hi_<mode>" | |
| + [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| + (match_operand:VHB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + mips_expand_vec_unpack (operands, false, true); | |
| + DONE; | |
| +}) | |
| + | |
| +(define_expand "vec_unpacku_lo_<mode>" | |
| + [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| + (match_operand:VHB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + mips_expand_vec_unpack (operands, true, false); | |
| + DONE; | |
| +}) | |
| + | |
| +(define_expand "vec_unpacku_hi_<mode>" | |
| + [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| + (match_operand:VHB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + mips_expand_vec_unpack (operands, true, true); | |
| + DONE; | |
| +}) | |
| + | |
| +;; Whole vector shifts, used for reduction epilogues. | |
| +(define_insn "vec_shl_<mode>" | |
| + [(set (match_operand:VWHBDI 0 "register_operand" "=f") | |
| + (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_DSLL))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "dsll\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +(define_insn "vec_shr_<mode>" | |
| + [(set (match_operand:VWHBDI 0 "register_operand" "=f") | |
| + (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f") | |
| + (match_operand:SI 2 "register_operand" "f")] | |
| + UNSPEC_LOONGSON_DSRL))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "dsrl\t%0,%1,%2" | |
| + [(set_attr "type" "fcvt")]) | |
| + | |
| +(define_insn "vec_loongson_extract_lo_<mode>" | |
| + [(set (match_operand:<V_inner> 0 "register_operand" "=r") | |
| + (vec_select:<V_inner> | |
| + (match_operand:VWHB 1 "register_operand" "f") | |
| + (parallel [(const_int 0)])))] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| + "mfc1\t%0,%1" | |
| + [(set_attr "type" "mfc")]) | |
| + | |
| +(define_expand "reduc_plus_scal_<mode>" | |
| + [(match_operand:<V_inner> 0 "register_operand" "") | |
| + (match_operand:VWHB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| + mips_expand_vec_reduc (tmp, operands[1], gen_add<mode>3); | |
| + emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| + DONE; | |
| +}) | |
| + | |
| +(define_expand "reduc_smax_scal_<mode>" | |
| + [(match_operand:<V_inner> 0 "register_operand" "") | |
| + (match_operand:VWHB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| + mips_expand_vec_reduc (tmp, operands[1], gen_smax<mode>3); | |
| + emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| + DONE; | |
| +}) | |
| + | |
| +(define_expand "reduc_smin_scal_<mode>" | |
| + [(match_operand:<V_inner> 0 "register_operand" "") | |
| + (match_operand:VWHB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| + mips_expand_vec_reduc (tmp, operands[1], gen_smin<mode>3); | |
| + emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| + DONE; | |
| +}) | |
| + | |
| +(define_expand "reduc_umax_scal_<mode>" | |
| + [(match_operand:<V_inner> 0 "register_operand" "") | |
| + (match_operand:VB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| + mips_expand_vec_reduc (tmp, operands[1], gen_umax<mode>3); | |
| + emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| + DONE; | |
| +}) | |
| + | |
| +(define_expand "reduc_umin_scal_<mode>" | |
| + [(match_operand:<V_inner> 0 "register_operand" "") | |
| + (match_operand:VB 1 "register_operand" "")] | |
| + "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI" | |
| +{ | |
| + rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| + mips_expand_vec_reduc (tmp, operands[1], gen_umin<mode>3); | |
| + emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| + DONE; | |
| +}) | |
| diff --git a/gcc/config/mips/loongson-mmiintrin.h b/gcc/config/mips/loongson-mmiintrin.h | |
| new file mode 100644 | |
| index 0000000..6f35fb5 | |
| --- /dev/null | |
| +++ b/gcc/config/mips/loongson-mmiintrin.h | |
| @@ -0,0 +1,691 @@ | |
| +/* Intrinsics for Loongson MultiMedia extension Instructions operations. | |
| + | |
| + Copyright (C) 2008-2018 Free Software Foundation, Inc. | |
| + Contributed by CodeSourcery. | |
| + | |
| + This file is part of GCC. | |
| + | |
| + GCC is free software; you can redistribute it and/or modify it | |
| + under the terms of the GNU General Public License as published | |
| + by the Free Software Foundation; either version 3, or (at your | |
| + option) any later version. | |
| + | |
| + GCC is distributed in the hope that it will be useful, but WITHOUT | |
| + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
| + License for more details. | |
| + | |
| + Under Section 7 of GPL version 3, you are granted additional | |
| + permissions described in the GCC Runtime Library Exception, version | |
| + 3.1, as published by the Free Software Foundation. | |
| + | |
| + You should have received a copy of the GNU General Public License and | |
| + a copy of the GCC Runtime Library Exception along with this program; | |
| + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
| + <http://www.gnu.org/licenses/>. */ | |
| + | |
| +#ifndef _GCC_LOONGSON_MMIINTRIN_H | |
| +#define _GCC_LOONGSON_MMIINTRIN_H | |
| + | |
| +#if !defined(__mips_loongson_mmi) | |
| +# error "You must select -mloongson-mmi or -march=loongson2e/2f/3a to use \ | |
| + loongson-mmiintrin.h" | |
| +#endif | |
| + | |
| +#ifdef __cplusplus | |
| +extern "C" { | |
| +#endif | |
| + | |
| +#include <stdint.h> | |
| + | |
| +/* Vectors of unsigned bytes, halfwords and words. */ | |
| +typedef uint8_t uint8x8_t __attribute__((vector_size (8))); | |
| +typedef uint16_t uint16x4_t __attribute__((vector_size (8))); | |
| +typedef uint32_t uint32x2_t __attribute__((vector_size (8))); | |
| + | |
| +/* Vectors of signed bytes, halfwords and words. */ | |
| +typedef int8_t int8x8_t __attribute__((vector_size (8))); | |
| +typedef int16_t int16x4_t __attribute__((vector_size (8))); | |
| +typedef int32_t int32x2_t __attribute__((vector_size (8))); | |
| + | |
| +/* SIMD intrinsics. | |
| + Unless otherwise noted, calls to the functions below will expand into | |
| + precisely one machine instruction, modulo any moves required to | |
| + satisfy register allocation constraints. */ | |
| + | |
| +/* Pack with signed saturation. */ | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +packsswh (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_packsswh (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +packsshb (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_packsshb (s, t); | |
| +} | |
| + | |
| +/* Pack with unsigned saturation. */ | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +packushb (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_packushb (s, t); | |
| +} | |
| + | |
| +/* Vector addition, treating overflow by wraparound. */ | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +paddw_u (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_paddw_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +paddh_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_paddh_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +paddb_u (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_paddb_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +paddw_s (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_paddw_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +paddh_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_paddh_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +paddb_s (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_paddb_s (s, t); | |
| +} | |
| + | |
| +/* Addition of doubleword integers, treating overflow by wraparound. */ | |
| +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) | |
| +paddd_u (uint64_t s, uint64_t t) | |
| +{ | |
| + return __builtin_loongson_paddd_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| +paddd_s (int64_t s, int64_t t) | |
| +{ | |
| + return __builtin_loongson_paddd_s (s, t); | |
| +} | |
| + | |
| +/* Vector addition, treating overflow by signed saturation. */ | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +paddsh (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_paddsh (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +paddsb (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_paddsb (s, t); | |
| +} | |
| + | |
| +/* Vector addition, treating overflow by unsigned saturation. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +paddush (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_paddush (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +paddusb (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_paddusb (s, t); | |
| +} | |
| + | |
| +/* Logical AND NOT. */ | |
| +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) | |
| +pandn_ud (uint64_t s, uint64_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_ud (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +pandn_uw (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_uw (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pandn_uh (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_uh (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pandn_ub (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_ub (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| +pandn_sd (int64_t s, int64_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_sd (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +pandn_sw (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_sw (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pandn_sh (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_sh (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +pandn_sb (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pandn_sb (s, t); | |
| +} | |
| + | |
| +/* Average. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pavgh (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pavgh (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pavgb (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pavgb (s, t); | |
| +} | |
| + | |
| +/* Equality test. */ | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +pcmpeqw_u (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpeqw_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pcmpeqh_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpeqh_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pcmpeqb_u (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpeqb_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +pcmpeqw_s (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpeqw_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pcmpeqh_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpeqh_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +pcmpeqb_s (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpeqb_s (s, t); | |
| +} | |
| + | |
| +/* Greater-than test. */ | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +pcmpgtw_u (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpgtw_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pcmpgth_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpgth_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pcmpgtb_u (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpgtb_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +pcmpgtw_s (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpgtw_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pcmpgth_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpgth_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +pcmpgtb_s (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pcmpgtb_s (s, t); | |
| +} | |
| + | |
| +/* Extract halfword. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pextrh_u (uint16x4_t s, int field /* 0--3. */) | |
| +{ | |
| + return __builtin_loongson_pextrh_u (s, field); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pextrh_s (int16x4_t s, int field /* 0--3. */) | |
| +{ | |
| + return __builtin_loongson_pextrh_s (s, field); | |
| +} | |
| + | |
| +/* Insert halfword. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_0_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_0_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_1_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_1_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_2_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_2_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_3_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_3_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_0_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_0_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_1_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_1_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_2_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_2_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pinsrh_3_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pinsrh_3_s (s, t); | |
| +} | |
| + | |
| +/* Multiply and add. */ | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +pmaddhw (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pmaddhw (s, t); | |
| +} | |
| + | |
| +/* Maximum of signed halfwords. */ | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pmaxsh (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pmaxsh (s, t); | |
| +} | |
| + | |
| +/* Maximum of unsigned bytes. */ | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pmaxub (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pmaxub (s, t); | |
| +} | |
| + | |
| +/* Minimum of signed halfwords. */ | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pminsh (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pminsh (s, t); | |
| +} | |
| + | |
| +/* Minimum of unsigned bytes. */ | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pminub (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pminub (s, t); | |
| +} | |
| + | |
| +/* Move byte mask. */ | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pmovmskb_u (uint8x8_t s) | |
| +{ | |
| + return __builtin_loongson_pmovmskb_u (s); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +pmovmskb_s (int8x8_t s) | |
| +{ | |
| + return __builtin_loongson_pmovmskb_s (s); | |
| +} | |
| + | |
| +/* Multiply unsigned integers and store high result. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pmulhuh (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pmulhuh (s, t); | |
| +} | |
| + | |
| +/* Multiply signed integers and store high result. */ | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pmulhh (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pmulhh (s, t); | |
| +} | |
| + | |
| +/* Multiply signed integers and store low result. */ | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pmullh (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_pmullh (s, t); | |
| +} | |
| + | |
| +/* Multiply unsigned word integers. */ | |
| +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| +pmuluw (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_pmuluw (s, t); | |
| +} | |
| + | |
| +/* Absolute difference. */ | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +pasubub (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_pasubub (s, t); | |
| +} | |
| + | |
| +/* Sum of unsigned byte integers. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +biadd (uint8x8_t s) | |
| +{ | |
| + return __builtin_loongson_biadd (s); | |
| +} | |
| + | |
| +/* Sum of absolute differences. | |
| + Note that this intrinsic expands into two machine instructions: | |
| + PASUBUB followed by BIADD. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +psadbh (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_psadbh (s, t); | |
| +} | |
| + | |
| +/* Shuffle halfwords. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order) | |
| +{ | |
| + return __builtin_loongson_pshufh_u (s, order); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order) | |
| +{ | |
| + return __builtin_loongson_pshufh_s (s, order); | |
| +} | |
| + | |
| +/* Shift left logical. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +psllh_u (uint16x4_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psllh_u (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +psllh_s (int16x4_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psllh_s (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +psllw_u (uint32x2_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psllw_u (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +psllw_s (int32x2_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psllw_s (s, amount); | |
| +} | |
| + | |
| +/* Shift right logical. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +psrlh_u (uint16x4_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psrlh_u (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +psrlh_s (int16x4_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psrlh_s (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +psrlw_u (uint32x2_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psrlw_u (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +psrlw_s (int32x2_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psrlw_s (s, amount); | |
| +} | |
| + | |
| +/* Shift right arithmetic. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +psrah_u (uint16x4_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psrah_u (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +psrah_s (int16x4_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psrah_s (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +psraw_u (uint32x2_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psraw_u (s, amount); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +psraw_s (int32x2_t s, uint8_t amount) | |
| +{ | |
| + return __builtin_loongson_psraw_s (s, amount); | |
| +} | |
| + | |
| +/* Vector subtraction, treating overflow by wraparound. */ | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +psubw_u (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_psubw_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +psubh_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_psubh_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +psubb_u (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_psubb_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +psubw_s (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_psubw_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +psubh_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_psubh_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +psubb_s (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_psubb_s (s, t); | |
| +} | |
| + | |
| +/* Subtraction of doubleword integers, treating overflow by wraparound. */ | |
| +__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) | |
| +psubd_u (uint64_t s, uint64_t t) | |
| +{ | |
| + return __builtin_loongson_psubd_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| +psubd_s (int64_t s, int64_t t) | |
| +{ | |
| + return __builtin_loongson_psubd_s (s, t); | |
| +} | |
| + | |
| +/* Vector subtraction, treating overflow by signed saturation. */ | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +psubsh (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_psubsh (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +psubsb (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_psubsb (s, t); | |
| +} | |
| + | |
| +/* Vector subtraction, treating overflow by unsigned saturation. */ | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +psubush (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_psubush (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +psubusb (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_psubusb (s, t); | |
| +} | |
| + | |
| +/* Unpack high data. */ | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +punpckhwd_u (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_punpckhwd_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +punpckhhw_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_punpckhhw_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +punpckhbh_u (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_punpckhbh_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +punpckhwd_s (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_punpckhwd_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +punpckhhw_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_punpckhhw_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +punpckhbh_s (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_punpckhbh_s (s, t); | |
| +} | |
| + | |
| +/* Unpack low data. */ | |
| +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| +punpcklwd_u (uint32x2_t s, uint32x2_t t) | |
| +{ | |
| + return __builtin_loongson_punpcklwd_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| +punpcklhw_u (uint16x4_t s, uint16x4_t t) | |
| +{ | |
| + return __builtin_loongson_punpcklhw_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| +punpcklbh_u (uint8x8_t s, uint8x8_t t) | |
| +{ | |
| + return __builtin_loongson_punpcklbh_u (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| +punpcklwd_s (int32x2_t s, int32x2_t t) | |
| +{ | |
| + return __builtin_loongson_punpcklwd_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| +punpcklhw_s (int16x4_t s, int16x4_t t) | |
| +{ | |
| + return __builtin_loongson_punpcklhw_s (s, t); | |
| +} | |
| + | |
| +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| +punpcklbh_s (int8x8_t s, int8x8_t t) | |
| +{ | |
| + return __builtin_loongson_punpcklbh_s (s, t); | |
| +} | |
| + | |
| +#ifdef __cplusplus | |
| +} | |
| +#endif | |
| + | |
| +#endif | |
| diff --git a/gcc/config/mips/loongson.h b/gcc/config/mips/loongson.h | |
| index 3a99878..3d0c26b 100644 | |
| --- a/gcc/config/mips/loongson.h | |
| +++ b/gcc/config/mips/loongson.h | |
| @@ -1,4 +1,4 @@ | |
| -/* Intrinsics for ST Microelectronics Loongson-2E/2F SIMD operations. | |
| +/* Intrinsics for Loongson MultiMedia extension Instructions operations. | |
| Copyright (C) 2008-2018 Free Software Foundation, Inc. | |
| Contributed by CodeSourcery. | |
| @@ -24,2701 +24,9 @@ | |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
| <http://www.gnu.org/licenses/>. */ | |
| -#ifndef _GCC_LOONGSON_H | |
| -#define _GCC_LOONGSON_H | |
| - | |
| -#if !defined(__mips_loongson_vector_rev) | |
| -# error "You must select -march=loongson2e/2f/3a to use loongson.h" | |
| -#endif | |
| - | |
| -#ifdef __cplusplus | |
| -extern "C" { | |
| -#endif | |
| - | |
| -#include <stdint.h> | |
| - | |
| -/* Vectors of unsigned bytes, halfwords and words. */ | |
| -typedef uint8_t uint8x8_t __attribute__((vector_size (8))); | |
| -typedef uint16_t uint16x4_t __attribute__((vector_size (8))); | |
| -typedef uint32_t uint32x2_t __attribute__((vector_size (8))); | |
| - | |
| -/* Vectors of signed bytes, halfwords and words. */ | |
| -typedef int8_t int8x8_t __attribute__((vector_size (8))); | |
| -typedef int16_t int16x4_t __attribute__((vector_size (8))); | |
| -typedef int32_t int32x2_t __attribute__((vector_size (8))); | |
| - | |
| -/* SIMD intrinsics. | |
| - Unless otherwise noted, calls to the functions below will expand into | |
| - precisely one machine instruction, modulo any moves required to | |
| - satisfy register allocation constraints. */ | |
| - | |
| -/* Pack with signed saturation. */ | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -packsswh (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_packsswh (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -packsshb (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_packsshb (s, t); | |
| -} | |
| - | |
| -/* Pack with unsigned saturation. */ | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -packushb (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_packushb (s, t); | |
| -} | |
| - | |
| -/* Vector addition, treating overflow by wraparound. */ | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -paddw_u (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_paddw_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -paddh_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_paddh_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -paddb_u (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_paddb_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -paddw_s (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_paddw_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -paddh_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_paddh_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -paddb_s (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_paddb_s (s, t); | |
| -} | |
| - | |
| -/* Addition of doubleword integers, treating overflow by wraparound. */ | |
| -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) | |
| -paddd_u (uint64_t s, uint64_t t) | |
| -{ | |
| - return __builtin_loongson_paddd_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| -paddd_s (int64_t s, int64_t t) | |
| -{ | |
| - return __builtin_loongson_paddd_s (s, t); | |
| -} | |
| - | |
| -/* Vector addition, treating overflow by signed saturation. */ | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -paddsh (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_paddsh (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -paddsb (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_paddsb (s, t); | |
| -} | |
| - | |
| -/* Vector addition, treating overflow by unsigned saturation. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -paddush (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_paddush (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -paddusb (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_paddusb (s, t); | |
| -} | |
| - | |
| -/* Logical AND NOT. */ | |
| -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) | |
| -pandn_ud (uint64_t s, uint64_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_ud (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -pandn_uw (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_uw (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pandn_uh (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_uh (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pandn_ub (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_ub (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| -pandn_sd (int64_t s, int64_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_sd (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -pandn_sw (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_sw (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pandn_sh (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_sh (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -pandn_sb (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pandn_sb (s, t); | |
| -} | |
| - | |
| -/* Average. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pavgh (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pavgh (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pavgb (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pavgb (s, t); | |
| -} | |
| - | |
| -/* Equality test. */ | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -pcmpeqw_u (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpeqw_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pcmpeqh_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpeqh_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pcmpeqb_u (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpeqb_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -pcmpeqw_s (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpeqw_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pcmpeqh_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpeqh_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -pcmpeqb_s (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpeqb_s (s, t); | |
| -} | |
| - | |
| -/* Greater-than test. */ | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -pcmpgtw_u (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpgtw_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pcmpgth_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpgth_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pcmpgtb_u (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpgtb_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -pcmpgtw_s (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpgtw_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pcmpgth_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpgth_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -pcmpgtb_s (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pcmpgtb_s (s, t); | |
| -} | |
| - | |
| -/* Extract halfword. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pextrh_u (uint16x4_t s, int field /* 0--3 */) | |
| -{ | |
| - return __builtin_loongson_pextrh_u (s, field); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pextrh_s (int16x4_t s, int field /* 0--3 */) | |
| -{ | |
| - return __builtin_loongson_pextrh_s (s, field); | |
| -} | |
| - | |
| -/* Insert halfword. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_0_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_0_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_1_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_1_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_2_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_2_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_3_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_3_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_0_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_0_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_1_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_1_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_2_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_2_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pinsrh_3_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pinsrh_3_s (s, t); | |
| -} | |
| - | |
| -/* Multiply and add. */ | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -pmaddhw (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pmaddhw (s, t); | |
| -} | |
| - | |
| -/* Maximum of signed halfwords. */ | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pmaxsh (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pmaxsh (s, t); | |
| -} | |
| - | |
| -/* Maximum of unsigned bytes. */ | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pmaxub (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pmaxub (s, t); | |
| -} | |
| - | |
| -/* Minimum of signed halfwords. */ | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pminsh (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pminsh (s, t); | |
| -} | |
| - | |
| -/* Minimum of unsigned bytes. */ | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pminub (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pminub (s, t); | |
| -} | |
| - | |
| -/* Move byte mask. */ | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pmovmskb_u (uint8x8_t s) | |
| -{ | |
| - return __builtin_loongson_pmovmskb_u (s); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -pmovmskb_s (int8x8_t s) | |
| -{ | |
| - return __builtin_loongson_pmovmskb_s (s); | |
| -} | |
| - | |
| -/* Multiply unsigned integers and store high result. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pmulhuh (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pmulhuh (s, t); | |
| -} | |
| - | |
| -/* Multiply signed integers and store high result. */ | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pmulhh (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pmulhh (s, t); | |
| -} | |
| - | |
| -/* Multiply signed integers and store low result. */ | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pmullh (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_pmullh (s, t); | |
| -} | |
| - | |
| -/* Multiply unsigned word integers. */ | |
| -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| -pmuluw (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_pmuluw (s, t); | |
| -} | |
| - | |
| -/* Absolute difference. */ | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -pasubub (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_pasubub (s, t); | |
| -} | |
| - | |
| -/* Sum of unsigned byte integers. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -biadd (uint8x8_t s) | |
| -{ | |
| - return __builtin_loongson_biadd (s); | |
| -} | |
| - | |
| -/* Sum of absolute differences. | |
| - Note that this intrinsic expands into two machine instructions: | |
| - PASUBUB followed by BIADD. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -psadbh (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_psadbh (s, t); | |
| -} | |
| - | |
| -/* Shuffle halfwords. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -pshufh_u (uint16x4_t dest, uint16x4_t s, uint8_t order) | |
| -{ | |
| - return __builtin_loongson_pshufh_u (s, order); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -pshufh_s (int16x4_t dest, int16x4_t s, uint8_t order) | |
| -{ | |
| - return __builtin_loongson_pshufh_s (s, order); | |
| -} | |
| - | |
| -/* Shift left logical. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -psllh_u (uint16x4_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psllh_u (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -psllh_s (int16x4_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psllh_s (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -psllw_u (uint32x2_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psllw_u (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -psllw_s (int32x2_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psllw_s (s, amount); | |
| -} | |
| - | |
| -/* Shift right logical. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -psrlh_u (uint16x4_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psrlh_u (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -psrlh_s (int16x4_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psrlh_s (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -psrlw_u (uint32x2_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psrlw_u (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -psrlw_s (int32x2_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psrlw_s (s, amount); | |
| -} | |
| - | |
| -/* Shift right arithmetic. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -psrah_u (uint16x4_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psrah_u (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -psrah_s (int16x4_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psrah_s (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -psraw_u (uint32x2_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psraw_u (s, amount); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -psraw_s (int32x2_t s, uint8_t amount) | |
| -{ | |
| - return __builtin_loongson_psraw_s (s, amount); | |
| -} | |
| - | |
| -/* Vector subtraction, treating overflow by wraparound. */ | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -psubw_u (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_psubw_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -psubh_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_psubh_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -psubb_u (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_psubb_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -psubw_s (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_psubw_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -psubh_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_psubh_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -psubb_s (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_psubb_s (s, t); | |
| -} | |
| - | |
| -/* Subtraction of doubleword integers, treating overflow by wraparound. */ | |
| -__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) | |
| -psubd_u (uint64_t s, uint64_t t) | |
| -{ | |
| - return __builtin_loongson_psubd_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int64_t __attribute__ ((__always_inline__)) | |
| -psubd_s (int64_t s, int64_t t) | |
| -{ | |
| - return __builtin_loongson_psubd_s (s, t); | |
| -} | |
| - | |
| -/* Vector subtraction, treating overflow by signed saturation. */ | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -psubsh (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_psubsh (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -psubsb (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_psubsb (s, t); | |
| -} | |
| - | |
| -/* Vector subtraction, treating overflow by unsigned saturation. */ | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -psubush (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_psubush (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -psubusb (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_psubusb (s, t); | |
| -} | |
| - | |
| -/* Unpack high data. */ | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -punpckhwd_u (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_punpckhwd_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -punpckhhw_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_punpckhhw_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -punpckhbh_u (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_punpckhbh_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -punpckhwd_s (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_punpckhwd_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -punpckhhw_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_punpckhhw_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -punpckhbh_s (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_punpckhbh_s (s, t); | |
| -} | |
| - | |
| -/* Unpack low data. */ | |
| -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) | |
| -punpcklwd_u (uint32x2_t s, uint32x2_t t) | |
| -{ | |
| - return __builtin_loongson_punpcklwd_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) | |
| -punpcklhw_u (uint16x4_t s, uint16x4_t t) | |
| -{ | |
| - return __builtin_loongson_punpcklhw_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) | |
| -punpcklbh_u (uint8x8_t s, uint8x8_t t) | |
| -{ | |
| - return __builtin_loongson_punpcklbh_u (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) | |
| -punpcklwd_s (int32x2_t s, int32x2_t t) | |
| -{ | |
| - return __builtin_loongson_punpcklwd_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) | |
| -punpcklhw_s (int16x4_t s, int16x4_t t) | |
| -{ | |
| - return __builtin_loongson_punpcklhw_s (s, t); | |
| -} | |
| - | |
| -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) | |
| -punpcklbh_s (int8x8_t s, int8x8_t t) | |
| -{ | |
| - return __builtin_loongson_punpcklbh_s (s, t); | |
| -} | |
| - | |
| -/* SSE2-style Vectors */ | |
| -typedef double __v1df __attribute__ ((__vector_size__ (8))); | |
| -typedef long long __v1di __attribute__ ((__vector_size__ (8))); | |
| -typedef unsigned long long __v1du __attribute__ ((__vector_size__ (8))); | |
| -typedef int __v2si __attribute__ ((__vector_size__ (8))); | |
| -typedef unsigned int __v2su __attribute__ ((__vector_size__ (8))); | |
| -typedef short __v4hi __attribute__ ((__vector_size__ (8))); | |
| -typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8))); | |
| -typedef char __v8qi __attribute__ ((__vector_size__ (8))); | |
| -typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8))); | |
| - | |
| -typedef struct __v2df { __v1df hi; __v1df lo; } __v2df; | |
| -typedef struct __v2di { __v1di hi; __v1di lo; } __v2di; | |
| -typedef struct __v2du { __v1du hi; __v1du lo; } __v2du; | |
| -typedef struct __v4si { __v2si hi; __v2si lo; } __v4si; | |
| -typedef struct __v4su { __v2su hi; __v2su lo; } __v4su; | |
| -typedef struct __v8hi { __v4hi hi; __v4hi lo; } __v8hi; | |
| -typedef struct __v8hu { __v4hu hi; __v4hu lo; } __v8hu; | |
| -typedef struct __v16qi { __v8qi hi; __v8qi lo; } __v16qi; | |
| -typedef struct __v16qu { __v8qu hi; __v8qu lo; } __v16qu; | |
| - | |
| -typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); | |
| -typedef long long __m64i __attribute__ ((__vector_size__ (8), __may_alias__)); | |
| -typedef double __m64d __attribute__ ((__vector_size__ (8), __may_alias__)); | |
| - | |
| -typedef struct __m128 { __m64 hi; __m64 lo; } __m128; | |
| -typedef struct __m128i { __m64i hi; __m64i lo; } __m128i; | |
| -typedef struct __m128d { __m64d hi; __m64d lo; } __m128d; | |
| - | |
| -/* Create a selector for use with the SHUFPD instruction. */ | |
| -#define _MM_SHUFFLE2(fp1,fp0) \ | |
| - (((fp1) << 1) | (fp0)) | |
| - | |
| -/* Create a vector with element 0 as F and the rest zero. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_sd (double __F) | |
| -{ | |
| - /* return __extension__ (__m128d){ __F, 0.0 }; */ | |
| - __m128d val; | |
| - val.lo = (__m64d){ __F }; | |
| - val.hi = (__m64d){ 0.0 }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -/* Create a vector with both elements equal to F. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set1_pd (double __F) | |
| -{ | |
| - /* return __extension__ (__m128d){ __F, __F }; */ | |
| - __m128d val; | |
| - val.lo = (__m64d){ __F }; | |
| - val.hi = (__m64d){ __F }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_pd1 (double __F) | |
| -{ | |
| - return _mm_set1_pd (__F); | |
| -} | |
| - | |
| -/* Create a vector with the lower value X and upper value W. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_pd (double __W, double __X) | |
| -{ | |
| - /* return __extension__ (__m128d){ __X, __W }; */ | |
| - __m128d val; | |
| - val.lo = (__m64d){ __X }; | |
| - val.hi = (__m64d){ __W }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -/* Create a vector with the lower value W and upper value X. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_setr_pd (double __W, double __X) | |
| -{ | |
| - /* return __extension__ (__m128d){ __W, __X }; */ | |
| - __m128d val; | |
| - val.lo = (__m64d){ __W }; | |
| - val.hi = (__m64d){ __X }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -/* Create an undefined vector. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_undefined_pd (void) | |
| -{ | |
| - __m128d __Y = __Y; | |
| - return __Y; | |
| -} | |
| - | |
| -/* Create a vector of zeros. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_setzero_pd (void) | |
| -{ | |
| - /* return __extension__ (__m128d){ 0.0, 0.0 }; */ | |
| - __m128d val; | |
| - val.lo = (__m64d){ 0.0 }; | |
| - val.hi = (__m64d){ 0.0 }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -/* Sets the low DPFP value of A from the low value of B. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_move_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -/* Load two DPFP values from P. The address must be 16-byte aligned. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_load_pd (double const *__P) | |
| -{ | |
| - return *(__m128d *)__P; | |
| -} | |
| - | |
| -/* Load two DPFP values from P. The address need not be 16-byte aligned. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_loadu_pd (double const *__P) | |
| -{ | |
| - return __builtin_ia32_loadupd (__P); | |
| -} | |
| -#endif | |
| - | |
| -/* Create a vector with all two elements equal to *P. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_load1_pd (double const *__P) | |
| -{ | |
| - return _mm_set1_pd (*__P); | |
| -} | |
| - | |
| -/* Create a vector with element 0 as *P and the rest zero. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_load_sd (double const *__P) | |
| -{ | |
| - return _mm_set_sd (*__P); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_load_pd1 (double const *__P) | |
| -{ | |
| - return _mm_load1_pd (__P); | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -/* Load two DPFP values in reverse order. The address must be aligned. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_loadr_pd (double const *__P) | |
| -{ | |
| - __m128d __tmp = _mm_load_pd (__P); | |
| - return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); | |
| -} | |
| - | |
| -/* Store two DPFP values. The address must be 16-byte aligned. */ | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_store_pd (double *__P, __m128d __A) | |
| -{ | |
| - *(__m128d *)__P = __A; | |
| -} | |
| - | |
| -/* Store two DPFP values. The address need not be 16-byte aligned. */ | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_storeu_pd (double *__P, __m128d __A) | |
| -{ | |
| - __builtin_ia32_storeupd (__P, __A); | |
| -} | |
| -#endif | |
| - | |
| -/* Stores the lower DPFP value. */ | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_store_sd (double *__P, __m128d __A) | |
| -{ | |
| - /* *__P = ((__v2df)__A)[0]; */ | |
| - __asm__ volatile ( | |
| - "sdc1 %[lo], %[__P] \n\t" | |
| - ::[lo]"f"((__m64i)__A.lo), [__P]"m"(*__P) | |
| - : "memory" | |
| - ); | |
| -} | |
| - | |
| -extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cvtsd_f64 (__m128d __A) | |
| -{ | |
| - /* return ((__v2df)__A)[0]; */ | |
| - double val; | |
| - __asm__ volatile ( | |
| - "ldc1 %[val], %[lo] \n\t" | |
| - : [val]"=&f"(val) | |
| - : [lo]"m"(__A.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_storel_pd (double *__P, __m128d __A) | |
| -{ | |
| - _mm_store_sd (__P, __A); | |
| -} | |
| - | |
| -/* Stores the upper DPFP value. */ | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_storeh_pd (double *__P, __m128d __A) | |
| -{ | |
| - /* *__P = ((__v2df)__A)[1]; */ | |
| - __asm__ volatile ( | |
| - "sdc1 %[hi], %[__P] \n\t" | |
| - ::[hi]"f"((__m64i)__A.hi), [__P]"m"(*__P) | |
| - : "memory" | |
| - ); | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -/* Store the lower DPFP value across two words. | |
| - The address must be 16-byte aligned. */ | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_store1_pd (double *__P, __m128d __A) | |
| -{ | |
| - _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_store_pd1 (double *__P, __m128d __A) | |
| -{ | |
| - _mm_store1_pd (__P, __A); | |
| -} | |
| - | |
| -/* Store two DPFP values in reverse order. The address must be aligned. */ | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_storer_pd (double *__P, __m128d __A) | |
| -{ | |
| - _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); | |
| -} | |
| -#endif | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cvtsi128_si32 (__m128i __A) | |
| -{ | |
| - /* return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); */ | |
| - int val; | |
| - __asm__ volatile ( | |
| - "mfc1 %[val], %[lo] \n\t" | |
| - : [val]"=&r"(val) | |
| - : [lo]"f"(__A.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cvtsi128_si64 (__m128i __A) | |
| -{ | |
| - /* return ((__v2di)__A)[0]; */ | |
| - long long val; | |
| - __asm__ volatile ( | |
| - "dmfc1 %[val], %[lo] \n\t" | |
| - : [val]"=&r"(val) | |
| - : [lo]"f"(__A.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cvtsi128_si64x (__m128i __A) | |
| -{ | |
| - /* return ((__v2di)__A)[0]; */ | |
| - long long val; | |
| - __asm__ volatile ( | |
| - "dmfc1 %[val], %[lo] \n\t" | |
| - : [val]"=&r"(val) | |
| - : [lo]"f"(__A.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_add_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - /* return (__m128d) ((__v2df)__A + (__v2df)__B); */ | |
| - __m128d val; | |
| - val.lo = (__m64d) ((__v1df)__A.lo + (__v1df)__B.lo); | |
| - val.hi = (__m64d) ((__v1df)__A.hi + (__v1df)__B.hi); | |
| - return val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_add_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| -#endif | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sub_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - /* return (__m128d) ((__v2df)__A - (__v2df)__B); */ | |
| - __m128d val; | |
| - val.lo = (__m64d) ((__v1df)__A.lo - (__v1df)__B.lo); | |
| - val.hi = (__m64d) ((__v1df)__A.hi - (__v1df)__B.hi); | |
| - return val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sub_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| -#endif | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mul_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - /* return (__m128d) ((__v2df)__A * (__v2df)__B); */ | |
| - __m128d val; | |
| - val.lo = (__m64d) ((__v1df)__A.lo * (__v1df)__B.lo); | |
| - val.hi = (__m64d) ((__v1df)__A.hi * (__v1df)__B.hi); | |
| - return val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mul_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| -#endif | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_div_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - /* return (__m128d) ((__v2df)__A / (__v2df)__B); */ | |
| - __m128d val; | |
| - val.lo = (__m64d) ((__v1df)__A.lo / (__v1df)__B.lo); | |
| - val.hi = (__m64d) ((__v1df)__A.hi / (__v1df)__B.hi); | |
| - return val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_div_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sqrt_pd (__m128d __A) | |
| -{ | |
| - return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); | |
| -} | |
| - | |
| -/* Return pair {sqrt (B[0]), A[1]}. */ | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sqrt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); | |
| - return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_min_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_min_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_max_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_max_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_and_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_andnot_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_or_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_xor_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpeq_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmplt_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmple_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpgt_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpge_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpneq_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpnlt_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpnle_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpngt_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpnge_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpord_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpunord_pd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpeq_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmplt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmple_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpgt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
| - (__v2df) | |
| - __builtin_ia32_cmpltsd ((__v2df) __B, | |
| - (__v2df) | |
| - __A)); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpge_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
| - (__v2df) | |
| - __builtin_ia32_cmplesd ((__v2df) __B, | |
| - (__v2df) | |
| - __A)); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpneq_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpnlt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpnle_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpngt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
| - (__v2df) | |
| - __builtin_ia32_cmpnltsd ((__v2df) __B, | |
| - (__v2df) | |
| - __A)); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpnge_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d) __builtin_ia32_movsd ((__v2df) __A, | |
| - (__v2df) | |
| - __builtin_ia32_cmpnlesd ((__v2df) __B, | |
| - (__v2df) | |
| - __A)); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpord_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpunord_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_comieq_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_comilt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_comile_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_comigt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_comige_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_comineq_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_ucomieq_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_ucomilt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_ucomile_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_ucomigt_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_ucomige_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_ucomineq_sd (__m128d __A, __m128d __B) | |
| -{ | |
| - return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); | |
| -} | |
| -#endif | |
| - | |
| -/* Create a vector of Qi, where i is the element number. */ | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_epi64x (long long __q1, long long __q0) | |
| -{ | |
| - /* return __extension__ (__m128i)(__v2di){ __q0, __q1 }; */ | |
| - __m128i val; | |
| - val.lo = (__m64i)(__v1di){ __q0 }; | |
| - val.hi = (__m64i)(__v1di){ __q1 }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_epi64 (__m64 __q1, __m64 __q0) | |
| -{ | |
| - return _mm_set_epi64x ((long long)__q1, (long long)__q0); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) | |
| -{ | |
| - /* return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; */ | |
| - __m128i val; | |
| - val.lo = (__m64i)(__v2si){ __q0, __q1 }; | |
| - val.hi = (__m64i)(__v2si){ __q2, __q3 }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, | |
| - short __q3, short __q2, short __q1, short __q0) | |
| -{ | |
| - /* return __extension__ (__m128i)(__v8hi){ | |
| - __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; */ | |
| - __m128i val; | |
| - val.lo = (__m64i)(__v4hi){ __q0, __q1, __q2, __q3 }; | |
| - val.hi = (__m64i)(__v4hi){ __q4, __q5, __q6, __q7 }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, | |
| - char __q11, char __q10, char __q09, char __q08, | |
| - char __q07, char __q06, char __q05, char __q04, | |
| - char __q03, char __q02, char __q01, char __q00) | |
| -{ | |
| - /* return __extension__ (__m128i)(__v16qi){ | |
| - __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, | |
| - __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 | |
| - }; */ | |
| - __m128i val; | |
| - val.lo = (__m64i)(__v8qi){ __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07 }; | |
| - val.hi = (__m64i)(__v8qi){ __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 }; | |
| - return __extension__ val; | |
| -} | |
| - | |
| -/* Set all of the elements of the vector to A. */ | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set1_epi64x (long long __A) | |
| -{ | |
| - return _mm_set_epi64x (__A, __A); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set1_epi64 (__m64 __A) | |
| -{ | |
| - return _mm_set_epi64 (__A, __A); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set1_epi32 (int __A) | |
| -{ | |
| - return _mm_set_epi32 (__A, __A, __A, __A); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set1_epi16 (short __A) | |
| -{ | |
| - return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_set1_epi8 (char __A) | |
| -{ | |
| - return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, | |
| - __A, __A, __A, __A, __A, __A, __A, __A); | |
| -} | |
| - | |
| -/* Create a vector of Qi, where i is the element number. | |
| - The parameter order is reversed from the _mm_set_epi* functions. */ | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_setr_epi64 (__m64 __q0, __m64 __q1) | |
| -{ | |
| - return _mm_set_epi64 (__q1, __q0); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) | |
| -{ | |
| - return _mm_set_epi32 (__q3, __q2, __q1, __q0); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, | |
| - short __q4, short __q5, short __q6, short __q7) | |
| -{ | |
| - return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, | |
| - char __q04, char __q05, char __q06, char __q07, | |
| - char __q08, char __q09, char __q10, char __q11, | |
| - char __q12, char __q13, char __q14, char __q15) | |
| -{ | |
| - return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, | |
| - __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); | |
| -} | |
| - | |
| -/* Create a vector with element 0 as *P and the rest zero. */ | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_load_si128 (__m128i const *__P) | |
| -{ | |
| - return *__P; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_loadu_si128 (__m128i const *__P) | |
| -{ | |
| - /* return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "gsldlc1 %[lo], 0x07(%[__P]) \n\t" | |
| - "gsldrc1 %[lo], 0x00(%[__P]) \n\t" | |
| - "gsldlc1 %[hi], 0x0f(%[__P]) \n\t" | |
| - "gsldrc1 %[hi], 0x08(%[__P]) \n\t" | |
| - : [hi]"=&f"(val.hi), [lo]"=&f"(val.lo) | |
| - : [__P]"r"(__P) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_loadl_epi64 (__m128i const *__P) | |
| -{ | |
| - return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_store_si128 (__m128i *__P, __m128i __B) | |
| -{ | |
| - /* *__P = __B; */ | |
| - __asm__ volatile ( | |
| - "gssqc1 %[hi], %[lo], 0x00(%[__P]) \n\t" | |
| - ::[hi]"f"(__B.hi), [lo]"f"(__B.lo), | |
| - [__P]"r"(__P) | |
| - : "memory" | |
| - ); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_storeu_si128 (__m128i *__P, __m128i __B) | |
| -{ | |
| - /* __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); */ | |
| - __asm__ volatile ( | |
| - "gssdlc1 %[lo], 0x07(%[__P]) \n\t" | |
| - "gssdrc1 %[lo], 0x00(%[__P]) \n\t" | |
| - "gssdlc1 %[hi], 0x0f(%[__P]) \n\t" | |
| - "gssdrc1 %[hi], 0x08(%[__P]) \n\t" | |
| - ::[hi]"f"(__B.hi), [lo]"f"(__B.lo), | |
| - [__P]"r"(__P) | |
| - : "memory" | |
| - ); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_storel_epi64 (__m128i *__P, __m128i __B) | |
| -{ | |
| - /* *(long long *)__P = ((__v2di)__B)[0]; */ | |
| - __asm__ volatile ( | |
| - "sdc1 %[lo], %[__P] \n\t" | |
| - ::[lo]"f"(__B.lo), [__P]"m"(*__P) | |
| - : "memory" | |
| - ); | |
| -} | |
| - | |
| -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_movepi64_pi64 (__m128i __B) | |
| -{ | |
| - /* return (__m64) ((__v2di)__B)[0]; */ | |
| - __m64 val; | |
| - __asm__ volatile ( | |
| - "dmfc1 %[val], %[lo] \n\t" | |
| - : [val]"=&r"(val) | |
| - : [lo]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_movpi64_epi64 (__m64 __A) | |
| -{ | |
| - return _mm_set_epi64 ((__m64)0LL, __A); | |
| -} | |
| - | |
#if 0 /* FIXME: still uses an x86 builtin; needs an MMI implementation.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_move_epi64 (__m128i __A)
{
  return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
}

/* Create an undefined vector.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_undefined_si128 (void)
{
  __m128i __Y = __Y;
  return __Y;
}
#endif
| - | |
| -/* Create a vector of zeros. */ | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_setzero_si128 (void) | |
| -{ | |
| - /* return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; */ | |
| - __m128i val; | |
| - val.hi = (__m64i)(__v2si){ 0, 0 }; | |
| - val.lo = (__m64i)(__v2si){ 0, 0 }; | |
| - return val; | |
| -} | |
| - | |
#if 0 /* FIXME: conversions, shuffle and pd-unpacks below still call x86
	 builtins and need MMI implementations before being enabled.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_pd (__m128i __A)
{
  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ps (__m128i __A)
{
  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_epi32 (__m128d __A)
{
  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_pi32 (__m128d __A)
{
  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpd_ps (__m128d __A)
{
  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_epi32 (__m128d __A)
{
  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttpd_pi32 (__m128d __A)
{
  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtpi32_pd (__m64 __A)
{
  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi32 (__m128 __A)
{
  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi32 (__m128 __A)
{
  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pd (__m128 __A)
{
  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si ((__v2df) __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
#endif

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si ((__v2df) __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
#endif

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_sd (__m128d __A, int __B)
{
  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
}

#ifdef __x86_64__
/* Intel intrinsic.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}

/* Microsoft intrinsic.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
#endif

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
}
#endif
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_loadh_pd (__m128d __A, double const *__B) | |
| -{ | |
| - /* return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B); */ | |
| - __asm__ volatile ( | |
| - "sdc1 %[__B], 0x00+%[ahi] \n\t" | |
| - : [ahi]"=m"(__A.hi) | |
| - : [__B]"f"(*__B) | |
| - : "memory" | |
| - ); | |
| - return __A; | |
| -} | |
| - | |
#if 0 /* FIXME: still x86 builtins; need MMI implementations.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pd (__m128d __A)
{
  return __builtin_ia32_movmskpd ((__v2df)__A);
}
#endif
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_packs_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_packsshb ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_packsshb ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_packs_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_packsswh ((int32x2_t)__A.hi, (int32x2_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_packsswh ((int32x2_t)__A.lo, (int32x2_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_packus_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_packushb ((uint16x4_t)__A.hi, (uint16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_packushb ((uint16x4_t)__A.lo, (uint16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpackhi_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_punpckhbh_s ((int8x8_t)__A.hi, (int8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_punpckhbh_s ((int8x8_t)__A.lo, (int8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpackhi_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_punpckhhw_s ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_punpckhhw_s ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpackhi_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_punpckhwd_s ((__v2si)__A.hi, (__v2si)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_punpckhwd_s ((__v2si)__A.lo, (__v2si)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpackhi_epi64 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "mov.d %[vlo], %[ahi] \n\t" | |
| - "mov.d %[vhi], %[bhi] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [ahi]"f"(__A.hi), [bhi]"f"(__B.hi) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpacklo_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_punpcklbh_s ((int8x8_t)__A.hi, (int8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_punpcklbh_s ((int8x8_t)__A.lo, (int8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpacklo_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_punpcklhw_s ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_punpcklhw_s ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpacklo_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_punpcklwd_s ((__v2si)__A.hi, (__v2si)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_punpcklwd_s ((__v2si)__A.lo, (__v2si)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_unpacklo_epi64 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "mov.d %[vlo], %[alo] \n\t" | |
| - "mov.d %[vhi], %[blo] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [alo]"f"(__A.lo), [blo]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_add_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v16qu)__A + (__v16qu)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v8qu)__A.hi + (__v8qu)__B.hi); | |
| - val.lo = (__m64i) ((__v8qu)__A.lo + (__v8qu)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_add_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v8hu)__A + (__v8hu)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v4hu)__A.hi + (__v4hu)__B.hi); | |
| - val.lo = (__m64i) ((__v4hu)__A.lo + (__v4hu)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_add_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v4su)__A + (__v4su)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v2su)__A.hi + (__v2su)__B.hi); | |
| - val.lo = (__m64i) ((__v2su)__A.lo + (__v2su)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_add_epi64 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v2du)__A + (__v2du)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v1du)__A.hi + (__v1du)__B.hi); | |
| - val.lo = (__m64i) ((__v1du)__A.lo + (__v1du)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_adds_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_paddsb ((int8x8_t)__A.hi, (int8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_paddsb ((int8x8_t)__A.lo, (int8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_adds_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_paddsh ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_paddsh ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_adds_epu8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_paddusb ((uint8x8_t)__A.hi, (uint8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_paddusb ((uint8x8_t)__A.lo, (uint8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_adds_epu16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_paddush ((uint16x4_t)__A.hi, (uint16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_paddush ((uint16x4_t)__A.lo, (uint16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sub_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v16qu)__A - (__v16qu)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v8qu)__A.hi - (__v8qu)__B.hi); | |
| - val.lo = (__m64i) ((__v8qu)__A.lo - (__v8qu)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sub_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v8hu)__A - (__v8hu)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v4hu)__A.hi - (__v4hu)__B.hi); | |
| - val.lo = (__m64i) ((__v4hu)__A.lo - (__v4hu)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sub_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v4su)__A - (__v4su)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v2su)__A.hi - (__v2su)__B.hi); | |
| - val.lo = (__m64i) ((__v2su)__A.lo - (__v2su)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sub_epi64 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v2du)__A - (__v2du)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v1du)__A.hi - (__v1du)__B.hi); | |
| - val.lo = (__m64i) ((__v1du)__A.lo - (__v1du)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_subs_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psubsb ((int8x8_t)__A.hi, (int8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_psubsb ((int8x8_t)__A.lo, (int8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_subs_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psubsh ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_psubsh ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_subs_epu8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psubusb ((uint8x8_t)__A.hi, (uint8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_psubusb ((uint8x8_t)__A.lo, (uint8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_subs_epu16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psubush ((uint16x4_t)__A.hi, (uint16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_psubush ((uint16x4_t)__A.lo, (uint16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_madd_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pmaddhw ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pmaddhw ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mulhi_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pmulhh ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pmulhh ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mullo_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v8hu)__A * (__v8hu)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v4hu)__A.hi * (__v4hu)__B.hi); | |
| - val.lo = (__m64i) ((__v4hu)__A.lo * (__v4hu)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mul_su32 (__m64 __A, __m64 __B) | |
| -{ | |
| - /* return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B); */ | |
| - __m64 val; | |
| - val = (__m64) __builtin_loongson_pmuluw ((uint32x2_t)__A, (uint32x2_t)__B); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mul_epu32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pmuluw ((uint32x2_t)__A.hi, (uint32x2_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pmuluw ((uint32x2_t)__A.lo, (uint32x2_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_slli_epi16 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psllh_s ((int16x4_t)__A.hi, (uint8_t)__B); | |
| - val.lo = (__m64i) __builtin_loongson_psllh_s ((int16x4_t)__A.lo, (uint8_t)__B); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_slli_epi32 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psllw_s ((int32x2_t)__A.hi, (uint8_t)__B); | |
| - val.lo = (__m64i) __builtin_loongson_psllw_s ((int32x2_t)__A.lo, (uint8_t)__B); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_slli_epi64 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "dsll %[lo], %[lo], %[__B] \n\t" | |
| - "dsll %[hi], %[hi], %[__B] \n\t" | |
| - : [hi]"=&f"(__A.hi), [lo]"=&f"(__A.lo) | |
| - : [__B]"f"(__B) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srai_epi16 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psrah_s ((int16x4_t)__A.hi, (uint8_t)__B); | |
| - val.lo = (__m64i) __builtin_loongson_psrah_s ((int16x4_t)__A.lo, (uint8_t)__B); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srai_epi32 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psraw_s ((int32x2_t)__A.hi, (uint8_t)__B); | |
| - val.lo = (__m64i) __builtin_loongson_psraw_s ((int32x2_t)__A.lo, (uint8_t)__B); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_bsrli_si128 (__m128i __A, const int __N) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psrlw_s ((int32x2_t)__A.hi, (uint8_t)(__N<<3)); | |
| - val.lo = (__m64i) __builtin_loongson_psrlw_s ((int32x2_t)__A.lo, (uint8_t)(__N<<3)); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_bslli_si128 (__m128i __A, const int __N) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psllw_s ((int32x2_t)__A.hi, (uint8_t)(__N<<3)); | |
| - val.lo = (__m64i) __builtin_loongson_psllw_s ((int32x2_t)__A.lo, (uint8_t)(__N<<3)); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srli_si128 (__m128i __A, const int __N) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psrlw_s ((int32x2_t)__A.hi, (uint8_t)(__N<<3)); | |
| - val.lo = (__m64i) __builtin_loongson_psrlw_s ((int32x2_t)__A.lo, (uint8_t)(__N<<3)); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_slli_si128 (__m128i __A, const int __N) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psllw_s ((int32x2_t)__A.hi, (uint8_t)(__N<<3)); | |
| - val.lo = (__m64i) __builtin_loongson_psllw_s ((int32x2_t)__A.lo, (uint8_t)(__N<<3)); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srli_epi16 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psrlh_s ((int16x4_t)__A.hi, (uint8_t)__B); | |
| - val.lo = (__m64i) __builtin_loongson_psrlh_s ((int16x4_t)__A.lo, (uint8_t)__B); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srli_epi32 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psrlw_s ((int32x2_t)__A.hi, (uint8_t)__B); | |
| - val.lo = (__m64i) __builtin_loongson_psrlw_s ((int32x2_t)__A.lo, (uint8_t)__B); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srli_epi64 (__m128i __A, int __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "dsrl %[vlo], %[lo], %[__B] \n\t" | |
| - "dsrl %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sll_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "psllh %[vlo], %[lo], %[__B] \n\t" | |
| - "psllh %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sll_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "psllw %[vlo], %[lo], %[__B] \n\t" | |
| - "psllw %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sll_epi64 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "dsll %[vlo], %[lo], %[__B] \n\t" | |
| - "dsll %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sra_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "psrah %[vlo], %[lo], %[__B] \n\t" | |
| - "psrah %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sra_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "dsra %[vlo], %[lo], %[__B] \n\t" | |
| - "dsra %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srl_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "psrlh %[vlo], %[lo], %[__B] \n\t" | |
| - "psrlh %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srl_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "psrlw %[vlo], %[lo], %[__B] \n\t" | |
| - "psrlw %[vhi], %[hi], %[__B] \n\t" | |
| - : [vhi]"=&f"(val.hi), [vlo]"=&f"(val.lo) | |
| - : [hi]"f"(__A.hi), [lo]"f"(__A.lo), | |
| - [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_srl_epi64 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B); */ | |
| - __m128i val; | |
| - __asm__ volatile ( | |
| - "dsrl %[lo], %[lo], %[__B] \n\t" | |
| - "dsrl %[hi], %[hi], %[__B] \n\t" | |
| - : [hi]"=&f"(__A.hi), [lo]"=&f"(__A.lo) | |
| - : [__B]"f"(__B.lo) | |
| - ); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_and_si128 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v2du)__A & (__v2du)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v1du)__A.hi & (__v1du)__B.hi); | |
| - val.lo = (__m64i) ((__v1du)__A.lo & (__v1du)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_andnot_si128 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i)__builtin_loongson_pandn_sd ((int64_t)__A.hi, (int64_t)__B.hi); | |
| - val.lo = (__m64i)__builtin_loongson_pandn_sd ((int64_t)__A.lo, (int64_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_or_si128 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v2du)__A | (__v2du)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v1du)__A.hi | (__v1du)__B.hi); | |
| - val.lo = (__m64i) ((__v1du)__A.lo | (__v1du)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_xor_si128 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v2du)__A ^ (__v2du)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v1du)__A.hi ^ (__v1du)__B.hi); | |
| - val.lo = (__m64i) ((__v1du)__A.lo ^ (__v1du)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpeq_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v16qi)__A == (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v8qi)__A.hi == (__v8qi)__B.hi); | |
| - val.lo = (__m64i) ((__v8qi)__A.lo == (__v8qi)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpeq_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v8hi)__A == (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v4hi)__A.hi == (__v4hi)__B.hi); | |
| - val.lo = (__m64i) ((__v4hi)__A.lo == (__v4hi)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpeq_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v4si)__A == (__v4si)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v2si)__A.hi == (__v2si)__B.hi); | |
| - val.lo = (__m64i) ((__v2si)__A.lo == (__v2si)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmplt_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v16qi)__A < (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v8qi)__A.hi < (__v8qi)__B.hi); | |
| - val.lo = (__m64i) ((__v8qi)__A.lo < (__v8qi)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmplt_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v8hi)__A < (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v4hi)__A.hi < (__v4hi)__B.hi); | |
| - val.lo = (__m64i) ((__v4hi)__A.lo < (__v4hi)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmplt_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v4si)__A < (__v4si)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v2si)__A.hi < (__v2si)__B.hi); | |
| - val.lo = (__m64i) ((__v2si)__A.lo < (__v2si)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpgt_epi8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v16qi)__A > (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v8qi)__A.hi > (__v8qi)__B.hi); | |
| - val.lo = (__m64i) ((__v8qi)__A.lo > (__v8qi)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpgt_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v8hi)__A > (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v4hi)__A.hi > (__v4hi)__B.hi); | |
| - val.lo = (__m64i) ((__v4hi)__A.lo > (__v4hi)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cmpgt_epi32 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i) ((__v4si)__A > (__v4si)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) ((__v2si)__A.hi > (__v2si)__B.hi); | |
| - val.lo = (__m64i) ((__v2si)__A.lo > (__v2si)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -#ifdef __OPTIMIZE__ | |
| -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_extract_epi16 (__m128i const __A, int const __N) | |
| -{ | |
| - return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_insert_epi16 (__m128i const __A, int const __D, int const __N) | |
| -{ | |
| - return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N); | |
| -} | |
| -#else | |
| -#define _mm_extract_epi16(A, N) \ | |
| - ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N))) | |
| -#define _mm_insert_epi16(A, D, N) \ | |
| - ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \ | |
| - (int)(D), (int)(N))) | |
| -#endif | |
| -#endif | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_max_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pmaxsh ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pmaxsh ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_max_epu8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pmaxub ((uint8x8_t)__A.hi, (uint8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pmaxub ((uint8x8_t)__A.lo, (uint8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_min_epi16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pminsh ((int16x4_t)__A.hi, (int16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pminsh ((int16x4_t)__A.lo, (int16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_min_epu8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pminub ((uint8x8_t)__A.hi, (uint8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pminub ((uint8x8_t)__A.lo, (uint8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -/* FIXME: return int8x8_t, not int */ | |
| -extern __inline int8x8_t __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_movemask_epi8 (__m128i __A) | |
| -{ | |
| - /* return __builtin_ia32_pmovmskb128 ((__v16qi)__A); */ | |
| - int8x8_t val; | |
| - val = __builtin_loongson_pmovmskb_s ((int8x8_t)__A.hi); | |
| - val = val << 8; | |
| - val |= __builtin_loongson_pmovmskb_s ((int8x8_t)__A.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mulhi_epu16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pmulhuh ((uint16x4_t)__A.hi, (uint16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pmulhuh ((uint16x4_t)__A.lo, (uint16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_shufflehi_epi16 (__m128i __A, const int __mask) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pshufh_s ((int16x4_t)__A.hi, (uint8_t)__mask); | |
| - val.lo = (__m64i) __builtin_loongson_pshufh_s ((int16x4_t)__A.lo, (uint8_t)__mask); | |
| - return val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_shufflelo_epi16 (__m128i __A, const int __mask) | |
| -{ | |
| - return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_shuffle_epi32 (__m128i __A, const int __mask) | |
| -{ | |
| - return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) | |
| -{ | |
| - __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C); | |
| -} | |
| -#endif | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_avg_epu8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pavgb ((uint8x8_t)__A.hi, (uint8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pavgb ((uint8x8_t)__A.lo, (uint8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_avg_epu16 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_pavgh ((uint16x4_t)__A.hi, (uint16x4_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_pavgh ((uint16x4_t)__A.lo, (uint16x4_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_sad_epu8 (__m128i __A, __m128i __B) | |
| -{ | |
| - /* return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B); */ | |
| - __m128i val; | |
| - val.hi = (__m64i) __builtin_loongson_psadbh ((uint8x8_t)__A.hi, (uint8x8_t)__B.hi); | |
| - val.lo = (__m64i) __builtin_loongson_psadbh ((uint8x8_t)__A.lo, (uint8x8_t)__B.lo); | |
| - return val; | |
| -} | |
| - | |
| -#if 0 /* FIXME */ | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_stream_si32 (int *__A, int __B) | |
| -{ | |
| - __builtin_ia32_movnti (__A, __B); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_stream_si128 (__m128i *__A, __m128i __B) | |
| -{ | |
| - __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_stream_pd (double *__A, __m128d __B) | |
| -{ | |
| - __builtin_ia32_movntpd (__A, (__v2df)__B); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_clflush (void const *__A) | |
| -{ | |
| - __builtin_ia32_clflush (__A); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_lfence (void) | |
| -{ | |
| - __builtin_ia32_lfence (); | |
| -} | |
| - | |
| -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_mfence (void) | |
| -{ | |
| - __builtin_ia32_mfence (); | |
| -} | |
| -#endif | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cvtsi32_si128 (int __A) | |
| -{ | |
| - return _mm_set_epi32 (0, 0, 0, __A); | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cvtsi64_si128 (long long __A) | |
| -{ | |
| - return _mm_set_epi64x (0, __A); | |
| -} | |
| - | |
| -/* Microsoft intrinsic. */ | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_cvtsi64x_si128 (long long __A) | |
| -{ | |
| - return _mm_set_epi64x (0, __A); | |
| -} | |
| - | |
| -/* Casts between various SP, DP, INT vector types. Note that these do no | |
| - conversion of values, they just change the type. */ | |
| -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_castpd_ps(__m128d __A) | |
| -{ | |
| - /* return (__m128) __A; */ | |
| - __m128 val; | |
| - val.lo = (__m64) __A.lo; | |
| - val.hi = (__m64) __A.hi; | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_castpd_si128(__m128d __A) | |
| -{ | |
| - /* return (__m128i) __A; */ | |
| - __m128i val; | |
| - val.lo = (__m64i) __A.lo; | |
| - val.hi = (__m64i) __A.hi; | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_castps_pd(__m128 __A) | |
| -{ | |
| - /* return (__m128d) __A; */ | |
| - __m128d val; | |
| - val.lo = (__m64d) __A.lo; | |
| - val.hi = (__m64d) __A.hi; | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_castps_si128(__m128 __A) | |
| -{ | |
| - /* return (__m128i) __A; */ | |
| - __m128i val; | |
| - val.lo = (__m64i) __A.lo; | |
| - val.hi = (__m64i) __A.hi; | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_castsi128_ps(__m128i __A) | |
| -{ | |
| - /* return (__m128) __A; */ | |
| - __m128 val; | |
| - val.lo = (__m64) __A.lo; | |
| - val.hi = (__m64) __A.hi; | |
| - return val; | |
| -} | |
| - | |
| -extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
| -_mm_castsi128_pd(__m128i __A) | |
| -{ | |
| - /* return (__m128d) __A; */ | |
| - __m128d val; | |
| - val.lo = (__m64d) __A.lo; | |
| - val.hi = (__m64d) __A.hi; | |
| - return val; | |
| -} | |
| - | |
| -#ifdef __cplusplus | |
| -} | |
| -#endif | |
| - | |
| +#if !defined(_GCC_LOONGSON_MMIINTRIN_H) | |
| +#warning \ | |
| + loongson.h will be deprecated without further notice at a future date. \ | |
| + Please use loongson-mmiintrin.h instead. | |
| +#include "loongson-mmiintrin.h" | |
| #endif | |
| diff --git a/gcc/config/mips/loongson.md b/gcc/config/mips/loongson.md | |
| deleted file mode 100644 | |
| index 88f1487..0000000 | |
| --- a/gcc/config/mips/loongson.md | |
| +++ /dev/null | |
| @@ -1,974 +0,0 @@ | |
| -;; Machine description for Loongson-specific patterns, such as | |
| -;; ST Microelectronics Loongson-2E/2F etc. | |
| -;; Copyright (C) 2008-2018 Free Software Foundation, Inc. | |
| -;; Contributed by CodeSourcery. | |
| -;; | |
| -;; This file is part of GCC. | |
| -;; | |
| -;; GCC is free software; you can redistribute it and/or modify | |
| -;; it under the terms of the GNU General Public License as published by | |
| -;; the Free Software Foundation; either version 3, or (at your option) | |
| -;; any later version. | |
| - | |
| -;; GCC is distributed in the hope that it will be useful, | |
| -;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| -;; GNU General Public License for more details. | |
| - | |
| -;; You should have received a copy of the GNU General Public License | |
| -;; along with GCC; see the file COPYING3. If not see | |
| -;; <http://www.gnu.org/licenses/>. | |
| - | |
| -(define_c_enum "unspec" [ | |
| - UNSPEC_LOONGSON_PAVG | |
| - UNSPEC_LOONGSON_PCMPEQ | |
| - UNSPEC_LOONGSON_PCMPGT | |
| - UNSPEC_LOONGSON_PEXTR | |
| - UNSPEC_LOONGSON_PINSRH | |
| - UNSPEC_LOONGSON_VINIT | |
| - UNSPEC_LOONGSON_PMADD | |
| - UNSPEC_LOONGSON_PMOVMSK | |
| - UNSPEC_LOONGSON_PMULHU | |
| - UNSPEC_LOONGSON_PMULH | |
| - UNSPEC_LOONGSON_PMULU | |
| - UNSPEC_LOONGSON_PASUBUB | |
| - UNSPEC_LOONGSON_BIADD | |
| - UNSPEC_LOONGSON_PSADBH | |
| - UNSPEC_LOONGSON_PSHUFH | |
| - UNSPEC_LOONGSON_PUNPCKH | |
| - UNSPEC_LOONGSON_PUNPCKL | |
| - UNSPEC_LOONGSON_PADDD | |
| - UNSPEC_LOONGSON_PSUBD | |
| - UNSPEC_LOONGSON_DSLL | |
| - UNSPEC_LOONGSON_DSRL | |
| -]) | |
| - | |
| -;; Mode iterators and attributes. | |
| - | |
| -;; 64-bit vectors of bytes. | |
| -(define_mode_iterator VB [V8QI]) | |
| - | |
| -;; 64-bit vectors of halfwords. | |
| -(define_mode_iterator VH [V4HI]) | |
| - | |
| -;; 64-bit vectors of words. | |
| -(define_mode_iterator VW [V2SI]) | |
| - | |
| -;; 64-bit vectors of halfwords and bytes. | |
| -(define_mode_iterator VHB [V4HI V8QI]) | |
| - | |
| -;; 64-bit vectors of words and halfwords. | |
| -(define_mode_iterator VWH [V2SI V4HI]) | |
| - | |
| -;; 64-bit vectors of words and bytes | |
| -(define_mode_iterator VWB [V2SI V8QI]) | |
| - | |
| -;; 64-bit vectors of words, halfwords and bytes. | |
| -(define_mode_iterator VWHB [V2SI V4HI V8QI]) | |
| - | |
| -;; 64-bit vectors of words, halfwords and bytes; and DImode. | |
| -(define_mode_iterator VWHBDI [V2SI V4HI V8QI DI]) | |
| - | |
| -;; The Loongson instruction suffixes corresponding to the modes in the | |
| -;; VWHBDI iterator. | |
| -(define_mode_attr V_suffix [(V2SI "w") (V4HI "h") (V8QI "b") (DI "d")]) | |
| - | |
| -;; Given a vector type T, the mode of a vector half the size of T | |
| -;; and with the same number of elements. | |
| -(define_mode_attr V_squash [(V2SI "V2HI") (V4HI "V4QI")]) | |
| - | |
| -;; Given a vector type T, the mode of a vector the same size as T | |
| -;; but with half as many elements. | |
| -(define_mode_attr V_stretch_half [(V2SI "DI") (V4HI "V2SI") (V8QI "V4HI")]) | |
| - | |
| -;; The Loongson instruction suffixes corresponding to the transformation | |
| -;; expressed by V_stretch_half. | |
| -(define_mode_attr V_stretch_half_suffix [(V2SI "wd") (V4HI "hw") (V8QI "bh")]) | |
| - | |
| -;; Given a vector type T, the mode of a vector the same size as T | |
| -;; but with twice as many elements. | |
| -(define_mode_attr V_squash_double [(V2SI "V4HI") (V4HI "V8QI")]) | |
| - | |
| -;; Given a vector type T, the inner mode. | |
| -(define_mode_attr V_inner [(V8QI "QI") (V4HI "HI") (V2SI "SI")]) | |
| - | |
| -;; The Loongson instruction suffixes corresponding to the conversions | |
| -;; specified by V_half_width. | |
| -(define_mode_attr V_squash_double_suffix [(V2SI "wh") (V4HI "hb")]) | |
| - | |
| -;; Move patterns. | |
| - | |
| -;; Expander to legitimize moves involving values of vector modes. | |
| -(define_expand "mov<mode>" | |
| - [(set (match_operand:VWHB 0) | |
| - (match_operand:VWHB 1))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - if (mips_legitimize_move (<MODE>mode, operands[0], operands[1])) | |
| - DONE; | |
| -}) | |
| - | |
| -;; Handle legitimized moves between values of vector modes. | |
| -(define_insn "mov<mode>_internal" | |
| - [(set (match_operand:VWHB 0 "nonimmediate_operand" "=m,f,d,f, d, m, d") | |
| - (match_operand:VWHB 1 "move_operand" "f,m,f,dYG,dYG,dYG,m"))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - { return mips_output_move (operands[0], operands[1]); } | |
| - [(set_attr "move_type" "fpstore,fpload,mfc,mtc,move,store,load") | |
| - (set_attr "mode" "DI")]) | |
| - | |
| -;; Initialization of a vector. | |
| - | |
| -(define_expand "vec_init<mode><unitmode>" | |
| - [(set (match_operand:VWHB 0 "register_operand") | |
| - (match_operand 1 ""))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - mips_expand_vector_init (operands[0], operands[1]); | |
| - DONE; | |
| -}) | |
| - | |
| -;; Helper for vec_init. Initialize element 0 of the output from the input. | |
| -;; All other elements are undefined. | |
| -(define_insn "loongson_vec_init1_<mode>" | |
| - [(set (match_operand:VHB 0 "register_operand" "=f") | |
| - (unspec:VHB [(truncate:<V_inner> | |
| - (match_operand:DI 1 "reg_or_0_operand" "Jd"))] | |
| - UNSPEC_LOONGSON_VINIT))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "dmtc1\t%z1,%0" | |
| - [(set_attr "move_type" "mtc") | |
| - (set_attr "mode" "DI")]) | |
| - | |
| -;; Helper for vec_initv2si. | |
| -(define_insn "*vec_concatv2si" | |
| - [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| - (vec_concat:V2SI | |
| - (match_operand:SI 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpcklwd\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; Instruction patterns for SIMD instructions. | |
| - | |
| -;; Pack with signed saturation. | |
| -(define_insn "vec_pack_ssat_<mode>" | |
| - [(set (match_operand:<V_squash_double> 0 "register_operand" "=f") | |
| - (vec_concat:<V_squash_double> | |
| - (ss_truncate:<V_squash> | |
| - (match_operand:VWH 1 "register_operand" "f")) | |
| - (ss_truncate:<V_squash> | |
| - (match_operand:VWH 2 "register_operand" "f"))))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "packss<V_squash_double_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Pack with unsigned saturation. | |
| -(define_insn "vec_pack_usat_<mode>" | |
| - [(set (match_operand:<V_squash_double> 0 "register_operand" "=f") | |
| - (vec_concat:<V_squash_double> | |
| - (us_truncate:<V_squash> | |
| - (match_operand:VH 1 "register_operand" "f")) | |
| - (us_truncate:<V_squash> | |
| - (match_operand:VH 2 "register_operand" "f"))))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "packus<V_squash_double_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Addition, treating overflow by wraparound. | |
| -(define_insn "add<mode>3" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (plus:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| - (match_operand:VWHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "padd<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Addition of doubleword integers stored in FP registers. | |
| -;; Overflow is treated by wraparound. | |
| -;; We use 'unspec' instead of 'plus' here to avoid clash with | |
| -;; mips.md::add<mode>3. If 'plus' was used, then such instruction | |
| -;; would be recognized as adddi3 and reload would make it use | |
| -;; GPRs instead of FPRs. | |
| -(define_insn "loongson_paddd" | |
| - [(set (match_operand:DI 0 "register_operand" "=f") | |
| - (unspec:DI [(match_operand:DI 1 "register_operand" "f") | |
| - (match_operand:DI 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PADDD))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "paddd\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Addition, treating overflow by signed saturation. | |
| -(define_insn "ssadd<mode>3" | |
| - [(set (match_operand:VHB 0 "register_operand" "=f") | |
| - (ss_plus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| - (match_operand:VHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "padds<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Addition, treating overflow by unsigned saturation. | |
| -(define_insn "usadd<mode>3" | |
| - [(set (match_operand:VHB 0 "register_operand" "=f") | |
| - (us_plus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| - (match_operand:VHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "paddus<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Logical AND NOT. | |
| -(define_insn "loongson_pandn_<V_suffix>" | |
| - [(set (match_operand:VWHBDI 0 "register_operand" "=f") | |
| - (and:VWHBDI | |
| - (not:VWHBDI (match_operand:VWHBDI 1 "register_operand" "f")) | |
| - (match_operand:VWHBDI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pandn\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Logical AND. | |
| -(define_insn "and<mode>3" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (and:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| - (match_operand:VWHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "and\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Logical OR. | |
| -(define_insn "ior<mode>3" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (ior:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| - (match_operand:VWHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "or\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; Logical XOR. | |
| -(define_insn "xor<mode>3" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (xor:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| - (match_operand:VWHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "xor\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Logical NOR. | |
| -(define_insn "*loongson_nor" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (and:VWHB | |
| - (not:VWHB (match_operand:VWHB 1 "register_operand" "f")) | |
| - (not:VWHB (match_operand:VWHB 2 "register_operand" "f"))))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "nor\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Logical NOT. | |
| -(define_insn "one_cmpl<mode>2" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (not:VWHB (match_operand:VWHB 1 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "nor\t%0,%1,%1" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Average. | |
| -(define_insn "loongson_pavg<V_suffix>" | |
| - [(set (match_operand:VHB 0 "register_operand" "=f") | |
| - (unspec:VHB [(match_operand:VHB 1 "register_operand" "f") | |
| - (match_operand:VHB 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PAVG))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pavg<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Equality test. | |
| -(define_insn "loongson_pcmpeq<V_suffix>" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") | |
| - (match_operand:VWHB 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PCMPEQ))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pcmpeq<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Greater-than test. | |
| -(define_insn "loongson_pcmpgt<V_suffix>" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (unspec:VWHB [(match_operand:VWHB 1 "register_operand" "f") | |
| - (match_operand:VWHB 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PCMPGT))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pcmpgt<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Extract halfword. | |
| -(define_insn "loongson_pextrh" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PEXTR))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pextrh\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; Insert halfword. | |
| -(define_insn "loongson_pinsrh_0" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 4) (const_int 1) | |
| - (const_int 2) (const_int 3)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pinsrh_0\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_pinsrh_1" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 4) | |
| - (const_int 2) (const_int 3)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pinsrh_1\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_pinsrh_2" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 1) | |
| - (const_int 4) (const_int 3)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pinsrh_2\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_pinsrh_3" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 1) | |
| - (const_int 2) (const_int 4)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pinsrh_3\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "*vec_setv4hi" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f") | |
| - (match_operand:SI 3 "const_0_to_3_operand" "")] | |
| - UNSPEC_LOONGSON_PINSRH))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pinsrh_%3\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_expand "vec_setv4hi" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:HI 2 "register_operand" "f") | |
| - (match_operand:SI 3 "const_0_to_3_operand" "")] | |
| - UNSPEC_LOONGSON_PINSRH))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - rtx ext = gen_reg_rtx (SImode); | |
| - emit_move_insn (ext, gen_lowpart (SImode, operands[1])); | |
| - operands[1] = ext; | |
| -}) | |
| - | |
| -;; Multiply and add packed integers. | |
| -(define_insn "loongson_pmaddhw" | |
| - [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| - (unspec:V2SI [(match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PMADD))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmaddhw\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -(define_expand "sdot_prodv4hi" | |
| - [(match_operand:V2SI 0 "register_operand" "") | |
| - (match_operand:V4HI 1 "register_operand" "") | |
| - (match_operand:V4HI 2 "register_operand" "") | |
| - (match_operand:V2SI 3 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - rtx t = gen_reg_rtx (V2SImode); | |
| - emit_insn (gen_loongson_pmaddhw (t, operands[1], operands[2])); | |
| - emit_insn (gen_addv2si3 (operands[0], t, operands[3])); | |
| - DONE; | |
| -}) | |
| - | |
| -;; Maximum of signed halfwords. | |
| -(define_insn "smaxv4hi3" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (smax:V4HI (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmaxsh\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -(define_expand "smax<mode>3" | |
| - [(match_operand:VWB 0 "register_operand" "") | |
| - (match_operand:VWB 1 "register_operand" "") | |
| - (match_operand:VWB 2 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - mips_expand_vec_minmax (operands[0], operands[1], operands[2], | |
| - gen_loongson_pcmpgt<V_suffix>, false); | |
| - DONE; | |
| -}) | |
| - | |
| -;; Maximum of unsigned bytes. | |
| -(define_insn "umaxv8qi3" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (umax:V8QI (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmaxub\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Minimum of signed halfwords. | |
| -(define_insn "sminv4hi3" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (smin:V4HI (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pminsh\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -(define_expand "smin<mode>3" | |
| - [(match_operand:VWB 0 "register_operand" "") | |
| - (match_operand:VWB 1 "register_operand" "") | |
| - (match_operand:VWB 2 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - mips_expand_vec_minmax (operands[0], operands[1], operands[2], | |
| - gen_loongson_pcmpgt<V_suffix>, true); | |
| - DONE; | |
| -}) | |
| - | |
| -;; Minimum of unsigned bytes. | |
| -(define_insn "uminv8qi3" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (umin:V8QI (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pminub\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Move byte mask. | |
| -(define_insn "loongson_pmovmsk<V_suffix>" | |
| - [(set (match_operand:VB 0 "register_operand" "=f") | |
| - (unspec:VB [(match_operand:VB 1 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PMOVMSK))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmovmsk<V_suffix>\t%0,%1" | |
| - [(set_attr "type" "fabs")]) | |
| - | |
| -;; Multiply unsigned integers and store high result. | |
| -(define_insn "umul<mode>3_highpart" | |
| - [(set (match_operand:VH 0 "register_operand" "=f") | |
| - (unspec:VH [(match_operand:VH 1 "register_operand" "f") | |
| - (match_operand:VH 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PMULHU))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmulhu<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Multiply signed integers and store high result. | |
| -(define_insn "smul<mode>3_highpart" | |
| - [(set (match_operand:VH 0 "register_operand" "=f") | |
| - (unspec:VH [(match_operand:VH 1 "register_operand" "f") | |
| - (match_operand:VH 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PMULH))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmulh<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Multiply signed integers and store low result. | |
| -(define_insn "mul<mode>3" | |
| - [(set (match_operand:VH 0 "register_operand" "=f") | |
| - (mult:VH (match_operand:VH 1 "register_operand" "f") | |
| - (match_operand:VH 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmull<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Multiply unsigned word integers. | |
| -(define_insn "loongson_pmulu<V_suffix>" | |
| - [(set (match_operand:DI 0 "register_operand" "=f") | |
| - (unspec:DI [(match_operand:VW 1 "register_operand" "f") | |
| - (match_operand:VW 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PMULU))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pmulu<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Absolute difference. | |
| -(define_insn "loongson_pasubub" | |
| - [(set (match_operand:VB 0 "register_operand" "=f") | |
| - (unspec:VB [(match_operand:VB 1 "register_operand" "f") | |
| - (match_operand:VB 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PASUBUB))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pasubub\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Sum of unsigned byte integers. | |
| -(define_insn "loongson_biadd" | |
| - [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") | |
| - (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_BIADD))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "biadd\t%0,%1" | |
| - [(set_attr "type" "fabs")]) | |
| - | |
| -(define_insn "reduc_uplus_v8qi" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_BIADD))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "biadd\t%0,%1" | |
| - [(set_attr "type" "fabs")]) | |
| - | |
| -;; Sum of absolute differences. | |
| -(define_insn "loongson_psadbh" | |
| - [(set (match_operand:<V_stretch_half> 0 "register_operand" "=f") | |
| - (unspec:<V_stretch_half> [(match_operand:VB 1 "register_operand" "f") | |
| - (match_operand:VB 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PSADBH))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pasubub\t%0,%1,%2;biadd\t%0,%0" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Shuffle halfwords. | |
| -(define_insn "loongson_pshufh" | |
| - [(set (match_operand:VH 0 "register_operand" "=f") | |
| - (unspec:VH [(match_operand:VH 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PSHUFH))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "pshufh\t%0,%1,%2" | |
| - [(set_attr "type" "fmul")]) | |
| - | |
| -;; Shift left logical. | |
| -(define_insn "ashl<mode>3" | |
| - [(set (match_operand:VWH 0 "register_operand" "=f") | |
| - (ashift:VWH (match_operand:VWH 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "psll<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; Shift right arithmetic. | |
| -(define_insn "ashr<mode>3" | |
| - [(set (match_operand:VWH 0 "register_operand" "=f") | |
| - (ashiftrt:VWH (match_operand:VWH 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "psra<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; Shift right logical. | |
| -(define_insn "lshr<mode>3" | |
| - [(set (match_operand:VWH 0 "register_operand" "=f") | |
| - (lshiftrt:VWH (match_operand:VWH 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "psrl<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; Subtraction, treating overflow by wraparound. | |
| -(define_insn "sub<mode>3" | |
| - [(set (match_operand:VWHB 0 "register_operand" "=f") | |
| - (minus:VWHB (match_operand:VWHB 1 "register_operand" "f") | |
| - (match_operand:VWHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "psub<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Subtraction of doubleword integers stored in FP registers. | |
| -;; Overflow is treated by wraparound. | |
| -;; See loongson_paddd for the reason we use 'unspec' rather than | |
| -;; 'minus' here. | |
| -(define_insn "loongson_psubd" | |
| - [(set (match_operand:DI 0 "register_operand" "=f") | |
| - (unspec:DI [(match_operand:DI 1 "register_operand" "f") | |
| - (match_operand:DI 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_PSUBD))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "psubd\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Subtraction, treating overflow by signed saturation. | |
| -(define_insn "sssub<mode>3" | |
| - [(set (match_operand:VHB 0 "register_operand" "=f") | |
| - (ss_minus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| - (match_operand:VHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "psubs<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Subtraction, treating overflow by unsigned saturation. | |
| -(define_insn "ussub<mode>3" | |
| - [(set (match_operand:VHB 0 "register_operand" "=f") | |
| - (us_minus:VHB (match_operand:VHB 1 "register_operand" "f") | |
| - (match_operand:VHB 2 "register_operand" "f")))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "psubus<V_suffix>\t%0,%1,%2" | |
| - [(set_attr "type" "fadd")]) | |
| - | |
| -;; Unpack high data. Recall that Loongson only runs in little-endian. | |
| -(define_insn "loongson_punpckhbh" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (vec_select:V8QI | |
| - (vec_concat:V16QI | |
| - (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")) | |
| - (parallel [(const_int 4) (const_int 12) | |
| - (const_int 5) (const_int 13) | |
| - (const_int 6) (const_int 14) | |
| - (const_int 7) (const_int 15)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpckhbh\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_punpckhhw" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 2) (const_int 6) | |
| - (const_int 3) (const_int 7)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpckhhw\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_punpckhhw_qi" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (vec_select:V8QI | |
| - (vec_concat:V16QI | |
| - (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")) | |
| - (parallel [(const_int 4) (const_int 5) | |
| - (const_int 12) (const_int 13) | |
| - (const_int 6) (const_int 7) | |
| - (const_int 14) (const_int 15)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpckhhw\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_punpckhwd" | |
| - [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| - (vec_select:V2SI | |
| - (vec_concat:V4SI | |
| - (match_operand:V2SI 1 "register_operand" "f") | |
| - (match_operand:V2SI 2 "register_operand" "f")) | |
| - (parallel [(const_int 1) (const_int 3)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpckhwd\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -(define_insn "loongson_punpckhwd_qi" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (vec_select:V8QI | |
| - (vec_concat:V16QI | |
| - (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")) | |
| - (parallel [(const_int 4) (const_int 5) | |
| - (const_int 6) (const_int 7) | |
| - (const_int 12) (const_int 13) | |
| - (const_int 14) (const_int 15)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpckhwd\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -(define_insn "loongson_punpckhwd_hi" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 2) (const_int 3) | |
| - (const_int 6) (const_int 7)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpckhwd\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; Unpack low data. | |
| -(define_insn "loongson_punpcklbh" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (vec_select:V8QI | |
| - (vec_concat:V16QI | |
| - (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 8) | |
| - (const_int 1) (const_int 9) | |
| - (const_int 2) (const_int 10) | |
| - (const_int 3) (const_int 11)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpcklbh\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_punpcklhw" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 4) | |
| - (const_int 1) (const_int 5)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpcklhw\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "*loongson_punpcklhw_qi" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (vec_select:V8QI | |
| - (vec_concat:V16QI | |
| - (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 1) | |
| - (const_int 8) (const_int 9) | |
| - (const_int 2) (const_int 3) | |
| - (const_int 10) (const_int 11)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpcklhw\t%0,%1,%2" | |
| - [(set_attr "type" "fdiv")]) | |
| - | |
| -(define_insn "loongson_punpcklwd" | |
| - [(set (match_operand:V2SI 0 "register_operand" "=f") | |
| - (vec_select:V2SI | |
| - (vec_concat:V4SI | |
| - (match_operand:V2SI 1 "register_operand" "f") | |
| - (match_operand:V2SI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 2)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpcklwd\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -(define_insn "*loongson_punpcklwd_qi" | |
| - [(set (match_operand:V8QI 0 "register_operand" "=f") | |
| - (vec_select:V8QI | |
| - (vec_concat:V16QI | |
| - (match_operand:V8QI 1 "register_operand" "f") | |
| - (match_operand:V8QI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 1) | |
| - (const_int 2) (const_int 3) | |
| - (const_int 8) (const_int 9) | |
| - (const_int 10) (const_int 11)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpcklwd\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -(define_insn "*loongson_punpcklwd_hi" | |
| - [(set (match_operand:V4HI 0 "register_operand" "=f") | |
| - (vec_select:V4HI | |
| - (vec_concat:V8HI | |
| - (match_operand:V4HI 1 "register_operand" "f") | |
| - (match_operand:V4HI 2 "register_operand" "f")) | |
| - (parallel [(const_int 0) (const_int 1) | |
| - (const_int 4) (const_int 5)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "punpcklwd\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -(define_expand "vec_unpacks_lo_<mode>" | |
| - [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| - (match_operand:VHB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - mips_expand_vec_unpack (operands, false, false); | |
| - DONE; | |
| -}) | |
| - | |
| -(define_expand "vec_unpacks_hi_<mode>" | |
| - [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| - (match_operand:VHB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - mips_expand_vec_unpack (operands, false, true); | |
| - DONE; | |
| -}) | |
| - | |
| -(define_expand "vec_unpacku_lo_<mode>" | |
| - [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| - (match_operand:VHB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - mips_expand_vec_unpack (operands, true, false); | |
| - DONE; | |
| -}) | |
| - | |
| -(define_expand "vec_unpacku_hi_<mode>" | |
| - [(match_operand:<V_stretch_half> 0 "register_operand" "") | |
| - (match_operand:VHB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - mips_expand_vec_unpack (operands, true, true); | |
| - DONE; | |
| -}) | |
| - | |
| -;; Whole vector shifts, used for reduction epilogues. | |
| -(define_insn "vec_shl_<mode>" | |
| - [(set (match_operand:VWHBDI 0 "register_operand" "=f") | |
| - (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_DSLL))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "dsll\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -(define_insn "vec_shr_<mode>" | |
| - [(set (match_operand:VWHBDI 0 "register_operand" "=f") | |
| - (unspec:VWHBDI [(match_operand:VWHBDI 1 "register_operand" "f") | |
| - (match_operand:SI 2 "register_operand" "f")] | |
| - UNSPEC_LOONGSON_DSRL))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "dsrl\t%0,%1,%2" | |
| - [(set_attr "type" "fcvt")]) | |
| - | |
| -;; FOR G3A 128-bit mem access instructions | |
| -;; gssq and gslq define_insn pattern | |
| -(define_insn "movsqdi_internal" | |
| - [(parallel [(set (match_operand:DI 0 "stack_operand" "") | |
| - (match_operand:DI 1 "register_operand" "")) | |
| - (set (match_operand:DI 2 "stack_operand" "") | |
| - (match_operand:DI 3 "register_operand" ""))])] | |
| - "mips_gs464_128_store_p(operands)" | |
| - { | |
| - return "gssq\t%1,%3,%2"; | |
| - } | |
| - [(set_attr "length" "8") | |
| - (set_attr "can_delay" "no")]) | |
| - | |
| -(define_insn "movsqdf_internal" | |
| - [(parallel [(set (match_operand:DF 0 "stack_operand" "") | |
| - (match_operand:DF 1 "register_operand" "")) | |
| - (set (match_operand:DF 2 "stack_operand" "") | |
| - (match_operand:DF 3 "register_operand" ""))])] | |
| - "mips_gs464_128_store_p(operands)" | |
| - { | |
| - return "gssqc1\t%1,%3,%2"; | |
| - } | |
| - [(set_attr "length" "8") | |
| - (set_attr "can_delay" "no")]) | |
| - | |
| -(define_insn "movlqdi_internal" | |
| - [(parallel [(set (match_operand:DI 0 "register_operand" "") | |
| - (match_operand:DI 1 "stack_operand" "")) | |
| - (set (match_operand:DI 2 "register_operand" "") | |
| - (match_operand:DI 3 "stack_operand" ""))])] | |
| - "mips_gs464_128_load_p(operands)" | |
| - { | |
| - return "gslq\t%0,%2,%3"; | |
| - } | |
| - [(set_attr "length" "8") | |
| - (set_attr "can_delay" "no")]) | |
| - | |
| -(define_insn "movlqdf_internal" | |
| - [(parallel [(set (match_operand:DF 0 "register_operand" "") | |
| - (match_operand:DF 1 "stack_operand" "")) | |
| - (set (match_operand:DF 2 "register_operand" "") | |
| - (match_operand:DF 3 "stack_operand" ""))])] | |
| - "mips_gs464_128_load_p(operands)" | |
| - { | |
| - return "gslqc1\t%0,%2,%3"; | |
| - } | |
| - [(set_attr "length" "8") | |
| - (set_attr "can_delay" "no")]) | |
| - | |
| -;;for insn_and_split template sign extend | |
| -(define_insn "gsdmul3di" | |
| - [(set (match_operand:DI 0 "register_operand" "=d") | |
| - (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d")) | |
| - (sign_extend: DI (match_operand:SI 2 "register_operand" "d"))))] | |
| - "TARGET_LOONGSON_3A && TARGET_64BIT" | |
| - "gsdmult\t%0,%1,%2" | |
| - [(set_attr "type" "imul3nc") | |
| - (set_attr "mode" "SI")]) | |
| - | |
| -;;for insn_and_split template zero extend | |
| -(define_insn "ugsdmul3di" | |
| - [(set (match_operand:DI 0 "register_operand" "=d") | |
| - (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "d")) | |
| - (zero_extend: DI (match_operand:SI 2 "register_operand" "d"))))] | |
| - "TARGET_LOONGSON_3A && TARGET_64BIT" | |
| - "gsdmultu\t%0,%1,%2" | |
| - [(set_attr "type" "imul3nc") | |
| - (set_attr "mode" "SI")]) | |
| - | |
| -(define_insn "vec_loongson_extract_lo_<mode>" | |
| - [(set (match_operand:<V_inner> 0 "register_operand" "=r") | |
| - (vec_select:<V_inner> | |
| - (match_operand:VWHB 1 "register_operand" "f") | |
| - (parallel [(const_int 0)])))] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| - "mfc1\t%0,%1" | |
| - [(set_attr "type" "mfc")]) | |
| - | |
| -(define_expand "reduc_plus_scal_<mode>" | |
| - [(match_operand:<V_inner> 0 "register_operand" "") | |
| - (match_operand:VWHB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| - mips_expand_vec_reduc (tmp, operands[1], gen_add<mode>3); | |
| - emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| - DONE; | |
| -}) | |
| - | |
| -(define_expand "reduc_smax_scal_<mode>" | |
| - [(match_operand:<V_inner> 0 "register_operand" "") | |
| - (match_operand:VWHB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| - mips_expand_vec_reduc (tmp, operands[1], gen_smax<mode>3); | |
| - emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| - DONE; | |
| -}) | |
| - | |
| -(define_expand "reduc_smin_scal_<mode>" | |
| - [(match_operand:<V_inner> 0 "register_operand" "") | |
| - (match_operand:VWHB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| - mips_expand_vec_reduc (tmp, operands[1], gen_smin<mode>3); | |
| - emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| - DONE; | |
| -}) | |
| - | |
| -(define_expand "reduc_umax_scal_<mode>" | |
| - [(match_operand:<V_inner> 0 "register_operand" "") | |
| - (match_operand:VB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| - mips_expand_vec_reduc (tmp, operands[1], gen_umax<mode>3); | |
| - emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| - DONE; | |
| -}) | |
| - | |
| -(define_expand "reduc_umin_scal_<mode>" | |
| - [(match_operand:<V_inner> 0 "register_operand" "") | |
| - (match_operand:VB 1 "register_operand" "")] | |
| - "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS" | |
| -{ | |
| - rtx tmp = gen_reg_rtx (GET_MODE (operands[1])); | |
| - mips_expand_vec_reduc (tmp, operands[1], gen_umin<mode>3); | |
| - emit_insn (gen_vec_loongson_extract_lo_<mode> (operands[0], tmp)); | |
| - DONE; | |
| -}) | |
| diff --git a/gcc/config/mips/loongson3a.md b/gcc/config/mips/loongson3a.md | |
| deleted file mode 100644 | |
| index 2ebde68..0000000 | |
| --- a/gcc/config/mips/loongson3a.md | |
| +++ /dev/null | |
| @@ -1,137 +0,0 @@ | |
| -;; Pipeline model for Loongson-3A cores. | |
| - | |
| -;; Copyright (C) 2011-2018 Free Software Foundation, Inc. | |
| -;; | |
| -;; This file is part of GCC. | |
| -;; | |
| -;; GCC is free software; you can redistribute it and/or modify it | |
| -;; under the terms of the GNU General Public License as published | |
| -;; by the Free Software Foundation; either version 3, or (at your | |
| -;; option) any later version. | |
| -;; | |
| -;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
| -;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| -;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
| -;; License for more details. | |
| -;; | |
| -;; You should have received a copy of the GNU General Public License | |
| -;; along with GCC; see the file COPYING3. If not see | |
| -;; <http://www.gnu.org/licenses/>. | |
| - | |
| -;; Uncomment the following line to output automata for debugging. | |
| -;; (automata_option "v") | |
| - | |
| -;; Automaton for integer instructions. | |
| -(define_automaton "ls3a_a_alu") | |
| - | |
| -;; Automaton for floating-point instructions. | |
| -(define_automaton "ls3a_a_falu") | |
| - | |
| -;; Automaton for memory operations. | |
| -(define_automaton "ls3a_a_mem") | |
| - | |
| -;; Describe the resources. | |
| - | |
| -(define_cpu_unit "ls3a_alu1" "ls3a_a_alu") | |
| -(define_cpu_unit "ls3a_alu2" "ls3a_a_alu") | |
| -(define_cpu_unit "ls3a_mem" "ls3a_a_mem") | |
| -(define_cpu_unit "ls3a_falu1" "ls3a_a_falu") | |
| -(define_cpu_unit "ls3a_falu2" "ls3a_a_falu") | |
| - | |
| -;; Describe instruction reservations. | |
| - | |
| -(define_insn_reservation "ls3a_arith" 1 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "arith,clz,const,logical, | |
| - move,nop,shift,signext,slt")) | |
| - "ls3a_alu1 | ls3a_alu2") | |
| - | |
| -(define_insn_reservation "ls3a_branch" 1 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "branch,jump,call,condmove,trap")) | |
| - "ls3a_alu1") | |
| - | |
| -(define_insn_reservation "ls3a_mfhilo" 1 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "mfhi,mflo,mthi,mtlo")) | |
| - "ls3a_alu2") | |
| - | |
| -;; Operation imul3nc is fully pipelined. | |
| -(define_insn_reservation "ls3a_imul3nc" 5 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "imul3nc")) | |
| - "ls3a_alu2") | |
| - | |
| -(define_insn_reservation "ls3a_imul" 7 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "imul,imadd")) | |
| - "ls3a_alu2 * 7") | |
| - | |
| -(define_insn_reservation "ls3a_idiv_si" 12 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (and (eq_attr "type" "idiv") | |
| - (eq_attr "mode" "SI"))) | |
| - "ls3a_alu2 * 12") | |
| - | |
| -(define_insn_reservation "ls3a_idiv_di" 25 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (and (eq_attr "type" "idiv") | |
| - (eq_attr "mode" "DI"))) | |
| - "ls3a_alu2 * 25") | |
| - | |
| -(define_insn_reservation "ls3a_load" 3 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "load")) | |
| - "ls3a_mem") | |
| - | |
| -(define_insn_reservation "ls3a_fpload" 4 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "load,mfc,mtc")) | |
| - "ls3a_mem") | |
| - | |
| -(define_insn_reservation "ls3a_prefetch" 0 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "prefetch,prefetchx")) | |
| - "ls3a_mem") | |
| - | |
| -(define_insn_reservation "ls3a_store" 0 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "store,fpstore,fpidxstore")) | |
| - "ls3a_mem") | |
| - | |
| -;; All the fp operations can be executed in FALU1. Only fp add, | |
| -;; sub, mul, madd can be executed in FALU2. Try FALU2 firstly. | |
| -(define_insn_reservation "ls3a_fadd" 6 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "fadd,fmul,fmadd")) | |
| - "ls3a_falu2 | ls3a_falu1") | |
| - | |
| -(define_insn_reservation "ls3a_fcmp" 2 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "fabs,fcmp,fmove,fneg")) | |
| - "ls3a_falu1") | |
| - | |
| -(define_insn_reservation "ls3a_fcvt" 4 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "fcvt")) | |
| - "ls3a_falu1") | |
| - | |
| -(define_insn_reservation "ls3a_fdiv_sf" 12 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| - (eq_attr "mode" "SF"))) | |
| - "ls3a_falu1 * 12") | |
| - | |
| -(define_insn_reservation "ls3a_fdiv_df" 19 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (and (eq_attr "type" "fdiv,frdiv,fsqrt,frsqrt") | |
| - (eq_attr "mode" "DF"))) | |
| - "ls3a_falu1 * 19") | |
| - | |
| -;; Force single-dispatch for unknown or multi. | |
| -(define_insn_reservation "ls3a_unknown" 1 | |
| - (and (eq_attr "cpu" "loongson_3a") | |
| - (eq_attr "type" "unknown,multi,atomic,syncloop")) | |
| - "ls3a_alu1 + ls3a_alu2 + ls3a_falu1 + ls3a_falu2 + ls3a_mem") | |
| - | |
| -;; End of DFA-based pipeline description for loongson_3a | |
| diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def | |
| index d0640e5..e055117 100644 | |
| --- a/gcc/config/mips/mips-cpus.def | |
| +++ b/gcc/config/mips/mips-cpus.def | |
| @@ -162,7 +162,10 @@ MIPS_CPU ("sr71000", PROCESSOR_SR71000, 64, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| MIPS_CPU ("xlr", PROCESSOR_XLR, 64, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| /* MIPS64 Release 2 processors. */ | |
| -MIPS_CPU ("loongson3a", PROCESSOR_LOONGSON_3A, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| +MIPS_CPU ("loongson3a", PROCESSOR_GS464, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| +MIPS_CPU ("gs464", PROCESSOR_GS464, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| +MIPS_CPU ("gs464e", PROCESSOR_GS464E, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| +MIPS_CPU ("gs264e", PROCESSOR_GS264E, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| MIPS_CPU ("octeon", PROCESSOR_OCTEON, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| MIPS_CPU ("octeon+", PROCESSOR_OCTEON, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| MIPS_CPU ("octeon2", PROCESSOR_OCTEON2, 65, PTF_AVOID_BRANCHLIKELY_SPEED) | |
| diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt | |
| index daccefb..461881d 100644 | |
| --- a/gcc/config/mips/mips-tables.opt | |
| +++ b/gcc/config/mips/mips-tables.opt | |
| @@ -679,20 +679,28 @@ EnumValue | |
| Enum(mips_arch_opt_value) String(loongson3a) Value(96) Canonical | |
| EnumValue | |
| -Enum(mips_arch_opt_value) String(octeon) Value(97) Canonical | |
| +Enum(mips_arch_opt_value) String(gs464) Value(97) Canonical | |
| EnumValue | |
| -Enum(mips_arch_opt_value) String(octeon+) Value(98) Canonical | |
| +Enum(mips_arch_opt_value) String(octeon) Value(98) Canonical | |
| EnumValue | |
| -Enum(mips_arch_opt_value) String(octeon2) Value(99) Canonical | |
| +Enum(mips_arch_opt_value) String(octeon+) Value(99) Canonical | |
| EnumValue | |
| -Enum(mips_arch_opt_value) String(octeon3) Value(100) Canonical | |
| +Enum(mips_arch_opt_value) String(octeon2) Value(100) Canonical | |
| EnumValue | |
| -Enum(mips_arch_opt_value) String(xlp) Value(101) Canonical | |
| +Enum(mips_arch_opt_value) String(octeon3) Value(101) Canonical | |
| EnumValue | |
| -Enum(mips_arch_opt_value) String(i6400) Value(102) Canonical | |
| +Enum(mips_arch_opt_value) String(xlp) Value(102) Canonical | |
| +EnumValue | |
| +Enum(mips_arch_opt_value) String(i6400) Value(103) Canonical | |
| + | |
| +EnumValue | |
| +Enum(mips_arch_opt_value) String(i6500) Value(104) Canonical | |
| + | |
| +EnumValue | |
| +Enum(mips_arch_opt_value) String(p6600) Value(105) Canonical | |
| diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c | |
| index 9daaaaa..9ce7d14 100644 | |
| --- a/gcc/config/mips/mips.c | |
| +++ b/gcc/config/mips/mips.c | |
| @@ -826,7 +826,13 @@ static const struct mips_rtx_cost_data | |
| { /* Loongson-2F */ | |
| DEFAULT_COSTS | |
| }, | |
| - { /* Loongson-3A */ | |
| + { /* Loongson gs464. */ | |
| + DEFAULT_COSTS | |
| + }, | |
| + { /* Loongson gs464e. */ | |
| + DEFAULT_COSTS | |
| + }, | |
| + { /* Loongson gs264e. */ | |
| DEFAULT_COSTS | |
| }, | |
| { /* M4k */ | |
| @@ -11646,7 +11652,7 @@ mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset, | |
| /* The loongson3a gs464 gss<l>q[c1] instructions offset has 9+4 bit equal to 4096 | |
| * Option -mno-gs464-func-save-restore-reg disable this. */ | |
| - if(flag_sr_opt && TARGET_LOONGSON_3A | |
| + if(flag_sr_opt && TARGET_LOONGSON_EXT | |
| && TARGET_64BIT && !ABI_32 && (offset < 4096)) | |
| {/* FIXME: ABI */ | |
| for (regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--) | |
| @@ -11709,7 +11715,7 @@ mips_for_each_saved_gpr_and_fpr (HOST_WIDE_INT sp_offset, | |
| offset = cfun->machine->frame.fp_sp_offset - sp_offset; | |
| fpr_mode = (TARGET_SINGLE_FLOAT ? SFmode : DFmode); | |
| save_regno1 = save_regno2 = 0; | |
| - if(flag_sr_opt && TARGET_LOONGSON_3A && TARGET_FLOAT64 | |
| + if(flag_sr_opt && TARGET_LOONGSON_EXT && TARGET_FLOAT64 | |
| && !ABI_32 && (fpr_mode == DFmode) && (offset < 4096)) | |
| { | |
| for (regno = FP_REG_LAST - MAX_FPRS_PER_FMT + 1; | |
| @@ -12921,8 +12927,9 @@ mips_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode) | |
| if (mode == CCFmode) | |
| return !(TARGET_FLOATXX && (regno & 1) != 0); | |
| - /* Allow 64-bit vector modes for Loongson-2E/2F. */ | |
| - if (TARGET_LOONGSON_VECTORS | |
| + /* Allow 64-bit vector modes for Loongson MultiMedia extensions | |
| + Instructions (MMI). */ | |
| + if (TARGET_LOONGSON_MMI | |
| && (mode == V2SImode | |
| || mode == V4HImode | |
| || mode == V8QImode | |
| @@ -13492,7 +13499,7 @@ mips_vector_mode_supported_p (machine_mode mode) | |
| case E_V2SImode: | |
| case E_V4HImode: | |
| case E_V8QImode: | |
| - return TARGET_LOONGSON_VECTORS; | |
| + return TARGET_LOONGSON_MMI; | |
| default: | |
| return MSA_SUPPORTED_MODE_P (mode); | |
| @@ -14245,7 +14252,7 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands) | |
| /* Output the release side of the memory barrier. */ | |
| /* The loongson3a need sync after label "1:", disable this */ | |
| - if (need_atomic_barrier_p (model, true) && ! TARGET_LOONGSON_3A) | |
| + if (need_atomic_barrier_p (model, true) && ! TARGET_LOONGSON_EXT) | |
| { | |
| if (required_oldval == 0 && TARGET_OCTEON) | |
| { | |
| @@ -14379,7 +14386,7 @@ mips_process_sync_loop (rtx_insn *insn, rtx *operands) | |
| /* Output the acquire side of the memory barrier. */ | |
| /* The loongson3a need sync after label "2:", disable this */ | |
| - if (TARGET_SYNC_AFTER_SC && need_atomic_barrier_p (model, false) && ! TARGET_LOONGSON_3A) | |
| + if (TARGET_SYNC_AFTER_SC && need_atomic_barrier_p (model, false) && ! TARGET_LOONGSON_EXT) | |
| mips_multi_add_insn ("sync", NULL); | |
| /* Output the exit label, if needed. */ | |
| @@ -14734,6 +14741,7 @@ mips_issue_rate (void) | |
| case PROCESSOR_OCTEON2: | |
| case PROCESSOR_OCTEON3: | |
| case PROCESSOR_I6400: | |
| + case PROCESSOR_GS264E: | |
| return 2; | |
| case PROCESSOR_SB1: | |
| @@ -14746,7 +14754,8 @@ mips_issue_rate (void) | |
| case PROCESSOR_LOONGSON_2E: | |
| case PROCESSOR_LOONGSON_2F: | |
| - case PROCESSOR_LOONGSON_3A: | |
| + case PROCESSOR_GS464: | |
| + case PROCESSOR_GS464E: | |
| case PROCESSOR_P5600: | |
| return 4; | |
| @@ -14877,10 +14886,10 @@ mips_multipass_dfa_lookahead (void) | |
| if (TUNE_SB1) | |
| return 4; | |
| - if (TUNE_LOONGSON_2EF || TUNE_LOONGSON_3A) | |
| + if (TUNE_LOONGSON_2EF || TUNE_GS464 || TUNE_GS464E) | |
| return 4; | |
| - if (TUNE_OCTEON) | |
| + if (TUNE_OCTEON || TUNE_GS264E) | |
| return 2; | |
| if (TUNE_P5600 || TUNE_I6400) | |
| @@ -15335,7 +15344,7 @@ AVAIL_NON_MIPS16 (dspr2, TARGET_DSPR2) | |
| AVAIL_NON_MIPS16 (dsp_32, !TARGET_64BIT && TARGET_DSP) | |
| AVAIL_NON_MIPS16 (dsp_64, TARGET_64BIT && TARGET_DSP) | |
| AVAIL_NON_MIPS16 (dspr2_32, !TARGET_64BIT && TARGET_DSPR2) | |
| -AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_VECTORS) | |
| +AVAIL_NON_MIPS16 (loongson, TARGET_LOONGSON_MMI) | |
| AVAIL_NON_MIPS16 (cache, TARGET_CACHE_BUILTIN) | |
| AVAIL_NON_MIPS16 (msa, TARGET_MSA) | |
| @@ -20345,6 +20354,43 @@ mips_option_override (void) | |
| TARGET_DSPR2 = false; | |
| } | |
| + /* Make sure that when TARGET_LOONGSON_MMI is true, TARGET_HARD_FLOAT_ABI | |
| + is true. In o32 pairs of floating-point registers provide 64-bit | |
| + values. */ | |
| + if (TARGET_LOONGSON_MMI && !TARGET_HARD_FLOAT_ABI) | |
| + error ("%<-mloongson-mmi%> must be used with %<-mhard-float%>"); | |
| + | |
| + /* Default to enable Loongson MMI on Loongson 2e, 2f, gs464, gs464e | |
| + * or gs264e target. */ | |
| + if ((target_flags_explicit & MASK_LOONGSON_MMI) == 0 | |
| + && ((strcmp (mips_arch_info->name, "loongson2e") == 0) | |
| + || (strcmp (mips_arch_info->name, "loongson2f") == 0) | |
| + || (strcmp (mips_arch_info->name, "loongson3a") == 0) | |
| + || (strcmp (mips_arch_info->name, "gs464") == 0) | |
| + || (strcmp (mips_arch_info->name, "gs464e") == 0) | |
| + || (strcmp (mips_arch_info->name, "gs264e") == 0))) | |
| + target_flags |= MASK_LOONGSON_MMI; | |
| + | |
| + /* Default to enable Loongson EXT on Loongson gs464, gs464e | |
| + * or gs264e target. */ | |
| + if ((target_flags_explicit & MASK_LOONGSON_EXT) == 0 | |
| + && ((strcmp (mips_arch_info->name, "loongson3a") == 0) | |
| + || (strcmp (mips_arch_info->name, "gs464") == 0) | |
| + || (strcmp (mips_arch_info->name, "gs464e") == 0) | |
| + || (strcmp (mips_arch_info->name, "gs264e") == 0))) | |
| + target_flags |= MASK_LOONGSON_EXT; | |
| + | |
| + /* Default to enable Loongson EXT2 on gs464e or gs264e target. */ | |
| + if ((target_flags_explicit & MASK_LOONGSON_EXT2) == 0 | |
| + && ((strcmp (mips_arch_info->name, "gs464e") == 0) | |
| + || (strcmp (mips_arch_info->name, "gs264e") == 0))) | |
| + target_flags |= MASK_LOONGSON_EXT2; | |
| + | |
| + /* Default to enable MSA on gs264e target. */ | |
| + if ((target_flags_explicit & MASK_MSA) == 0 | |
| + && (strcmp (mips_arch_info->name, "gs264e") == 0)) | |
| + target_flags |= MASK_MSA; | |
| + | |
| /* .eh_frame addresses should be the same width as a C pointer. | |
| Most MIPS ABIs support only one pointer size, so the assembler | |
| will usually know exactly how big an .eh_frame address is. | |
| @@ -21330,12 +21376,12 @@ void mips_function_profiler (FILE *file) | |
| /* Implement TARGET_SHIFT_TRUNCATION_MASK. We want to keep the default | |
| behavior of TARGET_SHIFT_TRUNCATION_MASK for non-vector modes even | |
| - when TARGET_LOONGSON_VECTORS is true. */ | |
| + when TARGET_LOONGSON_MMI is true. */ | |
| static unsigned HOST_WIDE_INT | |
| mips_shift_truncation_mask (machine_mode mode) | |
| { | |
| - if (TARGET_LOONGSON_VECTORS && VECTOR_MODE_P (mode)) | |
| + if (TARGET_LOONGSON_MMI && VECTOR_MODE_P (mode)) | |
| return 0; | |
| return GET_MODE_BITSIZE (mode) - 1; | |
| @@ -21436,7 +21482,7 @@ mips_expand_vpc_loongson_even_odd (struct expand_vec_perm_d *d) | |
| unsigned i, odd, nelt = d->nelt; | |
| rtx t0, t1, t2, t3; | |
| - if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) | |
| + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI)) | |
| return false; | |
| /* Even-odd for V2SI/V2SFmode is matched by interleave directly. */ | |
| if (nelt < 4) | |
| @@ -21493,7 +21539,7 @@ mips_expand_vpc_loongson_pshufh (struct expand_vec_perm_d *d) | |
| unsigned i, mask; | |
| rtx rmask; | |
| - if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) | |
| + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI)) | |
| return false; | |
| if (d->vmode != V4HImode) | |
| return false; | |
| @@ -21545,7 +21591,7 @@ mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d) | |
| unsigned i, elt; | |
| rtx t0, t1; | |
| - if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) | |
| + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI)) | |
| return false; | |
| /* Note that we've already matched V2SI via punpck and V4HI via pshufh. */ | |
| if (d->vmode != V8QImode) | |
| @@ -22139,7 +22185,7 @@ mips_expand_vector_init (rtx target, rtx vals) | |
| } | |
| /* Loongson is the only cpu with vectors with more elements. */ | |
| - gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS); | |
| + gcc_assert (TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI); | |
| /* If all values are identical, broadcast the value. */ | |
| if (all_same) | |
| diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h | |
| index 84ae675..971e73d 100644 | |
| --- a/gcc/config/mips/mips.h | |
| +++ b/gcc/config/mips/mips.h | |
| @@ -266,7 +266,9 @@ struct mips_cpu_info { | |
| #define TARGET_LOONGSON_2E (mips_arch == PROCESSOR_LOONGSON_2E) | |
| #define TARGET_LOONGSON_2F (mips_arch == PROCESSOR_LOONGSON_2F) | |
| #define TARGET_LOONGSON_2EF (TARGET_LOONGSON_2E || TARGET_LOONGSON_2F) | |
| -#define TARGET_LOONGSON_3A (mips_arch == PROCESSOR_LOONGSON_3A) | |
| +#define TARGET_GS464 (mips_arch == PROCESSOR_GS464) | |
| +#define TARGET_GS464E (mips_arch == PROCESSOR_GS464E) | |
| +#define TARGET_GS264E (mips_arch == PROCESSOR_GS264E) | |
| #define TARGET_MIPS3900 (mips_arch == PROCESSOR_R3900) | |
| #define TARGET_MIPS4000 (mips_arch == PROCESSOR_R4000) | |
| #define TARGET_MIPS4120 (mips_arch == PROCESSOR_R4120) | |
| @@ -298,7 +300,9 @@ struct mips_cpu_info { | |
| || mips_tune == PROCESSOR_74KF3_2) | |
| #define TUNE_LOONGSON_2EF (mips_tune == PROCESSOR_LOONGSON_2E \ | |
| || mips_tune == PROCESSOR_LOONGSON_2F) | |
| -#define TUNE_LOONGSON_3A (mips_tune == PROCESSOR_LOONGSON_3A) | |
| +#define TUNE_GS464 (mips_tune == PROCESSOR_GS464) | |
| +#define TUNE_GS464E (mips_tune == PROCESSOR_GS464E) | |
| +#define TUNE_GS264E (mips_tune == PROCESSOR_GS264E) | |
| #define TUNE_MIPS3000 (mips_tune == PROCESSOR_R3000) | |
| #define TUNE_MIPS3900 (mips_tune == PROCESSOR_R3900) | |
| #define TUNE_MIPS4000 (mips_tune == PROCESSOR_R4000) | |
| @@ -318,13 +322,6 @@ struct mips_cpu_info { | |
| #define TUNE_P5600 (mips_tune == PROCESSOR_P5600) | |
| #define TUNE_I6400 (mips_tune == PROCESSOR_I6400) | |
| -/* Whether vector modes and intrinsics for ST Microelectronics | |
| - Loongson-2E/2F processors should be enabled. In o32 pairs of | |
| - floating-point registers provide 64-bit values. */ | |
| -#define TARGET_LOONGSON_VECTORS (TARGET_HARD_FLOAT_ABI \ | |
| - && (TARGET_LOONGSON_2EF \ | |
| - || TARGET_LOONGSON_3A)) | |
| - | |
| /* True if the pre-reload scheduler should try to create chains of | |
| multiply-add or multiply-subtract instructions. For example, | |
| suppose we have: | |
| @@ -595,9 +592,12 @@ struct mips_cpu_info { | |
| if (TARGET_ABICALLS) \ | |
| builtin_define ("__mips_abicalls"); \ | |
| \ | |
| - /* Whether Loongson vector modes are enabled. */ \ | |
| - if (TARGET_LOONGSON_VECTORS) \ | |
| - builtin_define ("__mips_loongson_vector_rev"); \ | |
| + /* Whether Loongson vector modes are enabled. */ \ | |
| + if (TARGET_LOONGSON_MMI) \ | |
| + { \ | |
| + builtin_define ("__mips_loongson_vector_rev"); \ | |
| + builtin_define ("__mips_loongson_mmi"); \ | |
| + } \ | |
| \ | |
| /* Historical Octeon macro. */ \ | |
| if (TARGET_OCTEON) \ | |
| @@ -779,7 +779,8 @@ struct mips_cpu_info { | |
| %{march=mips32r6: -mips32r6} \ | |
| %{march=mips64|march=5k*|march=20k*|march=sb1*|march=sr71000 \ | |
| |march=xlr: -mips64} \ | |
| - %{march=mips64r2|march=loongson3a|march=octeon|march=xlp: -mips64r2} \ | |
| + %{march=mips64r2|march=loongson3a|march=gs464|march=gs464e|march=gs264e \ | |
| + |march=octeon|march=xlp: -mips64r2} \ | |
| %{march=mips64r3: -mips64r3} \ | |
| %{march=mips64r5: -mips64r5} \ | |
| %{march=mips64r6|march=i6400: -mips64r6}}" | |
| @@ -935,7 +936,7 @@ struct mips_cpu_info { | |
| /* ISA has 32 single-precision registers. */ | |
| #define ISA_HAS_ODD_SPREG ((mips_isa_rev >= 1 \ | |
| - && !TARGET_LOONGSON_3A) \ | |
| + && !TARGET_GS464) \ | |
| || TARGET_FLOAT64 \ | |
| || TARGET_MIPS5900) | |
| @@ -978,7 +979,7 @@ struct mips_cpu_info { | |
| because the former are faster and can also have the effect of reducing | |
| code size. */ | |
| #define ISA_AVOID_DIV_HILO ((TARGET_LOONGSON_2EF \ | |
| - || TARGET_LOONGSON_3A) \ | |
| + || TARGET_GS464) \ | |
| && !TARGET_MIPS16) | |
| /* ISA supports instructions DDIV and DDIVU. */ | |
| @@ -1071,14 +1072,14 @@ struct mips_cpu_info { | |
| 'd = [+-] (a * b [+-] c)'. */ | |
| #define ISA_HAS_FUSED_MADD4 (mips_madd4 \ | |
| && (TARGET_MIPS8000 \ | |
| - || TARGET_LOONGSON_3A)) | |
| + || TARGET_GS464)) | |
| /* ISA has 4 operand unfused madd instructions of the form | |
| 'd = [+-] (a * b [+-] c)'. */ | |
| #define ISA_HAS_UNFUSED_MADD4 (mips_madd4 \ | |
| && ISA_HAS_FP4 \ | |
| && !TARGET_MIPS8000 \ | |
| - && !TARGET_LOONGSON_3A) | |
| + && !TARGET_GS464) | |
| /* ISA has 3 operand r6 fused madd instructions of the form | |
| 'c = c [+-] (a * b)'. */ | |
| @@ -1114,6 +1115,9 @@ struct mips_cpu_info { | |
| /* ISA has count leading zeroes/ones instruction (not implemented). */ | |
| #define ISA_HAS_CLZ_CLO (mips_isa_rev >= 1 && !TARGET_MIPS16) | |
| +/* ISA has count trailing zeroes/ones instruction (not implemented). */ | |
| +#define ISA_HAS_CTZ_CTO (TARGET_LOONGSON_EXT2) | |
| + | |
| /* ISA has three operand multiply instructions that put | |
| the high part in an accumulator: mulhi or mulhiu. */ | |
| #define ISA_HAS_MULHI ((TARGET_MIPS5400 \ | |
| @@ -1355,6 +1359,7 @@ struct mips_cpu_info { | |
| %{mvirt} %{mno-virt} \ | |
| %{mxpa} %{mno-xpa} \ | |
| %{mmsa} %{mno-msa} \ | |
| +%{mloongson-mmi} %{mno-loongson-mmi} \ | |
| %{msmartmips} %{mno-smartmips} \ | |
| %{mmt} %{mno-mt} \ | |
| %{mfix-rm7000} %{mno-fix-rm7000} \ | |
| @@ -2631,9 +2636,9 @@ typedef struct mips_args { | |
| #define SLOW_BYTE_ACCESS (!TARGET_MIPS16) | |
| /* Standard MIPS integer shifts truncate the shift amount to the | |
| - width of the shifted operand. However, Loongson vector shifts | |
| + width of the shifted operand. However, Loongson MMI shifts | |
| do not truncate the shift amount at all. */ | |
| -#define SHIFT_COUNT_TRUNCATED (!TARGET_LOONGSON_VECTORS) | |
| +#define SHIFT_COUNT_TRUNCATED (!TARGET_LOONGSON_MMI) | |
| /* Specify the machine mode that pointers have. | |
| diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md | |
| index 1d95348..c5e50a4 100644 | |
| --- a/gcc/config/mips/mips.md | |
| +++ b/gcc/config/mips/mips.md | |
| @@ -37,7 +37,9 @@ | |
| 74kf3_2 | |
| loongson_2e | |
| loongson_2f | |
| - loongson_3a | |
| + gs464 | |
| + gs464e | |
| + gs264e | |
| m4k | |
| octeon | |
| octeon2 | |
| @@ -334,6 +336,7 @@ | |
| ;; slt set less than instructions | |
| ;; signext sign extend instructions | |
| ;; clz the clz and clo instructions | |
| +;; ctz the ctz and cto instructions | |
| ;; pop the pop instruction | |
| ;; trap trap if instructions | |
| ;; imul integer multiply 2 operands | |
| @@ -374,7 +377,7 @@ | |
| (define_attr "type" | |
| "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, | |
| prefetch,prefetchx,condmove,mtc,mfc,mthi,mtlo,mfhi,mflo,const,arith,logical, | |
| - shift,slt,signext,clz,pop,trap,imul,imul3,imul3nc,imadd,idiv,idiv3,move, | |
| + shift,slt,signext,clz,ctz,pop,trap,imul,imul3,imul3nc,imadd,idiv,idiv3,move, | |
| fmove,fadd,fmul,fmadd,fdiv,frdiv,frdiv1,frdiv2,fabs,fneg,fcmp,fcvt,fsqrt, | |
| frsqrt,frsqrt1,frsqrt2,dspmac,dspmacsat,accext,accmod,dspalu,dspalusat, | |
| multi,atomic,syncloop,nop,ghost,multimem, | |
| @@ -833,9 +836,9 @@ | |
| (define_mode_iterator MOVE64 | |
| [DI DF | |
| (V2SF "TARGET_HARD_FLOAT && TARGET_PAIRED_SINGLE_FLOAT") | |
| - (V2SI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS") | |
| - (V4HI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS") | |
| - (V8QI "TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS")]) | |
| + (V2SI "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI") | |
| + (V4HI "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI") | |
| + (V8QI "TARGET_HARD_FLOAT && TARGET_LOONGSON_MMI")]) | |
| ;; 128-bit modes for which we provide move patterns on 64-bit targets. | |
| (define_mode_iterator MOVE128 [TI TF]) | |
| @@ -862,9 +865,9 @@ | |
| [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") | |
| (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT") | |
| (V2SF "!TARGET_64BIT && TARGET_PAIRED_SINGLE_FLOAT") | |
| - (V2SI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS") | |
| - (V4HI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS") | |
| - (V8QI "!TARGET_64BIT && TARGET_LOONGSON_VECTORS") | |
| + (V2SI "!TARGET_64BIT && TARGET_LOONGSON_MMI") | |
| + (V4HI "!TARGET_64BIT && TARGET_LOONGSON_MMI") | |
| + (V8QI "!TARGET_64BIT && TARGET_LOONGSON_MMI") | |
| (TF "TARGET_64BIT && TARGET_FLOAT64")]) | |
| ;; In GPR templates, a string like "<d>subu" will expand to "subu" in the | |
| @@ -1181,7 +1184,9 @@ | |
| (include "9000.md") | |
| (include "10000.md") | |
| (include "loongson2ef.md") | |
| -(include "loongson3a.md") | |
| +(include "gs464.md") | |
| +(include "gs464e.md") | |
| +(include "gs264e.md") | |
| (include "octeon.md") | |
| (include "sb1.md") | |
| (include "sr71k.md") | |
| @@ -1608,7 +1613,7 @@ | |
| { | |
| rtx lo; | |
| - if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A || ISA_HAS_R6<D>MUL) | |
| + if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || ISA_HAS_R6<D>MUL) | |
| emit_insn (gen_mul<mode>3_mul3_nohilo (operands[0], operands[1], | |
| operands[2])); | |
| else if (ISA_HAS_<D>MUL3) | |
| @@ -1632,11 +1637,11 @@ | |
| (mult:GPR (match_operand:GPR 1 "register_operand" "d") | |
| (match_operand:GPR 2 "register_operand" "d"))) | |
| (clobber (match_scratch:GPR 3 "=l"))] | |
| - "TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A || ISA_HAS_R6<D>MUL" | |
| + "TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || ISA_HAS_R6<D>MUL" | |
| { | |
| if (TARGET_LOONGSON_2EF) | |
| return "<d>mult.g\t%0,%1,%2"; | |
| - else if (TARGET_LOONGSON_3A) | |
| + else if (TARGET_LOONGSON_EXT) | |
| return "gs<d>mult\t%0,%1,%2"; | |
| else | |
| return "<d>mul\t%0,%1,%2"; | |
| @@ -3026,11 +3031,11 @@ | |
| [(set (match_operand:GPR 0 "register_operand" "=&d") | |
| (any_div:GPR (match_operand:GPR 1 "register_operand" "d") | |
| (match_operand:GPR 2 "register_operand" "d")))] | |
| - "TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A || ISA_HAS_R6<D>DIV" | |
| + "TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || ISA_HAS_R6<D>DIV" | |
| { | |
| if (TARGET_LOONGSON_2EF) | |
| return mips_output_division ("<d>div<u>.g\t%0,%1,%2", operands); | |
| - else if (TARGET_LOONGSON_3A) | |
| + else if (TARGET_LOONGSON_EXT) | |
| return mips_output_division ("gs<d>div<u>\t%0,%1,%2", operands); | |
| else | |
| return mips_output_division ("<d>div<u>\t%0,%1,%2", operands); | |
| @@ -3042,11 +3047,11 @@ | |
| [(set (match_operand:GPR 0 "register_operand" "=&d") | |
| (any_mod:GPR (match_operand:GPR 1 "register_operand" "d") | |
| (match_operand:GPR 2 "register_operand" "d")))] | |
| - "TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A || ISA_HAS_R6<D>DIV" | |
| + "TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || ISA_HAS_R6<D>DIV" | |
| { | |
| if (TARGET_LOONGSON_2EF) | |
| return "<d>mod<u>.g\t%0,%1,%2"; | |
| - else if (TARGET_LOONGSON_3A) | |
| + else if (TARGET_LOONGSON_EXT) | |
| return "gs<d>mod<u>\t%0,%1,%2"; | |
| else | |
| return mips_output_division ("<d>mod<u>\t%0,%1,%2", operands); | |
| @@ -3159,6 +3164,23 @@ | |
| ;; | |
| ;; ................... | |
| ;; | |
| +;; Count trailing zeroes. | |
| +;; | |
| +;; ................... | |
| +;; | |
| + | |
| +(define_insn "ctz<mode>2" | |
| + [(set (match_operand:GPR 0 "register_operand" "=d") | |
| + (ctz:GPR (match_operand:GPR 1 "register_operand" "d")))] | |
| + "ISA_HAS_CTZ_CTO" | |
| + "<d>ctz\t%0,%1" | |
| + [(set_attr "type" "ctz") | |
| + (set_attr "mode" "<MODE>")]) | |
| + | |
| + | |
| +;; | |
| +;; ................... | |
| +;; | |
| ;; Count number of set bits. | |
| ;; | |
| ;; ................... | |
| @@ -4892,7 +4914,7 @@ | |
| (mem:GPR | |
| (plus:P (match_operand:P 1 "register_operand" "d") | |
| (match_operand:P 2 "register_operand" "d"))))] | |
| - "TARGET_LOONGSON_3A && TARGET_64BIT" | |
| + "TARGET_LOONGSON_EXT && TARGET_64BIT" | |
| "<GPR:gsloadx>\t%0,0(%1,%2)" | |
| [(set_attr "type" "load") | |
| (set_attr "mode" "<GPR:MODE>")]) | |
| @@ -4901,7 +4923,7 @@ | |
| [(set (mem:GPR (plus:P (match_operand:P 1 "register_operand" "d") | |
| (match_operand:P 2 "register_operand" "d"))) | |
| (match_operand:GPR 0 "register_operand" "d"))] | |
| - "TARGET_LOONGSON_3A && TARGET_64BIT" | |
| + "TARGET_LOONGSON_EXT && TARGET_64BIT" | |
| "<GPR:gsstorex>\t%0,0(%1,%2)" | |
| [(set_attr "type" "store") | |
| (set_attr "mode" "<GPR:MODE>")]) | |
| @@ -4913,7 +4935,7 @@ | |
| (mem:SHORT | |
| (plus:P (match_operand:P 1 "register_operand" "d") | |
| (match_operand:P 2 "register_operand" "d")))))] | |
| - "TARGET_LOONGSON_3A && TARGET_64BIT" | |
| + "TARGET_LOONGSON_EXT && TARGET_64BIT" | |
| "<SHORT:gsloadx>\t%0,0(%1,%2)" | |
| [(set_attr "type" "load") | |
| (set_attr "mode" "<GPR:MODE>")]) | |
| @@ -4922,7 +4944,7 @@ | |
| [(set (mem:SHORT (plus:P (match_operand:P 1 "register_operand" "d") | |
| (match_operand:P 2 "register_operand" "d"))) | |
| (match_operand:SHORT 0 "register_operand" "d"))] | |
| - "TARGET_LOONGSON_3A && TARGET_64BIT" | |
| + "TARGET_LOONGSON_EXT && TARGET_64BIT" | |
| "<SHORT:gsstorex>\t%0,0(%1,%2)" | |
| [(set_attr "type" "store") | |
| (set_attr "mode" "SI")]) | |
| @@ -5089,7 +5111,7 @@ | |
| (define_insn "movsf_zero" | |
| [(set (match_operand:SF 0 "register_operand" "=f") | |
| (match_operand:SF 1 "const_0_operand" ""))] | |
| - "TARGET_LOONGSON_3A" | |
| + "TARGET_LOONGSON_EXT" | |
| "xor %0,%0,%0" | |
| [(set_attr "type" "logical") | |
| (set_attr "mode" "SF")]) | |
| @@ -5150,7 +5172,7 @@ | |
| (define_insn "movdf_zero" | |
| [(set (match_operand:DF 0 "register_operand" "=f") | |
| (match_operand:DF 1 "const_0_operand" ""))] | |
| - "TARGET_LOONGSON_3A" | |
| + "TARGET_LOONGSON_EXT" | |
| "xor %0,%0,%0" | |
| [(set_attr "type" "logical") | |
| (set_attr "mode" "DF")]) | |
| @@ -7226,9 +7248,11 @@ | |
| (match_operand 2 "const_int_operand" "n"))] | |
| "ISA_HAS_PREFETCH && TARGET_EXPLICIT_RELOCS" | |
| { | |
| - if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_3A) | |
| + if (TARGET_LOONGSON_2EF || TARGET_LOONGSON_EXT || TARGET_LOONGSON_EXT2) | |
| { | |
| - /* Loongson 2[ef] and Loongson 3a use load to $0 for prefetching. */ | |
| + /* Loongson EXT2 implements the pref instruction. */ | |
| + if (TARGET_LOONGSON_EXT2) | |
| + return "pref\t%1, %a0"; | |
| if (TARGET_64BIT) | |
| return "ld\t$0,%a0"; | |
| else | |
| @@ -7780,8 +7804,8 @@ | |
| ; microMIPS patterns. | |
| (include "micromips.md") | |
| -; ST-Microelectronics Loongson-2E/2F-specific patterns. | |
| -(include "loongson.md") | |
| +; Loongson MultiMedia extensions Instructions (MMI) patterns. | |
| +(include "loongson-mmi.md") | |
| ; The MIPS MSA Instructions. | |
| (include "mips-msa.md") | |
| diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt | |
| index 4c0de02..92cdac4 100644 | |
| --- a/gcc/config/mips/mips.opt | |
| +++ b/gcc/config/mips/mips.opt | |
| @@ -300,7 +300,7 @@ Target Report Mask(MICROMIPS) | |
| Use microMIPS instructions. | |
| mmsa | |
| -Target Report Var(TARGET_MSA) | |
| +Target Report Mask(MSA) | |
| Use MIPS MSA Extension instructions. | |
| mmt | |
| @@ -455,3 +455,15 @@ Enum(mips_cb_setting) String(optimal) Value(MIPS_CB_OPTIMAL) | |
| EnumValue | |
| Enum(mips_cb_setting) String(always) Value(MIPS_CB_ALWAYS) | |
| + | |
| +mloongson-mmi | |
| +Target Report Mask(LOONGSON_MMI) | |
| +Use Loongson MultiMedia extensions Instructions (MMI) instructions. | |
| + | |
| +mloongson-ext | |
| +Target Report Mask(LOONGSON_EXT) | |
| +Use Loongson EXTension (EXT) instructions. | |
| + | |
| +mloongson-ext2 | |
| +Target Report Mask(LOONGSON_EXT2) | |
| +Use Loongson EXTension R2 (EXT2) instructions. | |
| diff --git a/gcc/config/mips/t-st b/gcc/config/mips/t-st | |
| index ec22d93..0791759 100644 | |
| --- a/gcc/config/mips/t-st | |
| +++ b/gcc/config/mips/t-st | |
| @@ -16,8 +16,8 @@ | |
| # along with GCC; see the file COPYING3. If not see | |
| # <http://www.gnu.org/licenses/>. | |
| -MULTILIB_OPTIONS = march=loongson3a/march=loongson2e/march=loongson2f mabi=n32/mabi=32/mabi=64 | |
| -MULTILIB_DIRNAMES = 3a 2e 2f lib32 lib lib64 | |
| +MULTILIB_OPTIONS = march=loongson3a/march=loongson2e/march=loongson2f/march=gs464/march=gs464e/march=gs264e mabi=n32/mabi=32/mabi=64 | |
| +MULTILIB_DIRNAMES = 3a 2e 2f gs464 gs464e gs264e lib32 lib lib64 | |
| MULTILIB_OSDIRNAMES = march.loongson2e/mabi.n32=../lib32/2e | |
| MULTILIB_OSDIRNAMES += march.loongson2e/mabi.32=../lib/2e | |
| @@ -28,6 +28,15 @@ MULTILIB_OSDIRNAMES += march.loongson2f/mabi.64=../lib64/2f | |
| MULTILIB_OSDIRNAMES += march.loongson3a/mabi.n32=../lib32/3a | |
| MULTILIB_OSDIRNAMES += march.loongson3a/mabi.32=../lib/3a | |
| MULTILIB_OSDIRNAMES += march.loongson3a/mabi.64=../lib64/3a | |
| +MULTILIB_OSDIRNAMES += march.gs464/mabi.n32=../lib32/gs464 | |
| +MULTILIB_OSDIRNAMES += march.gs464/mabi.32=../lib/gs464 | |
| +MULTILIB_OSDIRNAMES += march.gs464/mabi.64=../lib64/gs464 | |
| +MULTILIB_OSDIRNAMES += march.gs464e/mabi.n32=../lib32/gs464e | |
| +MULTILIB_OSDIRNAMES += march.gs464e/mabi.32=../lib/gs464e | |
| +MULTILIB_OSDIRNAMES += march.gs464e/mabi.64=../lib64/gs464e | |
| +MULTILIB_OSDIRNAMES += march.gs264e/mabi.n32=../lib32/gs264e | |
| +MULTILIB_OSDIRNAMES += march.gs264e/mabi.32=../lib/gs264e | |
| +MULTILIB_OSDIRNAMES += march.gs264e/mabi.64=../lib64/gs264e | |
| MULTILIB_OSDIRNAMES += mabi.n32=../lib32 | |
| MULTILIB_OSDIRNAMES += mabi.32=../lib | |
| MULTILIB_OSDIRNAMES += mabi.64=../lib64 | |
| diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi | |
| index 4441591..6b42028 100644 | |
| --- a/gcc/doc/invoke.texi | |
| +++ b/gcc/doc/invoke.texi | |
| @@ -20171,7 +20171,8 @@ The processor names are: | |
| @samp{1004kc}, @samp{1004kf2_1}, @samp{1004kf1_1}, | |
| @samp{i6400}, | |
| @samp{interaptiv}, | |
| -@samp{loongson2e}, @samp{loongson2f}, @samp{loongson3a}, | |
| +@samp{loongson2e}, @samp{loongson2f}, @samp{gs464}, @samp{gs464e}, | |
| +@samp{gs264e}, | |
| @samp{m4k}, | |
| @samp{m14k}, @samp{m14kc}, @samp{m14ke}, @samp{m14kec}, | |
| @samp{m5100}, @samp{m5101}, | |
| diff --git a/gcc/testsuite/gcc.target/mips/loongson-ctz.c b/gcc/testsuite/gcc.target/mips/loongson-ctz.c | |
| new file mode 100644 | |
| index 0000000..8df66a0 | |
| --- /dev/null | |
| +++ b/gcc/testsuite/gcc.target/mips/loongson-ctz.c | |
| @@ -0,0 +1,11 @@ | |
| +/* Test cases for Loongson EXT2 instructions. */ | |
| + | |
| +/* { dg-do compile } */ | |
| +/* { dg-options "-mloongson-ext2" } */ | |
| + | |
| +unsigned int foo(unsigned int x) | |
| +{ | |
| + return __builtin_ctz (x); | |
| +} | |
| + | |
| +/* { dg-final { scan-assembler "ctz\t" } } */ | |
| diff --git a/gcc/testsuite/gcc.target/mips/loongson-dctz.c b/gcc/testsuite/gcc.target/mips/loongson-dctz.c | |
| new file mode 100644 | |
| index 0000000..8c47433 | |
| --- /dev/null | |
| +++ b/gcc/testsuite/gcc.target/mips/loongson-dctz.c | |
| @@ -0,0 +1,11 @@ | |
| +/* Test cases for Loongson EXT2 instructions. */ | |
| + | |
| +/* { dg-do compile } */ | |
| +/* { dg-options "-mloongson-ext2" } */ | |
| + | |
| +unsigned long long foo(unsigned long long x) | |
| +{ | |
| + return __builtin_ctzl (x); | |
| +} | |
| + | |
| +/* { dg-final { scan-assembler "dctz\t" } } */ | |
| diff --git a/gcc/testsuite/gcc.target/mips/loongson-shift-count-truncated-1.c b/gcc/testsuite/gcc.target/mips/loongson-shift-count-truncated-1.c | |
| index baed48c..6e22c0e 100644 | |
| --- a/gcc/testsuite/gcc.target/mips/loongson-shift-count-truncated-1.c | |
| +++ b/gcc/testsuite/gcc.target/mips/loongson-shift-count-truncated-1.c | |
| @@ -4,11 +4,11 @@ | |
| /* loongson.h does not handle or check for MIPS16ness. There doesn't | |
| seem any good reason for it to, given that the Loongson processors | |
| do not support MIPS16. */ | |
| -/* { dg-options "isa=loongson -mhard-float -mno-mips16 (REQUIRES_STDLIB)" } */ | |
| +/* { dg-options "-mloongson-mmi -mhard-float -mno-mips16 (REQUIRES_STDLIB)" } */ | |
| /* See PR 52155. */ | |
| -/* { dg-options "isa=loongson -mhard-float -mno-mips16 -mlong64" { mips*-*-elf* && ilp32 } } */ | |
| +/* { dg-options "-mloongson-mmi -mhard-float -mno-mips16 -mlong64" { mips*-*-elf* && ilp32 } } */ | |
| -#include "loongson.h" | |
| +#include "loongson-mmiintrin.h" | |
| #include <assert.h> | |
| typedef union { int32x2_t v; int32_t a[2]; } int32x2_encap_t; | |
| diff --git a/gcc/testsuite/gcc.target/mips/loongson-simd.c b/gcc/testsuite/gcc.target/mips/loongson-simd.c | |
| index f263b43..34fdcec 100644 | |
| --- a/gcc/testsuite/gcc.target/mips/loongson-simd.c | |
| +++ b/gcc/testsuite/gcc.target/mips/loongson-simd.c | |
| @@ -26,9 +26,9 @@ along with GCC; see the file COPYING3. If not see | |
| because inclusion of some system headers e.g. stdint.h will fail due to not | |
| finding stubs-o32_hard.h. */ | |
| /* { dg-require-effective-target mips_nanlegacy } */ | |
| -/* { dg-options "isa=loongson -mhard-float -mno-micromips -mno-mips16 -flax-vector-conversions (REQUIRES_STDLIB)" } */ | |
| +/* { dg-options "-mloongson-mmi -mhard-float -mno-micromips -mno-mips16 -flax-vector-conversions (REQUIRES_STDLIB)" } */ | |
| -#include "loongson.h" | |
| +#include "loongson-mmiintrin.h" | |
| #include <stdio.h> | |
| #include <stdint.h> | |
| #include <assert.h> | |
| diff --git a/gcc/testsuite/gcc.target/mips/mips.exp b/gcc/testsuite/gcc.target/mips/mips.exp | |
| index 9db4fbe..5b2bf8b 100644 | |
| --- a/gcc/testsuite/gcc.target/mips/mips.exp | |
| +++ b/gcc/testsuite/gcc.target/mips/mips.exp | |
| @@ -296,6 +296,9 @@ foreach option { | |
| mcount-ra-address | |
| odd-spreg | |
| msa | |
| + loongson-mmi | |
| + loongson-ext | |
| + loongson-ext2 | |
| } { | |
| lappend mips_option_groups $option "-m(no-|)$option" | |
| } | |
| @@ -883,6 +886,12 @@ proc mips-dg-init {} { | |
| "-mno-msa" | |
| #endif | |
| + #ifdef __mips_loongson_mmi | |
| + "-mloongson-mmi" | |
| + #else | |
| + "-mno-loongson-mmi" | |
| + #endif | |
| + | |
| 0 | |
| }; | |
| } 0] | |
| diff --git a/gcc/testsuite/gcc.target/mips/umips-store16-1.c b/gcc/testsuite/gcc.target/mips/umips-store16-1.c | |
| index 6377e85..f82c837 100644 | |
| --- a/gcc/testsuite/gcc.target/mips/umips-store16-1.c | |
| +++ b/gcc/testsuite/gcc.target/mips/umips-store16-1.c | |
| @@ -1,4 +1,4 @@ | |
| -/* { dg-options "(-mmicromips)" } */ | |
| +/* { dg-options "(-mmicromips) forbid_cpu=loongson3a" } */ | |
| /* { dg-do assemble } */ | |
| register unsigned int global asm ("$16"); | |
| diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp | |
| index 50665df..326bd30 100644 | |
| --- a/gcc/testsuite/lib/target-supports.exp | |
| +++ b/gcc/testsuite/lib/target-supports.exp | |
| @@ -1902,20 +1902,20 @@ proc check_mpaired_single_hw_available { } { | |
| # Return 1 if the target supports executing Loongson vector instructions, | |
| # 0 otherwise. Cache the result. | |
| -proc check_mips_loongson_hw_available { } { | |
| - return [check_cached_effective_target mips_loongson_hw_available { | |
| +proc check_mips_loongson_mmi_hw_available { } { | |
| + return [check_cached_effective_target mips_loongson_mmi_hw_available { | |
| # If this is not the right target then we can skip the test. | |
| if { !([istarget mips*-*-*]) } { | |
| expr 0 | |
| } else { | |
| - check_runtime_nocache mips_loongson_hw_available { | |
| - #include <loongson.h> | |
| + check_runtime_nocache mips_loongson_mmi_hw_available { | |
| + #include <loongson-mmiintrin.h> | |
| int main() | |
| { | |
| asm volatile ("paddw $f2,$f4,$f6"); | |
| return 0; | |
| } | |
| - } "" | |
| + } "-mloongson-mmi" | |
| } | |
| }] | |
| } | |
| @@ -1969,9 +1969,9 @@ proc check_effective_target_mpaired_single_runtime { } { | |
| # Return 1 if the target supports running Loongson executables, 0 otherwise. | |
| -proc check_effective_target_mips_loongson_runtime { } { | |
| - if { [check_effective_target_mips_loongson] | |
| - && [check_mips_loongson_hw_available] } { | |
| +proc check_effective_target_mips_loongson_mmi_runtime { } { | |
| + if { [check_effective_target_mips_loongson_mmi] | |
| + && [check_mips_loongson_mmi_hw_available] } { | |
| return 1 | |
| } | |
| return 0 | |
| @@ -3070,7 +3070,7 @@ proc check_effective_target_vect_int { } { | |
| || [istarget aarch64*-*-*] | |
| || [is-effective-target arm_neon] | |
| || ([istarget mips*-*-*] | |
| - && ([et-is-effective-target mips_loongson] | |
| + && ([et-is-effective-target mips_loongson_mmi] | |
| || [et-is-effective-target mips_msa])) | |
| || ([istarget s390*-*-*] | |
| && [check_effective_target_s390_vx]) } { | |
| @@ -4808,11 +4808,24 @@ proc add_options_for_mips_msa { flags } { | |
| return "$flags -mmsa" | |
| } | |
| +# Add the options needed for MIPS Loongson MMI Architecture. | |
| + | |
| +proc add_options_for_mips_loongson_mmi { flags } { | |
| + if { ! [check_effective_target_mips_loongson_mmi] } { | |
| + return "$flags" | |
| + } | |
| + return "$flags -mloongson-mmi" | |
| +} | |
| + | |
| + | |
| # Return 1 if this a Loongson-2E or -2F target using an ABI that supports | |
| # the Loongson vector modes. | |
| -proc check_effective_target_mips_loongson { } { | |
| +proc check_effective_target_mips_loongson_mmi { } { | |
| return [check_no_compiler_messages loongson assembly { | |
| + #if !defined(__mips_loongson_mmi) | |
| + #error !__mips_loongson_mmi | |
| + #endif | |
| #if !defined(__mips_loongson_vector_rev) | |
| #error !__mips_loongson_vector_rev | |
| #endif | |
| @@ -5387,7 +5400,7 @@ proc check_effective_target_vect_shift { } { | |
| || [is-effective-target arm_neon] | |
| || ([istarget mips*-*-*] | |
| && ([et-is-effective-target mips_msa] | |
| - || [et-is-effective-target mips_loongson])) | |
| + || [et-is-effective-target mips_loongson_mmi])) | |
| || ([istarget s390*-*-*] | |
| && [check_effective_target_s390_vx]) } { | |
| set et_vect_shift_saved($et_index) 1 | |
| @@ -5407,7 +5420,7 @@ proc check_effective_target_whole_vector_shift { } { | |
| || ([is-effective-target arm_neon] | |
| && [check_effective_target_arm_little_endian]) | |
| || ([istarget mips*-*-*] | |
| - && [et-is-effective-target mips_loongson]) | |
| + && [et-is-effective-target mips_loongson_mmi]) | |
| || ([istarget s390*-*-*] | |
| && [check_effective_target_s390_vx]) } { | |
| set answer 1 | |
| @@ -5613,7 +5626,7 @@ proc check_effective_target_vect_no_int_min_max { } { | |
| || [istarget spu-*-*] | |
| || [istarget alpha*-*-*] | |
| || ([istarget mips*-*-*] | |
| - && [et-is-effective-target mips_loongson]) } { | |
| + && [et-is-effective-target mips_loongson_mmi]) } { | |
| set et_vect_no_int_min_max_saved($et_index) 1 | |
| } | |
| } | |
| @@ -6384,7 +6397,7 @@ proc check_effective_target_vect_no_align { } { | |
| || [check_effective_target_arm_vect_no_misalign] | |
| || ([istarget powerpc*-*-*] && [check_p8vector_hw_available]) | |
| || ([istarget mips*-*-*] | |
| - && [et-is-effective-target mips_loongson]) } { | |
| + && [et-is-effective-target mips_loongson_mmi]) } { | |
| set et_vect_no_align_saved($et_index) 1 | |
| } | |
| } | |
| @@ -6714,7 +6727,7 @@ proc check_effective_target_vect_short_mult { } { | |
| || [check_effective_target_arm32] | |
| || ([istarget mips*-*-*] | |
| && ([et-is-effective-target mips_msa] | |
| - || [et-is-effective-target mips_loongson])) | |
| + || [et-is-effective-target mips_loongson_mmi])) | |
| || ([istarget s390*-*-*] | |
| && [check_effective_target_s390_vx]) } { | |
| set et_vect_short_mult_saved($et_index) 1 | |
| @@ -8529,8 +8542,8 @@ proc check_vect_support_and_set_flags { } { | |
| if { [check_effective_target_mpaired_single] } { | |
| lappend EFFECTIVE_TARGETS mpaired_single | |
| } | |
| - if { [check_effective_target_mips_loongson] } { | |
| - lappend EFFECTIVE_TARGETS mips_loongson | |
| + if { [check_effective_target_mips_loongson_mmi] } { | |
| + lappend EFFECTIVE_TARGETS mips_loongson_mmi | |
| } | |
| if { [check_effective_target_mips_msa] } { | |
| lappend EFFECTIVE_TARGETS mips_msa |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment