[ Avaa Bypassed ]




Upload:

Command:

hmhc3928@3.22.27.41: ~ $
/* Copyright (C) 2011-2013 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
#endif

/* Sum absolute 8-bit integer differences of adjacent groups of 4 byte
   integers in the first two operands.  Starting offsets within the
   operands are determined by the third (mask) operand.  The mask is
   handed to the builtin unchanged in both variants below.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mpsadbw_epu8 (__m256i __X, __m256i __Y, const int __M)
{
  __v32qi __first = (__v32qi) __X;
  __v32qi __second = (__v32qi) __Y;

  return (__m256i) __builtin_ia32_mpsadbw256 (__first, __second, __M);
}
#else
/* Macro form used when __OPTIMIZE__ is not defined.  */
#define _mm256_mpsadbw_epu8(X, Y, M)                                  \
  ((__m256i) __builtin_ia32_mpsadbw256 ((__v32qi) (__m256i) (X),      \
                                        (__v32qi) (__m256i) (Y),      \
                                        (int) (M)))
#endif

/* Invoke __builtin_ia32_pabsb256 on __A viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi8 (__m256i __A)
{
  __v32qi __src = (__v32qi) __A;

  return (__m256i) __builtin_ia32_pabsb256 (__src);
}

/* Invoke __builtin_ia32_pabsw256 on __A viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi16 (__m256i __A)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_pabsw256 (__src);
}

/* Invoke __builtin_ia32_pabsd256 on __A viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_epi32 (__m256i __A)
{
  __v8si __src = (__v8si) __A;

  return (__m256i) __builtin_ia32_pabsd256 (__src);
}

/* Invoke __builtin_ia32_packssdw256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packs_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_packssdw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_packsswb256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packs_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_packsswb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_packusdw256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packus_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_packusdw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_packuswb256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_packus_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_packuswb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_paddb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_paddw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddd256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_paddd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddq256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_paddq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddsb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_paddsb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddsw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_paddsw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddusb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epu8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_paddusb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_paddusw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_adds_epu16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_paddusw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_palignr256 on __A and __B viewed as __v4di.
   The count __N is multiplied by 8 before being passed to the
   builtin.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_alignr_epi8 (__m256i __A, __m256i __B, const int __N)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_palignr256 (__lhs, __rhs, __N * 8);
}
#else
/* Without optimization, (__N * 8) in a function body would end up in a
   register and the instruction would not be matched, so a macro is
   used instead.  */
#define _mm256_alignr_epi8(A, B, N)                                   \
  ((__m256i) __builtin_ia32_palignr256 ((__v4di) (__m256i) (A),       \
                                        (__v4di) (__m256i) (B),       \
                                        (int) (N) * 8))
#endif

/* Invoke __builtin_ia32_andsi256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_and_si256 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_andsi256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_andnotsi256 on __A and __B viewed as __v4di,
   keeping the argument order (__A first, __B second).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_andnot_si256 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_andnotsi256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pavgb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avg_epu8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_pavgb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pavgw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avg_epu16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pavgw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pblendvb256 on __X, __Y and the vector mask __M,
   all three viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blendv_epi8 (__m256i __X, __m256i __Y, __m256i __M)
{
  __v32qi __first = (__v32qi) __X;
  __v32qi __second = (__v32qi) __Y;
  __v32qi __selector = (__v32qi) __M;

  return (__m256i) __builtin_ia32_pblendvb256 (__first, __second,
                                               __selector);
}

/* Invoke __builtin_ia32_pblendw256 on __X and __Y viewed as __v16hi;
   the integer mask __M is passed through unchanged.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi16 (__m256i __X, __m256i __Y, const int __M)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_pblendw256 (__first, __second, __M);
}
#else
/* Macro form used when __OPTIMIZE__ is not defined.  */
#define _mm256_blend_epi16(X, Y, M)                                   \
  ((__m256i) __builtin_ia32_pblendw256 ((__v16hi) (__m256i) (X),      \
                                        (__v16hi) (__m256i) (Y),      \
                                        (int) (M)))
#endif

/* Invoke __builtin_ia32_pcmpeqb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_pcmpeqb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pcmpeqw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pcmpeqw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pcmpeqd256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pcmpeqd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pcmpeqq256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpeq_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_pcmpeqq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pcmpgtb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_pcmpgtb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pcmpgtw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pcmpgtw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pcmpgtd256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pcmpgtd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pcmpgtq256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmpgt_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_pcmpgtq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_phaddw256 on __X and __Y viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_epi16 (__m256i __X, __m256i __Y)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_phaddw256 (__first, __second);
}

/* Invoke __builtin_ia32_phaddd256 on __X and __Y viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadd_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __first = (__v8si) __X;
  __v8si __second = (__v8si) __Y;

  return (__m256i) __builtin_ia32_phaddd256 (__first, __second);
}

/* Invoke __builtin_ia32_phaddsw256 on __X and __Y viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hadds_epi16 (__m256i __X, __m256i __Y)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_phaddsw256 (__first, __second);
}

/* Invoke __builtin_ia32_phsubw256 on __X and __Y viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_epi16 (__m256i __X, __m256i __Y)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_phsubw256 (__first, __second);
}

/* Invoke __builtin_ia32_phsubd256 on __X and __Y viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsub_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __first = (__v8si) __X;
  __v8si __second = (__v8si) __Y;

  return (__m256i) __builtin_ia32_phsubd256 (__first, __second);
}

/* Invoke __builtin_ia32_phsubsw256 on __X and __Y viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_hsubs_epi16 (__m256i __X, __m256i __Y)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_phsubsw256 (__first, __second);
}

/* Invoke __builtin_ia32_pmaddubsw256 on __X and __Y viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddubs_epi16 (__m256i __X, __m256i __Y)
{
  __v32qi __first = (__v32qi) __X;
  __v32qi __second = (__v32qi) __Y;

  return (__m256i) __builtin_ia32_pmaddubsw256 (__first, __second);
}

/* Invoke __builtin_ia32_pmaddwd256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_madd_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pmaddwd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmaxsb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_pmaxsb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmaxsw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pmaxsw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmaxsd256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pmaxsd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmaxub256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_pmaxub256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmaxuw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pmaxuw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmaxud256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_epu32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pmaxud256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pminsb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_pminsb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pminsw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pminsw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pminsd256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pminsd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pminub256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_pminub256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pminuw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pminuw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pminud256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_epu32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pminud256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmovmskb256 on __A viewed as __v32qi and return
   the builtin's int result directly.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_movemask_epi8 (__m256i __A)
{
  __v32qi __src = (__v32qi) __A;

  return __builtin_ia32_pmovmskb256 (__src);
}

/* Invoke __builtin_ia32_pmovsxbw256 on the 128-bit operand __X viewed
   as __v16qi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi16 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m256i) __builtin_ia32_pmovsxbw256 (__src);
}

/* Invoke __builtin_ia32_pmovsxbd256 on the 128-bit operand __X viewed
   as __v16qi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi32 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m256i) __builtin_ia32_pmovsxbd256 (__src);
}

/* Invoke __builtin_ia32_pmovsxbq256 on the 128-bit operand __X viewed
   as __v16qi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi8_epi64 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m256i) __builtin_ia32_pmovsxbq256 (__src);
}

/* Invoke __builtin_ia32_pmovsxwd256 on the 128-bit operand __X viewed
   as __v8hi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_epi32 (__m128i __X)
{
  __v8hi __src = (__v8hi) __X;

  return (__m256i) __builtin_ia32_pmovsxwd256 (__src);
}

/* Invoke __builtin_ia32_pmovsxwq256 on the 128-bit operand __X viewed
   as __v8hi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi16_epi64 (__m128i __X)
{
  __v8hi __src = (__v8hi) __X;

  return (__m256i) __builtin_ia32_pmovsxwq256 (__src);
}

/* Invoke __builtin_ia32_pmovsxdq256 on the 128-bit operand __X viewed
   as __v4si; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_epi64 (__m128i __X)
{
  __v4si __src = (__v4si) __X;

  return (__m256i) __builtin_ia32_pmovsxdq256 (__src);
}

/* Invoke __builtin_ia32_pmovzxbw256 on the 128-bit operand __X viewed
   as __v16qi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi16 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m256i) __builtin_ia32_pmovzxbw256 (__src);
}

/* Invoke __builtin_ia32_pmovzxbd256 on the 128-bit operand __X viewed
   as __v16qi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi32 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m256i) __builtin_ia32_pmovzxbd256 (__src);
}

/* Invoke __builtin_ia32_pmovzxbq256 on the 128-bit operand __X viewed
   as __v16qi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu8_epi64 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m256i) __builtin_ia32_pmovzxbq256 (__src);
}

/* Invoke __builtin_ia32_pmovzxwd256 on the 128-bit operand __X viewed
   as __v8hi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_epi32 (__m128i __X)
{
  __v8hi __src = (__v8hi) __X;

  return (__m256i) __builtin_ia32_pmovzxwd256 (__src);
}

/* Invoke __builtin_ia32_pmovzxwq256 on the 128-bit operand __X viewed
   as __v8hi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu16_epi64 (__m128i __X)
{
  __v8hi __src = (__v8hi) __X;

  return (__m256i) __builtin_ia32_pmovzxwq256 (__src);
}

/* Invoke __builtin_ia32_pmovzxdq256 on the 128-bit operand __X viewed
   as __v4si; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_epi64 (__m128i __X)
{
  __v4si __src = (__v4si) __X;

  return (__m256i) __builtin_ia32_pmovzxdq256 (__src);
}

/* Invoke __builtin_ia32_pmuldq256 on __X and __Y viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __first = (__v8si) __X;
  __v8si __second = (__v8si) __Y;

  return (__m256i) __builtin_ia32_pmuldq256 (__first, __second);
}

/* Invoke __builtin_ia32_pmulhrsw256 on __X and __Y viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhrs_epi16 (__m256i __X, __m256i __Y)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_pmulhrsw256 (__first, __second);
}

/* Invoke __builtin_ia32_pmulhuw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhi_epu16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pmulhuw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmulhw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mulhi_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pmulhw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmullw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_pmullw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmulld256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mullo_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pmulld256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pmuludq256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_epu32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_pmuludq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_por256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_or_si256 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_por256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psadbw256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sad_epu8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_psadbw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pshufb256 on __X and __Y viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_epi8 (__m256i __X, __m256i __Y)
{
  __v32qi __first = (__v32qi) __X;
  __v32qi __second = (__v32qi) __Y;

  return (__m256i) __builtin_ia32_pshufb256 (__first, __second);
}

/* _mm256_shuffle_epi32, _mm256_shufflehi_epi16 and
   _mm256_shufflelo_epi16 forward to the pshufd256, pshufhw256 and
   pshuflw256 builtins respectively.  Inline functions are provided when
   __OPTIMIZE__ is defined and equivalent macros otherwise; in every
   variant the integer selector is passed to the builtin as given.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shuffle_epi32 (__m256i __A, const int __mask)
{
  __v8si __src = (__v8si) __A;

  return (__m256i) __builtin_ia32_pshufd256 (__src, __mask);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflehi_epi16 (__m256i __A, const int __mask)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_pshufhw256 (__src, __mask);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_shufflelo_epi16 (__m256i __A, const int __mask)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_pshuflw256 (__src, __mask);
}
#else
#define _mm256_shuffle_epi32(A, N)                                    \
  ((__m256i) __builtin_ia32_pshufd256 ((__v8si) (__m256i) (A),        \
                                       (int) (N)))
#define _mm256_shufflehi_epi16(A, N)                                  \
  ((__m256i) __builtin_ia32_pshufhw256 ((__v16hi) (__m256i) (A),      \
                                        (int) (N)))
#define _mm256_shufflelo_epi16(A, N)                                  \
  ((__m256i) __builtin_ia32_pshuflw256 ((__v16hi) (__m256i) (A),      \
                                        (int) (N)))
#endif

/* Invoke __builtin_ia32_psignb256 on __X and __Y viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi8 (__m256i __X, __m256i __Y)
{
  __v32qi __first = (__v32qi) __X;
  __v32qi __second = (__v32qi) __Y;

  return (__m256i) __builtin_ia32_psignb256 (__first, __second);
}

/* Invoke __builtin_ia32_psignw256 on __X and __Y viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi16 (__m256i __X, __m256i __Y)
{
  __v16hi __first = (__v16hi) __X;
  __v16hi __second = (__v16hi) __Y;

  return (__m256i) __builtin_ia32_psignw256 (__first, __second);
}

/* Invoke __builtin_ia32_psignd256 on __X and __Y viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sign_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __first = (__v8si) __X;
  __v8si __second = (__v8si) __Y;

  return (__m256i) __builtin_ia32_psignd256 (__first, __second);
}

/* _mm256_bslli_epi128 and _mm256_slli_si256 are two names for the same
   operation: both pass __A and (__N * 8) to __builtin_ia32_pslldqi256.
   NOTE(review): the multiplication by 8 presumably converts a byte
   count into the bit count the builtin expects -- confirm against the
   builtin's contract.
   Inline functions are provided when __OPTIMIZE__ is defined; otherwise
   the macros below expand to the same builtin call.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bslli_epi128 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
}

/* Same body as _mm256_bslli_epi128.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_si256 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_pslldqi256 (__A, __N * 8);
}
#else
/* Macro forms: A is cast to __m256i and N to int before scaling.  */
#define _mm256_bslli_epi128(A, N) \
  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
#define _mm256_slli_si256(A, N) \
  ((__m256i)__builtin_ia32_pslldqi256 ((__m256i)(A), (int)(N) * 8))
#endif

/* Invoke __builtin_ia32_psllwi256 on __A viewed as __v16hi, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi16 (__m256i __A, int __B)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_psllwi256 (__src, __B);
}

/* Invoke __builtin_ia32_psllw256 on __A viewed as __v16hi and the
   128-bit count operand __B viewed as __v8hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi16 (__m256i __A, __m128i __B)
{
  __v16hi __src = (__v16hi) __A;
  __v8hi __count = (__v8hi) __B;

  return (__m256i) __builtin_ia32_psllw256 (__src, __count);
}

/* Invoke __builtin_ia32_pslldi256 on __A viewed as __v8si, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi32 (__m256i __A, int __B)
{
  __v8si __src = (__v8si) __A;

  return (__m256i) __builtin_ia32_pslldi256 (__src, __B);
}

/* Invoke __builtin_ia32_pslld256 on __A viewed as __v8si and the
   128-bit count operand __B viewed as __v4si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi32 (__m256i __A, __m128i __B)
{
  __v8si __src = (__v8si) __A;
  __v4si __count = (__v4si) __B;

  return (__m256i) __builtin_ia32_pslld256 (__src, __count);
}

/* Invoke __builtin_ia32_psllqi256 on __A viewed as __v4di, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_slli_epi64 (__m256i __A, int __B)
{
  __v4di __src = (__v4di) __A;

  return (__m256i) __builtin_ia32_psllqi256 (__src, __B);
}

/* Invoke __builtin_ia32_psllq256 on __A viewed as __v4di and the
   128-bit count operand __B viewed as __v2di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sll_epi64 (__m256i __A, __m128i __B)
{
  __v4di __src = (__v4di) __A;
  __v2di __count = (__v2di) __B;

  return (__m256i) __builtin_ia32_psllq256 (__src, __count);
}

/* Invoke __builtin_ia32_psrawi256 on __A viewed as __v16hi, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srai_epi16 (__m256i __A, int __B)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_psrawi256 (__src, __B);
}

/* Invoke __builtin_ia32_psraw256 on __A viewed as __v16hi and the
   128-bit count operand __B viewed as __v8hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sra_epi16 (__m256i __A, __m128i __B)
{
  __v16hi __src = (__v16hi) __A;
  __v8hi __count = (__v8hi) __B;

  return (__m256i) __builtin_ia32_psraw256 (__src, __count);
}

/* Invoke __builtin_ia32_psradi256 on __A viewed as __v8si, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srai_epi32 (__m256i __A, int __B)
{
  __v8si __src = (__v8si) __A;

  return (__m256i) __builtin_ia32_psradi256 (__src, __B);
}

/* Invoke __builtin_ia32_psrad256 on __A viewed as __v8si and the
   128-bit count operand __B viewed as __v4si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sra_epi32 (__m256i __A, __m128i __B)
{
  __v8si __src = (__v8si) __A;
  __v4si __count = (__v4si) __B;

  return (__m256i) __builtin_ia32_psrad256 (__src, __count);
}

/* _mm256_bsrli_epi128 and _mm256_srli_si256 are two names for the same
   operation: both pass __A and (__N * 8) to __builtin_ia32_psrldqi256.
   NOTE(review): the multiplication by 8 presumably converts a byte
   count into the bit count the builtin expects -- confirm against the
   builtin's contract.
   Inline functions are provided when __OPTIMIZE__ is defined; otherwise
   the macros below expand to the same builtin call.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_bsrli_epi128 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
}

/* Same body as _mm256_bsrli_epi128.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_si256 (__m256i __A, const int __N)
{
  return (__m256i)__builtin_ia32_psrldqi256 (__A, __N * 8);
}
#else
/* Macro forms: A is cast to __m256i and N to int before scaling.  */
#define _mm256_bsrli_epi128(A, N) \
  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
#define _mm256_srli_si256(A, N) \
  ((__m256i)__builtin_ia32_psrldqi256 ((__m256i)(A), (int)(N) * 8))
#endif

/* Invoke __builtin_ia32_psrlwi256 on __A viewed as __v16hi, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi16 (__m256i __A, int __B)
{
  __v16hi __src = (__v16hi) __A;

  return (__m256i) __builtin_ia32_psrlwi256 (__src, __B);
}

/* Invoke __builtin_ia32_psrlw256 on __A viewed as __v16hi and the
   128-bit count operand __B viewed as __v8hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi16 (__m256i __A, __m128i __B)
{
  __v16hi __src = (__v16hi) __A;
  __v8hi __count = (__v8hi) __B;

  return (__m256i) __builtin_ia32_psrlw256 (__src, __count);
}

/* Invoke __builtin_ia32_psrldi256 on __A viewed as __v8si, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi32 (__m256i __A, int __B)
{
  __v8si __src = (__v8si) __A;

  return (__m256i) __builtin_ia32_psrldi256 (__src, __B);
}

/* Invoke __builtin_ia32_psrld256 on __A viewed as __v8si and the
   128-bit count operand __B viewed as __v4si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi32 (__m256i __A, __m128i __B)
{
  __v8si __src = (__v8si) __A;
  __v4si __count = (__v4si) __B;

  return (__m256i) __builtin_ia32_psrld256 (__src, __count);
}

/* Invoke __builtin_ia32_psrlqi256 on __A viewed as __v4di, with the
   integer count __B passed through unchanged.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srli_epi64 (__m256i __A, int __B)
{
  __v4di __src = (__v4di) __A;

  return (__m256i) __builtin_ia32_psrlqi256 (__src, __B);
}

/* Invoke __builtin_ia32_psrlq256 on __A viewed as __v4di and the
   128-bit count operand __B viewed as __v2di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srl_epi64 (__m256i __A, __m128i __B)
{
  __v4di __src = (__v4di) __A;
  __v2di __count = (__v2di) __B;

  return (__m256i) __builtin_ia32_psrlq256 (__src, __count);
}

/* Invoke __builtin_ia32_psubb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_psubb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psubw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_psubw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psubd256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_psubd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psubq256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_psubq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psubsb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_psubsb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psubsw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_psubsw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psubusb256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epu8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_psubusb256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_psubusw256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_subs_epu16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_psubusw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpckhbw256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_punpckhbw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpckhwd256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_punpckhwd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpckhdq256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_punpckhdq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpckhqdq256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpackhi_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_punpckhqdq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpcklbw256 on __A and __B viewed as __v32qi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi8 (__m256i __A, __m256i __B)
{
  __v32qi __lhs = (__v32qi) __A;
  __v32qi __rhs = (__v32qi) __B;

  return (__m256i) __builtin_ia32_punpcklbw256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpcklwd256 on __A and __B viewed as __v16hi.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi16 (__m256i __A, __m256i __B)
{
  __v16hi __lhs = (__v16hi) __A;
  __v16hi __rhs = (__v16hi) __B;

  return (__m256i) __builtin_ia32_punpcklwd256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpckldq256 on __A and __B viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi32 (__m256i __A, __m256i __B)
{
  __v8si __lhs = (__v8si) __A;
  __v8si __rhs = (__v8si) __B;

  return (__m256i) __builtin_ia32_punpckldq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_punpcklqdq256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_unpacklo_epi64 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_punpcklqdq256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_pxor256 on __A and __B viewed as __v4di.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_xor_si256 (__m256i __A, __m256i __B)
{
  __v4di __lhs = (__v4di) __A;
  __v4di __rhs = (__v4di) __B;

  return (__m256i) __builtin_ia32_pxor256 (__lhs, __rhs);
}

/* Invoke __builtin_ia32_movntdqa256 on the address __X.  The pointer
   is cast to a non-const __v4di pointer for the builtin call.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_stream_load_si256 (__m256i const *__X)
{
  __v4di *__addr = (__v4di *) __X;

  return (__m256i) __builtin_ia32_movntdqa256 (__addr);
}

/* Invoke __builtin_ia32_vbroadcastss_ps on __X viewed as __v4sf; the
   result is returned as __m128.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastss_ps (__m128 __X)
{
  __v4sf __src = (__v4sf) __X;

  return (__m128) __builtin_ia32_vbroadcastss_ps (__src);
}

/* Invoke __builtin_ia32_vbroadcastss_ps256 on the 128-bit operand __X
   viewed as __v4sf; the result is returned as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastss_ps (__m128 __X)
{
  __v4sf __src = (__v4sf) __X;

  return (__m256) __builtin_ia32_vbroadcastss_ps256 (__src);
}

/* Invoke __builtin_ia32_vbroadcastsd_pd256 on the 128-bit operand __X
   viewed as __v2df; the result is returned as __m256d.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastsd_pd (__m128d __X)
{
  __v2df __src = (__v2df) __X;

  return (__m256d) __builtin_ia32_vbroadcastsd_pd256 (__src);
}

/* Invoke __builtin_ia32_vbroadcastsi256 on the 128-bit operand __X
   viewed as __v2di; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastsi128_si256 (__m128i __X)
{
  __v2di __src = (__v2di) __X;

  return (__m256i) __builtin_ia32_vbroadcastsi256 (__src);
}

/* Invoke __builtin_ia32_pblendd128 on __X and __Y viewed as __v4si;
   the integer mask __M is passed through unchanged.  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi32 (__m128i __X, __m128i __Y, const int __M)
{
  __v4si __first = (__v4si) __X;
  __v4si __second = (__v4si) __Y;

  return (__m128i) __builtin_ia32_pblendd128 (__first, __second, __M);
}
#else
/* Macro form used when __OPTIMIZE__ is not defined.  */
#define _mm_blend_epi32(X, Y, M)                                      \
  ((__m128i) __builtin_ia32_pblendd128 ((__v4si) (__m128i) (X),       \
                                        (__v4si) (__m128i) (Y),       \
                                        (int) (M)))
#endif

/* Invoke __builtin_ia32_pblendd256 on __X and __Y viewed as __v8si;
   the integer mask __M is passed through unchanged.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_blend_epi32 (__m256i __X, __m256i __Y, const int __M)
{
  __v8si __first = (__v8si) __X;
  __v8si __second = (__v8si) __Y;

  return (__m256i) __builtin_ia32_pblendd256 (__first, __second, __M);
}
#else
/* Macro form used when __OPTIMIZE__ is not defined.  */
#define _mm256_blend_epi32(X, Y, M)                                   \
  ((__m256i) __builtin_ia32_pblendd256 ((__v8si) (__m256i) (X),       \
                                        (__v8si) (__m256i) (Y),       \
                                        (int) (M)))
#endif

/* Invoke __builtin_ia32_pbroadcastb256 on the 128-bit operand __X
   viewed as __v16qi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastb_epi8 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m256i) __builtin_ia32_pbroadcastb256 (__src);
}

/* Invoke __builtin_ia32_pbroadcastw256 on the 128-bit operand __X
   viewed as __v8hi; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastw_epi16 (__m128i __X)
{
  __v8hi __src = (__v8hi) __X;

  return (__m256i) __builtin_ia32_pbroadcastw256 (__src);
}

/* Invoke __builtin_ia32_pbroadcastd256 on the 128-bit operand __X
   viewed as __v4si; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastd_epi32 (__m128i __X)
{
  __v4si __src = (__v4si) __X;

  return (__m256i) __builtin_ia32_pbroadcastd256 (__src);
}

/* Invoke __builtin_ia32_pbroadcastq256 on the 128-bit operand __X
   viewed as __v2di; the result is returned as __m256i.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_broadcastq_epi64 (__m128i __X)
{
  __v2di __src = (__v2di) __X;

  return (__m256i) __builtin_ia32_pbroadcastq256 (__src);
}

/* Invoke __builtin_ia32_pbroadcastb128 on __X viewed as __v16qi; the
   result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastb_epi8 (__m128i __X)
{
  __v16qi __src = (__v16qi) __X;

  return (__m128i) __builtin_ia32_pbroadcastb128 (__src);
}

/* Invoke __builtin_ia32_pbroadcastw128 on __X viewed as __v8hi; the
   result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastw_epi16 (__m128i __X)
{
  __v8hi __src = (__v8hi) __X;

  return (__m128i) __builtin_ia32_pbroadcastw128 (__src);
}

/* Invoke __builtin_ia32_pbroadcastd128 on __X viewed as __v4si; the
   result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastd_epi32 (__m128i __X)
{
  __v4si __src = (__v4si) __X;

  return (__m128i) __builtin_ia32_pbroadcastd128 (__src);
}

/* Invoke __builtin_ia32_pbroadcastq128 on __X viewed as __v2di; the
   result is returned as __m128i.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_broadcastq_epi64 (__m128i __X)
{
  __v2di __src = (__v2di) __X;

  return (__m128i) __builtin_ia32_pbroadcastq128 (__src);
}

/* Invoke __builtin_ia32_permvarsi256 on __X and __Y viewed as __v8si.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar8x32_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __first = (__v8si) __X;
  __v8si __second = (__v8si) __Y;

  return (__m256i) __builtin_ia32_permvarsi256 (__first, __second);
}

/* Invoke __builtin_ia32_permdf256 on __X viewed as __v4df; the integer
   selector __M is passed through unchanged.  */
#ifdef __OPTIMIZE__
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_pd (__m256d __X, const int __M)
{
  __v4df __src = (__v4df) __X;

  return (__m256d) __builtin_ia32_permdf256 (__src, __M);
}
#else
/* Macro form used when __OPTIMIZE__ is not defined.  */
#define _mm256_permute4x64_pd(X, M)                                   \
  ((__m256d) __builtin_ia32_permdf256 ((__v4df) (__m256d) (X),        \
                                       (int) (M)))
#endif

/* Invoke __builtin_ia32_permvarsf256 on __X viewed as __v8sf and __Y
   viewed as __v8si; the result is returned as __m256.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
{
  __v8sf __src = (__v8sf) __X;
  __v8si __selector = (__v8si) __Y;

  return (__m256) __builtin_ia32_permvarsf256 (__src, __selector);
}

/* Permute the four 64-bit integers of __X as selected by the immediate
   __M (wraps the permdi256 builtin; semantics per the Intel intrinsics
   reference).  __M is handed to the builtin untouched; presumably it
   must be a compile-time constant, which is why a macro replaces the
   inline function when __OPTIMIZE__ is not defined.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute4x64_epi64 (__m256i __X, const int __M)
{
  __v4di __data = (__v4di) __X;

  return (__m256i) __builtin_ia32_permdi256 (__data, __M);
}
#else
#define _mm256_permute4x64_epi64(X, M) \
  ((__m256i) __builtin_ia32_permdi256 ((__v4di)(__m256i)(X), (int)(M)))
#endif


/* Build a 256-bit value from 128-bit halves of __X and __Y as selected
   by the immediate __M (wraps the permti256 builtin; semantics per the
   Intel intrinsics reference).  __M is handed to the builtin untouched;
   a macro replaces the inline function when __OPTIMIZE__ is not
   defined.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2x128_si256 (__m256i __X, __m256i __Y, const int __M)
{
  __v4di __first = (__v4di) __X;
  __v4di __second = (__v4di) __Y;

  return (__m256i) __builtin_ia32_permti256 (__first, __second, __M);
}
#else
#define _mm256_permute2x128_si256(X, Y, M) \
  ((__m256i) __builtin_ia32_permti256 ((__v4di)(__m256i)(X), \
				       (__v4di)(__m256i)(Y), \
				       (int)(M)))
#endif

/* Extract the 128-bit half of __X selected by the immediate __M (wraps
   the extract128i256 builtin; semantics per the Intel intrinsics
   reference).  __M is handed to the builtin untouched; a macro replaces
   the inline function when __OPTIMIZE__ is not defined.  */
#ifdef __OPTIMIZE__
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_extracti128_si256 (__m256i __X, const int __M)
{
  __v4di __wide = (__v4di) __X;

  return (__m128i) __builtin_ia32_extract128i256 (__wide, __M);
}
#else
#define _mm256_extracti128_si256(X, M) \
  ((__m128i) __builtin_ia32_extract128i256 ((__v4di)(__m256i)(X), (int)(M)))
#endif

/* Return __X with the 128-bit half selected by the immediate __M
   replaced by __Y (wraps the insert128i256 builtin; semantics per the
   Intel intrinsics reference).  __M is handed to the builtin untouched;
   a macro replaces the inline function when __OPTIMIZE__ is not
   defined.  */
#ifdef __OPTIMIZE__
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_inserti128_si256 (__m256i __X, __m128i __Y, const int __M)
{
  __v4di __wide = (__v4di) __X;
  __v2di __half = (__v2di) __Y;

  return (__m256i) __builtin_ia32_insert128i256 (__wide, __half, __M);
}
#else
#define _mm256_inserti128_si256(X, Y, M) \
  ((__m256i) __builtin_ia32_insert128i256 ((__v4di)(__m256i)(X), \
					   (__v2di)(__m128i)(Y), \
					   (int)(M)))
#endif

/* Masked load of eight 32-bit integers from __X under control of __M
   (wraps the maskloadd256 builtin).  Per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear read as
   zero and their memory is not accessed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_epi32 (int const *__X, __m256i __M )
{
  const __v8si *__addr = (const __v8si *) __X;
  __v8si __sel = (__v8si) __M;

  return (__m256i) __builtin_ia32_maskloadd256 (__addr, __sel);
}

/* Masked load of four 64-bit integers from __X under control of __M
   (wraps the maskloadq256 builtin).  Per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear read as
   zero and their memory is not accessed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskload_epi64 (long long const *__X, __m256i __M )
{
  const __v4di *__addr = (const __v4di *) __X;
  __v4di __sel = (__v4di) __M;

  return (__m256i) __builtin_ia32_maskloadq256 (__addr, __sel);
}

/* Masked load of four 32-bit integers from __X under control of __M
   (wraps the maskloadd builtin).  Per the Intel intrinsics reference,
   lanes whose mask element has its top bit clear read as zero and
   their memory is not accessed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_epi32 (int const *__X, __m128i __M )
{
  const __v4si *__addr = (const __v4si *) __X;
  __v4si __sel = (__v4si) __M;

  return (__m128i) __builtin_ia32_maskloadd (__addr, __sel);
}

/* Masked load of two 64-bit integers from __X under control of __M
   (wraps the maskloadq builtin).  Per the Intel intrinsics reference,
   lanes whose mask element has its top bit clear read as zero and
   their memory is not accessed.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskload_epi64 (long long const *__X, __m128i __M )
{
  const __v2di *__addr = (const __v2di *) __X;
  __v2di __sel = (__v2di) __M;

  return (__m128i) __builtin_ia32_maskloadq (__addr, __sel);
}

/* Masked store of the 32-bit lanes of __Y to __X under control of __M
   (wraps the maskstored256 builtin).  Per the Intel intrinsics
   reference, only lanes whose mask element has its top bit set are
   written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_epi32 (int *__X, __m256i __M, __m256i __Y )
{
  __v8si *__addr = (__v8si *) __X;
  __v8si __sel = (__v8si) __M;
  __v8si __val = (__v8si) __Y;

  __builtin_ia32_maskstored256 (__addr, __sel, __val);
}

/* Masked store of the 64-bit lanes of __Y to __X under control of __M
   (wraps the maskstoreq256 builtin).  Per the Intel intrinsics
   reference, only lanes whose mask element has its top bit set are
   written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskstore_epi64 (long long *__X, __m256i __M, __m256i __Y )
{
  __v4di *__addr = (__v4di *) __X;
  __v4di __sel = (__v4di) __M;
  __v4di __val = (__v4di) __Y;

  __builtin_ia32_maskstoreq256 (__addr, __sel, __val);
}

/* Masked store of the 32-bit lanes of __Y to __X under control of __M
   (wraps the maskstored builtin).  Per the Intel intrinsics reference,
   only lanes whose mask element has its top bit set are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_epi32 (int *__X, __m128i __M, __m128i __Y )
{
  __v4si *__addr = (__v4si *) __X;
  __v4si __sel = (__v4si) __M;
  __v4si __val = (__v4si) __Y;

  __builtin_ia32_maskstored (__addr, __sel, __val);
}

/* Masked store of the 64-bit lanes of __Y to __X under control of __M
   (wraps the maskstoreq builtin).  Per the Intel intrinsics reference,
   only lanes whose mask element has its top bit set are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskstore_epi64 (long long *__X, __m128i __M, __m128i __Y )
{
  __v2di *__addr = (__v2di *) __X;
  __v2di __sel = (__v2di) __M;
  __v2di __val = (__v2di) __Y;

  __builtin_ia32_maskstoreq (__addr, __sel, __val);
}

/* Per-lane variable left shift (wraps the psllv8si builtin): each
   32-bit lane of __X is shifted left by the count in the matching lane
   of __Y, per the Intel intrinsics reference.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sllv_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;

  return (__m256i) __builtin_ia32_psllv8si (__val, __cnt);
}

/* Per-lane variable left shift (wraps the psllv4si builtin): each
   32-bit lane of __X is shifted left by the count in the matching lane
   of __Y, per the Intel intrinsics reference.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sllv_epi32 (__m128i __X, __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;

  return (__m128i) __builtin_ia32_psllv4si (__val, __cnt);
}

/* Per-lane variable left shift (wraps the psllv4di builtin): each
   64-bit lane of __X is shifted left by the count in the matching lane
   of __Y, per the Intel intrinsics reference.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sllv_epi64 (__m256i __X, __m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;

  return (__m256i) __builtin_ia32_psllv4di (__val, __cnt);
}

/* Per-lane variable left shift (wraps the psllv2di builtin): each
   64-bit lane of __X is shifted left by the count in the matching lane
   of __Y, per the Intel intrinsics reference.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sllv_epi64 (__m128i __X, __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;

  return (__m128i) __builtin_ia32_psllv2di (__val, __cnt);
}

/* Per-lane variable arithmetic right shift (wraps the psrav8si
   builtin): each 32-bit lane of __X is shifted right, with sign fill,
   by the count in the matching lane of __Y, per the Intel intrinsics
   reference.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srav_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;

  return (__m256i) __builtin_ia32_psrav8si (__val, __cnt);
}

/* Per-lane variable arithmetic right shift (wraps the psrav4si
   builtin): each 32-bit lane of __X is shifted right, with sign fill,
   by the count in the matching lane of __Y, per the Intel intrinsics
   reference.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srav_epi32 (__m128i __X, __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;

  return (__m128i) __builtin_ia32_psrav4si (__val, __cnt);
}

/* Per-lane variable logical right shift (wraps the psrlv8si builtin):
   each 32-bit lane of __X is shifted right, with zero fill, by the
   count in the matching lane of __Y, per the Intel intrinsics
   reference.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srlv_epi32 (__m256i __X, __m256i __Y)
{
  __v8si __val = (__v8si) __X;
  __v8si __cnt = (__v8si) __Y;

  return (__m256i) __builtin_ia32_psrlv8si (__val, __cnt);
}

/* Per-lane variable logical right shift (wraps the psrlv4si builtin):
   each 32-bit lane of __X is shifted right, with zero fill, by the
   count in the matching lane of __Y, per the Intel intrinsics
   reference.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srlv_epi32 (__m128i __X, __m128i __Y)
{
  __v4si __val = (__v4si) __X;
  __v4si __cnt = (__v4si) __Y;

  return (__m128i) __builtin_ia32_psrlv4si (__val, __cnt);
}

/* Per-lane variable logical right shift (wraps the psrlv4di builtin):
   each 64-bit lane of __X is shifted right, with zero fill, by the
   count in the matching lane of __Y, per the Intel intrinsics
   reference.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_srlv_epi64 (__m256i __X, __m256i __Y)
{
  __v4di __val = (__v4di) __X;
  __v4di __cnt = (__v4di) __Y;

  return (__m256i) __builtin_ia32_psrlv4di (__val, __cnt);
}

/* Per-lane variable logical right shift (wraps the psrlv2di builtin):
   each 64-bit lane of __X is shifted right, with zero fill, by the
   count in the matching lane of __Y, per the Intel intrinsics
   reference.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_srlv_epi64 (__m128i __X, __m128i __Y)
{
  __v2di __val = (__v2di) __X;
  __v2di __cnt = (__v2di) __Y;

  return (__m128i) __builtin_ia32_psrlv2di (__val, __cnt);
}

#ifdef __OPTIMIZE__
/* Gather doubles addressed through the 32-bit indices in __index with
   every lane enabled: the pass-through operand is zero and the mask is
   that zero vector compared equal with itself.  __scale is forwarded
   to the builtin untouched.  Identifiers use the reserved "__" prefix
   so user macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
{
  __v2df __src = _mm_setzero_pd ();
  __v2df __mask = _mm_cmpeq_pd (__src, __src);

  return (__m128d) __builtin_ia32_gathersiv2df (__src,
						__base,
						(__v4si)__index,
						__mask,
						__scale);
}

/* Masked gather of doubles through 32-bit indices: __src and __mask
   are forwarded to the builtin (per the Intel intrinsics reference,
   lanes whose mask element has its top bit clear keep the __src
   value).  Identifiers use the reserved "__" prefix so user macros
   named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_pd (__m128d __src, double const *__base, __m128i __index,
		       __m128d __mask, const int __scale)
{
  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)__src,
						__base,
						(__v4si)__index,
						(__v2df)__mask,
						__scale);
}

/* Gather four doubles addressed through the 32-bit indices in __index
   with every lane enabled: the pass-through operand is zero and the
   mask is that zero vector compared with itself under _CMP_EQ_OQ.
   Identifiers use the reserved "__" prefix so user macros named e.g.
   "index" or "scale" cannot clash with this header.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_pd (double const *__base, __m128i __index, const int __scale)
{
  __v4df __src = _mm256_setzero_pd ();
  __v4df __mask = _mm256_cmp_pd (__src, __src, _CMP_EQ_OQ);

  return (__m256d) __builtin_ia32_gathersiv4df (__src,
						__base,
						(__v4si)__index,
						__mask,
						__scale);
}

/* Masked gather of four doubles through 32-bit indices: __src and
   __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_pd (__m256d __src, double const *__base,
			  __m128i __index, __m256d __mask, const int __scale)
{
  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)__src,
						__base,
						(__v4si)__index,
						(__v4df)__mask,
						__scale);
}

/* Gather two doubles addressed through the 64-bit indices in __index
   with every lane enabled: the pass-through operand is zero and the
   mask is that zero vector compared equal with itself.  Identifiers
   use the reserved "__" prefix so user macros named e.g. "index" or
   "scale" cannot clash with this header.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
{
  __v2df __src = _mm_setzero_pd ();
  __v2df __mask = _mm_cmpeq_pd (__src, __src);

  return (__m128d) __builtin_ia32_gatherdiv2df (__src,
						__base,
						(__v2di)__index,
						__mask,
						__scale);
}

/* Masked gather of two doubles through 64-bit indices: __src and
   __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_pd (__m128d __src, double const *__base, __m128i __index,
		       __m128d __mask, const int __scale)
{
  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)__src,
						__base,
						(__v2di)__index,
						(__v2df)__mask,
						__scale);
}

/* Gather four doubles addressed through the 64-bit indices in __index
   with every lane enabled: the pass-through operand is zero and the
   mask is that zero vector compared with itself under _CMP_EQ_OQ.
   Identifiers use the reserved "__" prefix so user macros named e.g.
   "index" or "scale" cannot clash with this header.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_pd (double const *__base, __m256i __index, const int __scale)
{
  __v4df __src = _mm256_setzero_pd ();
  __v4df __mask = _mm256_cmp_pd (__src, __src, _CMP_EQ_OQ);

  return (__m256d) __builtin_ia32_gatherdiv4df (__src,
						__base,
						(__v4di)__index,
						__mask,
						__scale);
}

/* Masked gather of four doubles through 64-bit indices: __src and
   __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_pd (__m256d __src, double const *__base,
			  __m256i __index, __m256d __mask, const int __scale)
{
  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)__src,
						__base,
						(__v4di)__index,
						(__v4df)__mask,
						__scale);
}

/* Gather four floats addressed through the 32-bit indices in __index
   with every lane enabled: the pass-through operand is zero and the
   mask is that zero vector compared equal with itself.  Identifiers
   use the reserved "__" prefix so user macros named e.g. "index" or
   "scale" cannot clash with this header.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_ps (float const *__base, __m128i __index, const int __scale)
{
  __v4sf __src = _mm_setzero_ps ();
  __v4sf __mask = _mm_cmpeq_ps (__src, __src);

  return (__m128) __builtin_ia32_gathersiv4sf (__src,
					       __base,
					       (__v4si)__index,
					       __mask,
					       __scale);
}

/* Masked gather of four floats through 32-bit indices: __src and
   __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_ps (__m128 __src, float const *__base, __m128i __index,
		       __m128 __mask, const int __scale)
{
  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)__src,
					       __base,
					       (__v4si)__index,
					       (__v4sf)__mask,
					       __scale);
}

/* Gather eight floats addressed through the 32-bit indices in __index
   with every lane enabled: the pass-through operand is zero and the
   mask is that zero vector compared with itself under _CMP_EQ_OQ.
   Identifiers use the reserved "__" prefix so user macros named e.g.
   "index" or "scale" cannot clash with this header.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_ps (float const *__base, __m256i __index, const int __scale)
{
  __v8sf __src = _mm256_setzero_ps ();
  __v8sf __mask = _mm256_cmp_ps (__src, __src, _CMP_EQ_OQ);

  return (__m256) __builtin_ia32_gathersiv8sf (__src,
					       __base,
					       (__v8si)__index,
					       __mask,
					       __scale);
}

/* Masked gather of eight floats through 32-bit indices: __src and
   __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_ps (__m256 __src, float const *__base,
			  __m256i __index, __m256 __mask, const int __scale)
{
  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)__src,
					       __base,
					       (__v8si)__index,
					       (__v8sf)__mask,
					       __scale);
}

/* Gather floats addressed through the 64-bit indices held in the
   128-bit __index, with every lane enabled: the pass-through operand
   is zero and the mask is that zero vector compared equal with itself.
   Identifiers use the reserved "__" prefix so user macros named e.g.
   "index" or "scale" cannot clash with this header.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_ps (float const *__base, __m128i __index, const int __scale)
{
  __v4sf __src = _mm_setzero_ps ();
  __v4sf __mask = _mm_cmpeq_ps (__src, __src);

  return (__m128) __builtin_ia32_gatherdiv4sf (__src,
					       __base,
					       (__v2di)__index,
					       __mask,
					       __scale);
}

/* Masked gather of floats through the 64-bit indices held in the
   128-bit __index: __src and __mask are forwarded to the builtin (per
   the Intel intrinsics reference, lanes whose mask element has its top
   bit clear keep the __src value).  Identifiers use the reserved "__"
   prefix so user macros named e.g. "index" or "mask" cannot clash with
   this header.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_ps (__m128 __src, float const *__base, __m128i __index,
		       __m128 __mask, const int __scale)
{
  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)__src,
					       __base,
					       (__v2di)__index,
					       (__v4sf)__mask,
					       __scale);
}

/* Gather four floats addressed through the four 64-bit indices in the
   256-bit __index; the result is a 128-bit vector.  Every lane is
   enabled: the pass-through operand is zero and the mask is that zero
   vector compared equal with itself.  Identifiers use the reserved
   "__" prefix so user macros named e.g. "index" or "scale" cannot
   clash with this header.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_ps (float const *__base, __m256i __index, const int __scale)
{
  __v4sf __src = _mm_setzero_ps ();
  __v4sf __mask = _mm_cmpeq_ps (__src, __src);

  return (__m128) __builtin_ia32_gatherdiv4sf256 (__src,
						  __base,
						  (__v4di)__index,
						  __mask,
						  __scale);
}

/* Masked gather of four floats through the four 64-bit indices in the
   256-bit __index; source, mask and result are 128-bit vectors that
   are forwarded to / returned from the builtin (per the Intel
   intrinsics reference, lanes whose mask element has its top bit clear
   keep the __src value).  Identifiers use the reserved "__" prefix so
   user macros cannot clash with this header.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_ps (__m128 __src, float const *__base,
			  __m256i __index, __m128 __mask, const int __scale)
{
  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)__src,
						  __base,
						  (__v4di)__index,
						  (__v4sf)__mask,
						  __scale);
}

/* Gather two 64-bit integers addressed through 32-bit indices in
   __index with every lane enabled: the pass-through operand is zero
   and each mask lane is ~0.  Identifiers use the reserved "__" prefix
   so user macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_epi64 (long long int const *__base,
		     __m128i __index, const int __scale)
{
  __v2di __src = __extension__ (__v2di){ 0, 0 };
  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };

  return (__m128i) __builtin_ia32_gathersiv2di (__src,
						__base,
						(__v4si)__index,
						__mask,
						__scale);
}

/* Masked gather of two 64-bit integers through 32-bit indices: __src
   and __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_epi64 (__m128i __src, long long int const *__base,
			  __m128i __index, __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)__src,
						__base,
						(__v4si)__index,
						(__v2di)__mask,
						__scale);
}

/* Gather four 64-bit integers addressed through the 32-bit indices in
   __index with every lane enabled: the pass-through operand is zero
   and each mask lane is ~0.  Identifiers use the reserved "__" prefix
   so user macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_epi64 (long long int const *__base,
			__m128i __index, const int __scale)
{
  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gathersiv4di (__src,
						__base,
						(__v4si)__index,
						__mask,
						__scale);
}

/* Masked gather of four 64-bit integers through 32-bit indices: __src
   and __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_epi64 (__m256i __src, long long int const *__base,
			     __m128i __index, __m256i __mask,
			     const int __scale)
{
  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)__src,
						__base,
						(__v4si)__index,
						(__v4di)__mask,
						__scale);
}

/* Gather two 64-bit integers addressed through the 64-bit indices in
   __index with every lane enabled: the pass-through operand is zero
   and each mask lane is ~0.  Identifiers use the reserved "__" prefix
   so user macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_epi64 (long long int const *__base,
		     __m128i __index, const int __scale)
{
  __v2di __src = __extension__ (__v2di){ 0, 0 };
  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv2di (__src,
						__base,
						(__v2di)__index,
						__mask,
						__scale);
}

/* Masked gather of two 64-bit integers through 64-bit indices: __src
   and __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_epi64 (__m128i __src, long long int const *__base,
			  __m128i __index, __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)__src,
						__base,
						(__v2di)__index,
						(__v2di)__mask,
						__scale);
}

/* Gather four 64-bit integers addressed through the 64-bit indices in
   __index with every lane enabled: the pass-through operand is zero
   and each mask lane is ~0.  Identifiers use the reserved "__" prefix
   so user macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_epi64 (long long int const *__base,
			__m256i __index, const int __scale)
{
  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gatherdiv4di (__src,
						__base,
						(__v4di)__index,
						__mask,
						__scale);
}

/* Masked gather of four 64-bit integers through 64-bit indices: __src
   and __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_epi64 (__m256i __src, long long int const *__base,
			     __m256i __index, __m256i __mask,
			     const int __scale)
{
  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)__src,
						__base,
						(__v4di)__index,
						(__v4di)__mask,
						__scale);
}

/* Gather four 32-bit integers addressed through the 32-bit indices in
   __index with every lane enabled: the pass-through operand is zero
   and each mask lane is ~0.  Identifiers use the reserved "__" prefix
   so user macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i32gather_epi32 (int const *__base, __m128i __index, const int __scale)
{
  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gathersiv4si (__src,
						__base,
						(__v4si)__index,
						__mask,
						__scale);
}

/* Masked gather of four 32-bit integers through 32-bit indices: __src
   and __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i32gather_epi32 (__m128i __src, int const *__base, __m128i __index,
			  __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)__src,
						__base,
						(__v4si)__index,
						(__v4si)__mask,
						__scale);
}

/* Gather eight 32-bit integers addressed through the 32-bit indices in
   __index with every lane enabled: the pass-through operand is zero
   and each mask lane is ~0.  Identifiers use the reserved "__" prefix
   so user macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i32gather_epi32 (int const *__base, __m256i __index, const int __scale)
{
  __v8si __src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
  __v8si __mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };

  return (__m256i) __builtin_ia32_gathersiv8si (__src,
						__base,
						(__v8si)__index,
						__mask,
						__scale);
}

/* Masked gather of eight 32-bit integers through 32-bit indices: __src
   and __mask are forwarded to the builtin (per the Intel intrinsics
   reference, lanes whose mask element has its top bit clear keep the
   __src value).  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "mask" cannot clash with this header.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i32gather_epi32 (__m256i __src, int const *__base,
			     __m256i __index, __m256i __mask,
			     const int __scale)
{
  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)__src,
						__base,
						(__v8si)__index,
						(__v8si)__mask,
						__scale);
}

/* Gather 32-bit integers addressed through the 64-bit indices held in
   the 128-bit __index, with every lane enabled: the pass-through
   operand is zero and each mask lane is ~0.  Identifiers use the
   reserved "__" prefix so user macros named e.g. "index" or "scale"
   cannot clash with this header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_i64gather_epi32 (int const *__base, __m128i __index, const int __scale)
{
  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv4si (__src,
						__base,
						(__v2di)__index,
						__mask,
						__scale);
}

/* Masked gather of 32-bit integers through the 64-bit indices held in
   the 128-bit __index: __src and __mask are forwarded to the builtin
   (per the Intel intrinsics reference, lanes whose mask element has
   its top bit clear keep the __src value).  Identifiers use the
   reserved "__" prefix so user macros cannot clash with this header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_i64gather_epi32 (__m128i __src, int const *__base, __m128i __index,
			  __m128i __mask, const int __scale)
{
  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)__src,
						__base,
						(__v2di)__index,
						(__v4si)__mask,
						__scale);
}

/* Gather four 32-bit integers addressed through the four 64-bit
   indices in the 256-bit __index; the result is a 128-bit vector.
   Every lane is enabled: the pass-through operand is zero and each
   mask lane is ~0.  Identifiers use the reserved "__" prefix so user
   macros named e.g. "index" or "scale" cannot clash with this
   header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_i64gather_epi32 (int const *__base, __m256i __index, const int __scale)
{
  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };

  return (__m128i) __builtin_ia32_gatherdiv4si256 (__src,
						   __base,
						   (__v4di)__index,
						   __mask,
						   __scale);
}

/* Masked gather of four 32-bit integers through the four 64-bit
   indices in the 256-bit __index; source, mask and result are 128-bit
   vectors forwarded to / returned from the builtin (per the Intel
   intrinsics reference, lanes whose mask element has its top bit clear
   keep the __src value).  Identifiers use the reserved "__" prefix so
   user macros cannot clash with this header.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
			     __m256i __index, __m128i __mask,
			     const int __scale)
{
  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)__src,
						   __base,
						   (__v4di)__index,
						   (__v4si)__mask,
						   __scale);
}
#else /* __OPTIMIZE__ */
/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments (e.g. "p + 1" for BASE) are converted as a unit.  Each
   mask lane is -1.0, which has its sign bit set.  */
#define _mm_i32gather_pd(BASE, INDEX, SCALE) \
  ((__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (), \
					  (double const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v2df)_mm_set1_pd( \
					    (double)(long long int) -1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)(SRC), \
					  (double const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v2df)(__m128d)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  Each mask lane is -1.0, which
   has its sign bit set.  */
#define _mm256_i32gather_pd(BASE, INDEX, SCALE) \
  ((__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (), \
					  (double const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v4df)_mm256_set1_pd( \
					    (double)(long long int) -1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)(SRC), \
					  (double const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v4df)(__m256d)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  Each mask lane is -1.0, which
   has its sign bit set.  */
#define _mm_i64gather_pd(BASE, INDEX, SCALE) \
  ((__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (), \
					  (double const *)(BASE), \
					  (__v2di)(__m128i)(INDEX), \
					  (__v2df)_mm_set1_pd( \
					    (double)(long long int) -1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)(SRC), \
					  (double const *)(BASE), \
					  (__v2di)(__m128i)(INDEX), \
					  (__v2df)(__m128d)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  Each mask lane is -1.0, which
   has its sign bit set.  */
#define _mm256_i64gather_pd(BASE, INDEX, SCALE) \
  ((__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (), \
					  (double const *)(BASE), \
					  (__v4di)(__m256i)(INDEX), \
					  (__v4df)_mm256_set1_pd( \
					    (double)(long long int) -1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)(SRC), \
					  (double const *)(BASE), \
					  (__v4di)(__m256i)(INDEX), \
					  (__v4df)(__m256d)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  Each mask lane is -1.0f, which
   has its sign bit set; the mask is cast to __v4sf explicitly like the
   other operands.  */
#define _mm_i32gather_ps(BASE, INDEX, SCALE) \
  ((__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (), \
					 (float const *)(BASE), \
					 (__v4si)(__m128i)(INDEX), \
					 (__v4sf)_mm_set1_ps ((float)(int) -1), \
					 (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  SRC and MASK are float vectors,
   so they pass through __m128 (not __m128d) on the way to __v4sf.  */
#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)(SRC), \
					 (float const *)(BASE), \
					 (__v4si)(__m128i)(INDEX), \
					 (__v4sf)(__m128)(MASK), \
					 (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  Each mask lane is -1.0f, which
   has its sign bit set.  */
#define _mm256_i32gather_ps(BASE, INDEX, SCALE) \
  ((__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
					 (float const *)(BASE), \
					 (__v8si)(__m256i)(INDEX), \
					 (__v8sf)_mm256_set1_ps ( \
					   (float)(int) -1), \
					 (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  MASK is a float vector, so it
   passes through __m256 (not __m256d) on the way to __v8sf.  */
#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)(SRC), \
					 (float const *)(BASE), \
					 (__v8si)(__m256i)(INDEX), \
					 (__v8sf)(__m256)(MASK), \
					 (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The zero pass-through operand
   comes from _mm_setzero_ps since the operand is a float vector; each
   mask lane is -1.0f, which has its sign bit set.  */
#define _mm_i64gather_ps(BASE, INDEX, SCALE) \
  ((__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_ps (), \
					 (float const *)(BASE), \
					 (__v2di)(__m128i)(INDEX), \
					 (__v4sf)_mm_set1_ps ( \
					   (float)(int) -1), \
					 (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  MASK is a float vector, so it
   passes through __m128 (not __m128d) on the way to __v4sf.  */
#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)(SRC), \
					 (float const *)(BASE), \
					 (__v2di)(__m128i)(INDEX), \
					 (__v4sf)(__m128)(MASK), \
					 (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  Each mask lane is -1.0f, which
   has its sign bit set.  */
#define _mm256_i64gather_ps(BASE, INDEX, SCALE) \
  ((__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \
					    (float const *)(BASE), \
					    (__v4di)(__m256i)(INDEX), \
					    (__v4sf)_mm_set1_ps( \
					      (float)(int) -1), \
					    (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)(SRC), \
					    (float const *)(BASE), \
					    (__v4di)(__m256i)(INDEX), \
					    (__v4sf)(__m128)(MASK), \
					    (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 64-bit
   lane.  */
#define _mm_i32gather_epi64(BASE, INDEX, SCALE) \
  ((__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
					  (long long const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v2di)_mm_set1_epi64x (-1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)(SRC), \
					  (long long const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v2di)(__m128i)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 64-bit
   lane.  */
#define _mm256_i32gather_epi64(BASE, INDEX, SCALE) \
  ((__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
					  (long long const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v4di)_mm256_set1_epi64x (-1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)(SRC), \
					  (long long const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v4di)(__m256i)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 64-bit
   lane.  */
#define _mm_i64gather_epi64(BASE, INDEX, SCALE) \
  ((__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
					  (long long const *)(BASE), \
					  (__v2di)(__m128i)(INDEX), \
					  (__v2di)_mm_set1_epi64x (-1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)(SRC), \
					  (long long const *)(BASE), \
					  (__v2di)(__m128i)(INDEX), \
					  (__v2di)(__m128i)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 64-bit
   lane.  */
#define _mm256_i64gather_epi64(BASE, INDEX, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
					  (long long const *)(BASE), \
					  (__v4di)(__m256i)(INDEX), \
					  (__v4di)_mm256_set1_epi64x (-1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)(SRC), \
					  (long long const *)(BASE), \
					  (__v4di)(__m256i)(INDEX), \
					  (__v4di)(__m256i)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 32-bit
   lane.  */
#define _mm_i32gather_epi32(BASE, INDEX, SCALE) \
  ((__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (), \
					  (int const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v4si)_mm_set1_epi32 (-1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)(SRC), \
					  (int const *)(BASE), \
					  (__v4si)(__m128i)(INDEX), \
					  (__v4si)(__m128i)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 32-bit
   lane.  */
#define _mm256_i32gather_epi32(BASE, INDEX, SCALE) \
  ((__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
					  (int const *)(BASE), \
					  (__v8si)(__m256i)(INDEX), \
					  (__v8si)_mm256_set1_epi32 (-1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)(SRC), \
					  (int const *)(BASE), \
					  (__v8si)(__m256i)(INDEX), \
					  (__v8si)(__m256i)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 32-bit
   lane.  */
#define _mm_i64gather_epi32(BASE, INDEX, SCALE) \
  ((__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (), \
					  (int const *)(BASE), \
					  (__v2di)(__m128i)(INDEX), \
					  (__v4si)_mm_set1_epi32 (-1), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)(SRC), \
					  (int const *)(BASE), \
					  (__v2di)(__m128i)(INDEX), \
					  (__v4si)(__m128i)(MASK), \
					  (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  The mask is -1 in every 32-bit
   lane.  */
#define _mm256_i64gather_epi32(BASE, INDEX, SCALE) \
  ((__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
					     (int const *)(BASE), \
					     (__v4di)(__m256i)(INDEX), \
					     (__v4si)_mm_set1_epi32(-1), \
					     (int)(SCALE)))

/* Macro form used when __OPTIMIZE__ is not defined.  Every argument
   and the whole expansion are parenthesized so that expression
   arguments are converted as a unit.  */
#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
  ((__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)(SRC), \
					     (int const *)(BASE), \
					     (__v4di)(__m256i)(INDEX), \
					     (__v4si)(__m128i)(MASK), \
					     (int)(SCALE)))
#endif  /* __OPTIMIZE__ */

Filemanager

Name Type Size Permission Actions
adxintrin.h File 1.76 KB 0644
ammintrin.h File 3.01 KB 0644
avx2intrin.h File 56.15 KB 0644
avxintrin.h File 46.88 KB 0644
bmi2intrin.h File 3.17 KB 0644
bmiintrin.h File 5.32 KB 0644
bmmintrin.h File 1.13 KB 0644
cpuid.h File 8 KB 0644
cross-stdarg.h File 2.5 KB 0644
emmintrin.h File 49.56 KB 0644
f16cintrin.h File 3.2 KB 0644
float.h File 8.67 KB 0644
fma4intrin.h File 8.78 KB 0644
fmaintrin.h File 10.15 KB 0644
fxsrintrin.h File 1.87 KB 0644
ia32intrin.h File 6.2 KB 0644
immintrin.h File 3.85 KB 0644
iso646.h File 1.24 KB 0644
limits.h File 5.26 KB 0644
lwpintrin.h File 3.14 KB 0644
lzcntintrin.h File 2.15 KB 0644
mm3dnow.h File 6.32 KB 0644
mm_malloc.h File 1.71 KB 0644
mmintrin.h File 29.87 KB 0644
nmmintrin.h File 1.35 KB 0644
omp.h File 3.56 KB 0644
pkuintrin.h File 1.49 KB 0644
pmmintrin.h File 4.14 KB 0644
popcntintrin.h File 1.57 KB 0644
prfchwintrin.h File 1.54 KB 0644
rdseedintrin.h File 1.82 KB 0644
rtmintrin.h File 2.54 KB 0644
smmintrin.h File 27.05 KB 0644
stdalign.h File 1.18 KB 0644
stdarg.h File 3.98 KB 0644
stdbool.h File 1.4 KB 0644
stddef.h File 13.3 KB 0644
stdfix.h File 5.86 KB 0644
stdint-gcc.h File 6.86 KB 0644
stdint.h File 328 B 0644
stdnoreturn.h File 1.11 KB 0644
syslimits.h File 330 B 0644
tbmintrin.h File 5.06 KB 0644
tmmintrin.h File 8.02 KB 0644
unwind.h File 10.48 KB 0644
varargs.h File 139 B 0644
wmmintrin.h File 4.26 KB 0644
x86intrin.h File 2.32 KB 0644
xmmintrin.h File 40.7 KB 0644
xopintrin.h File 27.77 KB 0644
xsaveintrin.h File 1.94 KB 0644
xsaveoptintrin.h File 1.63 KB 0644
xtestintrin.h File 1.52 KB 0644