one30 / simd-sse-avx-neon

An introduction to SIMD instructions, mainly SSE and AVX.

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

SSE

  1. __m128i
/*
 * __m128i: integer vector type, shown here in its MSVC-style declaration
 * (it relies on __declspec and the sized __intN types).
 *
 * Every member is an array totalling 16 bytes, so the union lets the same
 * storage be viewed as signed or unsigned lanes of 8, 16, 32 or 64 bits.
 * _CRT_ALIGN(16) expands to __declspec(align(16)) unless __midl is defined
 * (see the macro definition that follows this typedef).
 * NOTE(review): __declspec(intrin_type) is presumably an MSVC marker for
 * compiler-intrinsic types; confirm against the MSVC documentation.
 */
typedef union __declspec(intrin_type) _CRT_ALIGN(16) __m128i {
	/* Signed lane views. */
	__int8			m128i_i8[16];
	__int16			m128i_i16[8];
	__int32			m128i_i32[4];
	__int64			m128i_i64[2];
	/* Unsigned lane views over the same bytes. */
	unsigned __int8		m128i_u8[16];
	unsigned __int16	m128i_u16[8];
	unsigned __int32	m128i_u32[4];
	unsigned __int64	m128i_u64[2];
} __m128i;

/*
 * Fallback definition of _CRT_ALIGN(x), used only when nothing has defined
 * it already. When __midl is defined the macro expands to nothing; in every
 * other case it expands to __declspec(align(x)).
 * NOTE(review): __midl is presumably set by the MIDL compiler, which would
 * not accept __declspec(align); confirm.
 */
#if !defined(_CRT_ALIGN)
	#if defined(__midl)
		#define _CRT_ALIGN(x)
	#else
		#define _CRT_ALIGN(x) __declspec(align(x))
	#endif
#endif
  2. SSE2
  • __m128i _mm_unpackhi_epi16 (__m128i a, __m128i b)
  • __m128i _mm_unpackhi_epi32 (__m128i a, __m128i b)
  • __m128i _mm_unpackhi_epi64 (__m128i a, __m128i b)
  • __m128i _mm_unpackhi_epi8 (__m128i a, __m128i b)
  • __m128i _mm_unpacklo_epi16 (__m128i a, __m128i b)
  • __m128i _mm_unpacklo_epi32 (__m128i a, __m128i b)
  • __m128i _mm_unpacklo_epi64 (__m128i a, __m128i b)
  • __m128i _mm_unpacklo_epi8 (__m128i a, __m128i b)
  • void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
  • __m128i _mm_xor_si128 (__m128i a, __m128i b)
  • __m128i _mm_or_si128 (__m128i a, __m128i b)
  • __m128i _mm_and_si128 (__m128i a, __m128i b)
  • __m128i _mm_slli_epi16 (__m128i a, int imm8)
  • __m128i _mm_srli_epi16 (__m128i a, int imm8)
  • __m128i _mm_add_epi16 (__m128i a, __m128i b)
  • __m128i _mm_adds_epi16 (__m128i a, __m128i b)
    • 两个加法的不同在于,add会舍弃进位,adds会在溢出时将结果设置为最大值或最小值

Testing for SSE2 support

#include <stdio.h>
#include <emmintrin.h>

/*
 * Minimal SSE2 smoke test.
 *
 * Executes one SSE2 intrinsic and verifies its result, so that a successful
 * build plus a zero exit status shows the intrinsic both compiled and ran
 * correctly. The original version never used the vector, which let the
 * compiler discard it and left the program unable to report anything.
 *
 * Returns 0 when every byte lane holds the broadcast value, 1 otherwise.
 */
int main(void)
{
	unsigned char lanes[16];

	/* Broadcast 0x11 into all sixteen 8-bit lanes. */
	__m128i a = _mm_set1_epi8(0x11);

	/* Unaligned store: lanes[] carries no 16-byte alignment guarantee. */
	_mm_storeu_si128((__m128i *)lanes, a);

	for (size_t i = 0; i < sizeof lanes; i++) {
		if (lanes[i] != 0x11) {
			printf("SSE2 check failed: lane %zu = 0x%02x\n", i, lanes[i]);
			return 1;
		}
	}

	printf("SSE2 check passed\n");
	return 0;
}

Compile with

$ gcc -march=native test.c

AVX

Projects

ARM NEON

NEON intrinsics

  • vld4_u16
  uint16x4x4_t vld4_u16 (const uint16_t *)
  Form of expected instruction(s): vld4.16 {d0, d1, d2, d3}, [r0]
  • vld4q_u16
  uint16x8x4_t vld4q_u16 (const uint16_t *) 
  Form of expected instruction(s): vld4.16 {d0, d1, d2, d3}, [r0]
  • vld4_lane_u16
  uint16x4x4_t vld4_lane_u16 (const uint16_t *, uint16x4x4_t, const int) 
  Form of expected instruction(s): vld4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0]

ARMv7-a

ARMv8-a ARM64 ASIMD

NEON projects

References

About

An introduction to SIMD instructions, mainly SSE and AVX.

License:MIT License


Languages

Language:C 100.0%