This code sample demonstrates how to use C, Streaming SIMD Extensions 2 (SSE2) and Streaming SIMD Extensions 3 (SSE3) intrinsics to multiply two complex numbers. The following output is typical of this code: 23.00+ -2.00i. Output may vary depending on your compiler version and the components of your computing platform.
SSE3 intrinsics do not run on processors from the Pentium® III family or earlier.
/*
* [Description]
* This code sample demonstrates the use of C in comparison with SSE2
* and SSE3 intrinsics to multiply two complex numbers.
*
* [Compile]
* icc double_complex.c (linux)
* icl double_complex.c (windows)
*
* [Output]
* Complex Product(C): 23.00+ -2.00i
* Complex Product(SSE3): 23.00+ -2.00i
* Complex Product(SSE2): 23.00+ -2.00i
*/
#include <stdio.h>
#include <pmmintrin.h>
/*
 * Complex number held as two adjacent doubles.
 * Field order (real first, then img) is relied upon by the SIMD variants in
 * this file, which write both components with one store through the struct
 * pointer.
 */
typedef struct {
    double real; /* real part */
    double img;  /* imaginary part */
} complex_num;

/*
 * Scalar reference implementation of complex multiplication.
 *
 *   (x.real + x.img*i) * (y.real + y.img*i)
 *     = (x.real*y.real - x.img*y.img) + (x.img*y.real + y.img*x.real)*i
 *
 * x, y: operands, passed by value.
 * z:    destination for the product; written through, must point to a
 *       valid complex_num.
 */
void multiply_C(complex_num x, complex_num y, complex_num *z)
{
    /* The four partial products of the expansion. */
    const double real_real = x.real * y.real;
    const double img_img   = x.img * y.img;
    const double img_real  = x.img * y.real;
    const double real_img  = y.img * x.real;

    /* i*i == -1, hence the subtraction in the real component. */
    z->real = real_real - img_img;
    z->img  = img_real + real_img;
}
#if __INTEL_COMPILER
/*
 * Complex multiplication using SSE3 intrinsics.
 *
 * Computes the same product as multiply_C. Vector contents in the comments
 * below are written as [upper element, lower element].
 *
 * x, y: operands, passed by value.
 * z:    destination; both components are written with a single unaligned
 *       128-bit store, so real must be immediately followed by img.
 */
void multiply_SSE3(complex_num x, complex_num y, complex_num *z)
{
    // Broadcast each component of x into both vector elements.
    const __m128d x_real = _mm_loaddup_pd(&x.real);   // [x.real, x.real]
    const __m128d x_img  = _mm_loaddup_pd(&x.img);    // [x.img,  x.img]

    // y in its natural order, and with its two elements exchanged.
    const __m128d y_vec     = _mm_set_pd(y.img, y.real);         // [y.img,  y.real]
    const __m128d y_swapped = _mm_shuffle_pd(y_vec, y_vec, 1);   // [y.real, y.img]

    // by_x_real: [(x.real*y.img), (x.real*y.real)]
    const __m128d by_x_real = _mm_mul_pd(y_vec, x_real);
    // by_x_img:  [(x.img*y.real), (x.img*y.img)]
    const __m128d by_x_img  = _mm_mul_pd(y_swapped, x_img);

    // addsub adds in the upper element and subtracts in the lower one:
    // [((x.real*y.img)+(x.img*y.real)), ((x.real*y.real)-(x.img*y.img))]
    const __m128d product = _mm_addsub_pd(by_x_real, by_x_img);

    // Lower element lands in z->real, upper element in z->img.
    _mm_storeu_pd(&z->real, product);
}
#endif
#if __INTEL_COMPILER
/*
 * Complex multiplication using only SSE2 intrinsics.
 *
 * Same result as multiply_C. SSE2 has no combined add/subtract operation,
 * so the sum and the difference of the partial products are computed
 * separately and the wanted element of each is then picked out with a
 * shuffle. Vector contents below are written as
 * [upper element, lower element].
 *
 * x, y: operands, passed by value.
 * z:    destination; both components are written with a single unaligned
 *       128-bit store, so real must be immediately followed by img.
 */
void multiply_SSE2(complex_num x, complex_num y, complex_num *z)
{
    // Broadcast each component of x into both vector elements.
    const __m128d x_real = _mm_load1_pd(&x.real);   // [x.real, x.real]
    const __m128d x_img  = _mm_load1_pd(&x.img);    // [x.img,  x.img]

    // y in its natural order, and with its two elements exchanged.
    const __m128d y_vec     = _mm_set_pd(y.img, y.real);         // [y.img,  y.real]
    const __m128d y_swapped = _mm_shuffle_pd(y_vec, y_vec, 1);   // [y.real, y.img]

    // by_x_real: [(x.real*y.img), (x.real*y.real)]
    const __m128d by_x_real = _mm_mul_pd(y_vec, x_real);
    // by_x_img:  [(x.img*y.real), (x.img*y.img)]
    const __m128d by_x_img  = _mm_mul_pd(y_swapped, x_img);

    // Only the upper element of sums (imaginary part) and the lower
    // element of diffs (real part) are meaningful.
    const __m128d sums  = _mm_add_pd(by_x_real, by_x_img);
    const __m128d diffs = _mm_sub_pd(by_x_real, by_x_img);

    // Selector 2: lower element from diffs, upper element from sums.
    const __m128d product = _mm_shuffle_pd(diffs, sums, 2);

    // Lower element lands in z->real, upper element in z->img.
    _mm_storeu_pd(&z->real, product);
}
#endif
int main()
{
complex_num a, b, c;
// Initialize complex numbers
a.real = 3;
a.img = 2;
b.real = 5;
b.img = -4;
// Output for each: 23.00+ -2.00i
multiply_C(a, b, &c);
printf("Complex Product(C): %2.2f+ %2.2fi\n", c.real, c.img);
#if __INTEL_COMPILER
multiply_SSE3(a, b, &c);
printf("Complex Product(SSE3): %2.2f+ %2.2fi\n", c.real, c.img);
multiply_SSE2(a, b, &c);
printf("Complex Product(SSE2): %2.2f+ %2.2fi\n", c.real, c.img);
#endif
return 0;
}