Jim

still not working

No preview for this file type
... ... @@ -9,35 +9,67 @@
/*
 * 3x3 single-precision matrix with scalar storage (`m`) and per-row SIMD
 * storage (`row`). These are separate struct members, not a union, so a
 * write to one is not reflected in the other.
 *
 * NOTE(review): `row` is declared twice below with different vector types
 * (__m128 and __m256). C does not allow two members with the same name;
 * this looks like the old and new revision of one line shown together.
 * Keep exactly one -- TODO confirm which width the callers expect.
 */
struct Mat33 {
float m[3][3];
__m128 row[3];
__m256 row[3];
};
typedef struct Mat33 Mat33;
/*
 * 256-bit variant of lincomb_AVX. Accumulates into `result`:
 *     A->m[0][0] * (8 floats loaded from B->m[0])
 *   + A->m[1][0] * (8 floats loaded from B->m[1])
 *   + A->m[2][0] * (8 floats loaded from B->m[2])
 *
 * NOTE(review): no `return` statement or closing brace follows these lines
 * before another function header begins -- the definition is incomplete as
 * shown.
 * NOTE(review): a row of `m` holds 3 floats and `m` holds 9 in total, but
 * each _mm256_loadu_ps reads 8 floats; the loads starting at B->m[1] and
 * B->m[2] therefore read past the end of `m`.
 * NOTE(review): the scalars used are the first element of each row of A
 * (i.e. A's first column), and nothing selects an output row -- confirm
 * this is the intended combination.
 */
__m256 lincomb_AVX (Mat33 *A, Mat33 *B){
__m256 result;
/* Each pointer addresses the first float of one row of A. */
const float *temp = A->m[0];
const float *temp2 = A->m[1];
const float *temp3 = A->m[2];
/* Broadcast one scalar to all 8 lanes, multiply by 8 floats from B. */
__m256 a = _mm256_broadcast_ss (temp);
__m256 b = _mm256_loadu_ps(B->m[0]);
result = _mm256_mul_ps (a,b);
__m256 c = _mm256_broadcast_ss (temp2);
__m256 d = _mm256_loadu_ps(B->m[1]);
result = _mm256_add_ps (result, _mm256_mul_ps (c,d));
__m256 e = _mm256_broadcast_ss (temp3);
__m256 f = _mm256_loadu_ps(B->m[2]);
result = _mm256_add_ps (result, _mm256_mul_ps (e,f));
/*
 * Lane-wise linear combination of the three SIMD rows of B:
 *     a[0]*B->row[0] + a[1]*B->row[1] + a[2]*B->row[2]
 *
 * a: pointer to at least three floats used as the weights.
 * B: matrix whose `row` members supply the vectors being combined.
 *    NOTE(review): only B->row is read here, not B->m -- presumably the
 *    caller has filled `row` beforehand; verify.
 * Returns the 4-lane sum.
 */
__m128 lincomb_AVX (const float *a, Mat33 *B){
    /* Replicate each scalar weight across all four lanes. */
    const __m128 w0 = _mm_broadcast_ss(a);
    const __m128 w1 = _mm_broadcast_ss(a + 1);
    const __m128 w2 = _mm_broadcast_ss(a + 2);

    const __m128 t0 = _mm_mul_ps(w0, B->row[0]);
    const __m128 t1 = _mm_mul_ps(w1, B->row[1]);
    const __m128 t2 = _mm_mul_ps(w2, B->row[2]);

    /* Summed left to right: (t0 + t1) + t2. */
    return _mm_add_ps(_mm_add_ps(t0, t1), t2);
}
/*
 * Returns a Mat33 whose three `row` entries each hold the value of
 * lincomb_AVX(A, B). The `m` member of the result is never written here.
 *
 * NOTE(review): every lincomb_AVX call receives the same arguments, so all
 * three rows receive the same value unless lincomb_AVX keeps state between
 * calls -- confirm whether a per-row argument is missing.
 */
Mat33 matmult_AVX (Mat33 *A, Mat33 *B){
    Mat33 out;

    _mm256_zeroupper();
    for (int r = 0; r < 3; r++) {
        out.row[r] = lincomb_AVX(A, B);
    }
    return out;
}
/*
 * NOTE(review): two function headers open back-to-back here
 * (`void matmult_AVX(out, A, B)` and `Mat33 matmult_simple(A, B)`) and
 * their statements share one body with a single closing brace. This looks
 * like two revisions shown together and will not compile as written:
 *   - the lincomb_AVX(A->m[i], B) calls and the `out->row[i]` stores use
 *     `out`, which is a parameter of matmult_AVX only;
 *   - the nested loops and `return C;` match matmult_simple's Mat33 return
 *     type, and the `out->row` stores placed after `return C;` can never
 *     execute.
 * Split these into two separate definitions -- TODO confirm which lines
 * belong to which function.
 */
void matmult_AVX (Mat33 *out, Mat33 *A, Mat33 *B){
Mat33 matmult_simple ( Mat33 *A, Mat33 *B){
_mm256_zeroupper();
/* One weighted combination per row of A, using that row as the weights. */
__m128 out0x = lincomb_AVX(A->m[0], B);
__m128 out1x = lincomb_AVX(A->m[1], B);
__m128 out2x = lincomb_AVX(A->m[2], B);
Mat33 C;
/* Scalar 3x3 product: C.m[i][j] = sum over k of A->m[i][k] * B->m[k][j]. */
for (int i=0; i < 3; i++)
for (int j=0; j < 3; j++)
C.m[i][j] = A->m[i][0]*B->m[0][j]+A->m[i][1]*B->m[1][j]+A->m[i][2]*B->m[2][j];
return C;
out->row[0] = out0x;
out->row[1] = out1x;
out->row[2] = out2x;
}
void set_mymat (Mat33 *M){
... ... @@ -47,7 +79,7 @@ void set_mymat (Mat33 *M){
M->m[i][j] = 1.0;
// printf ("%f ", M->m[i][j]);
}
printf ("\n");
// printf ("\n");
}
}
void get_mymat (Mat33 *M){
... ... @@ -63,15 +95,18 @@ void get_mymat (Mat33 *M){
/*
 * Driver: initialises A and B with set_mymat, computes C = matmult_AVX(A, B),
 * prints the first lane of C's first SIMD row, then hands A, B and C to
 * get_mymat. Returns 0.
 */
int main() {
    Mat33 A, B, C;
    /*
     * _mm256_storeu_ps writes eight floats to its destination, so the
     * destination must be real storage. The previous `float *tmp;` was never
     * pointed at anything, making the store undefined behaviour.
     */
    float tmp[8];

    set_mymat(&A);
    set_mymat(&B);

    C = matmult_AVX (&A, &B);
    _mm256_storeu_ps (tmp, C.row[0]);
    printf ("%f ", tmp[0]);

    get_mymat (&A);
    get_mymat (&B);
    /* The result now lives in C; the earlier `out` variable no longer exists. */
    get_mymat (&C);
    return 0;
}
... ...
No preview for this file type