merge pr #7 (take 2)

git-svn-id: http://svn2.nishi.boats/svn/milsko/trunk@540 b9cfdab3-6d41-4d17-bbe4-086880011989
This commit is contained in:
IoIxD
2025-11-01 03:37:40 +00:00
parent 141ac076e3
commit 3719f17250
10 changed files with 1053 additions and 0 deletions

412
src/math/default.c Normal file
View File

@@ -0,0 +1,412 @@
#include <Mw/LowLevelMath.h>
#include "math.h"
#include <assert.h>
#include <math.h>
/* Portable add for the eight-lane u8 view: for every lane a..h, `out` receives
 * the sum of the same lane of `a` and `b`, converted to the lane type on store. */
static void default_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u8.x = a->un.u8.x + b->un.u8.x
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Portable subtract for the eight-lane u8 view: out.lane = a.lane - b.lane for
 * lanes a..h, converted to the lane type on store. */
static void default_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u8.x = a->un.u8.x - b->un.u8.x
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Portable multiply for the eight-lane u8 view: out.lane = a.lane * b.lane for
 * lanes a..h; only the part that fits the lane type survives the store. */
static void default_multiply_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u8.x = a->un.u8.x * b->un.u8.x
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Integer reciprocal (1 / x, truncated toward zero) for lanes a..h of the u8
 * view. The result is 1 for a lane equal to 1 and 0 for anything larger.
 *
 * A zero lane yields 0. The previous powf(x, -1) form produced +inf for zero
 * and converting that to an integer is undefined behaviour; plain integer
 * division gives the same truncated results for every non-zero input. */
static void default_reciprocal_u8(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.u8.x = a->un.u8.x ? 1 / a->un.u8.x : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Square root for lanes a..h of the u8 view: sqrt() is evaluated in double
 * precision and the result is converted back to the lane type on store. */
static void default_squareRoot_u8(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.u8.x = sqrt(a->un.u8.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Per-lane logical right shift for the u8 view: out.lane = a.lane >> b.lane.
 *
 * A count of 8 or more clears the lane. The operands are promoted to int
 * before shifting, so an unchecked count of 32 or more (a lane can hold up to
 * 255) would be undefined behaviour; the guard makes the result 0 instead.
 * The signed i8 table reuses this function, so its right shift is logical. */
static void default_shiftRight_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u8.x = (b->un.u8.x < 8) ? (a->un.u8.x >> b->un.u8.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Per-lane left shift for the u8 view: out.lane = a.lane << b.lane, keeping
 * only the bits that fit the lane.
 *
 * A count of 8 or more clears the lane. Without the guard the promoted int
 * shift is undefined for counts >= 32 and can overflow a signed int from a
 * count of 24 upward; every count in 8..23 already produced 0 after the
 * narrowing store, so the guard only replaces undefined cases. */
static void default_shiftLeft_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u8.x = (b->un.u8.x < 8) ? (a->un.u8.x << b->un.u8.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Per-lane equality for the u8 view: each lane a..h of `out` becomes 1 when the
 * matching lanes of `a` and `b` are equal and 0 otherwise.
 * NOTE(review): the MMX replacement (PCMPEQB) writes an all-ones lane rather
 * than 1 for "true" - confirm which encoding callers expect. */
static void default_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u8.x = (a->un.u8.x == b->un.u8.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Per-lane "greater than" for the u8 view: each lane a..h of `out` becomes 1
 * when a.lane > b.lane and 0 otherwise.
 *
 * The comparison is strict. The earlier `>=` also reported equal lanes as
 * greater, which contradicts the operation's name and the MMX compare
 * (PCMPGT*) that replaces this function on x86.
 * NOTE(review): the MMX path writes all-ones for "true" instead of 1 -
 * confirm the encoding callers expect. */
static void default_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u8.x = (a->un.u8.x > b->un.u8.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Portable operation table for the U8x8 vector type. Not const: mmx_apply()
 * overwrites individual entries at run time. Members that are not named here
 * (And, Or, LesserThen) stay NULL through static zero-initialisation. */
static MwLLMathVTable table_u8 = {
	.Add         = default_add_u8,
	.Multiply    = default_multiply_u8,
	.Sub         = default_sub_u8,
	.Reciprocal  = default_reciprocal_u8,
	.SquareRoot  = default_squareRoot_u8,
	.ShiftRight  = default_shiftRight_u8,
	.ShiftLeft   = default_shiftLeft_u8,
	.Equal       = default_equal_u8,
	.GreaterThen = default_greaterThen_u8,
};
/* Portable add for the four-lane u16 view: out.lane = a.lane + b.lane for
 * lanes a..d, converted to the lane type on store. */
static void default_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u16.x = a->un.u16.x + b->un.u16.x
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Portable subtract for the four-lane u16 view: out.lane = a.lane - b.lane for
 * lanes a..d, converted to the lane type on store. */
static void default_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u16.x = a->un.u16.x - b->un.u16.x
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Portable multiply for the four-lane u16 view: out.lane = a.lane * b.lane for
 * lanes a..d, keeping the low bits that fit the lane.
 *
 * The product is formed in unsigned int (forced by the leading 1u). Without
 * that, both 16-bit operands promote to signed int and e.g. 65535 * 65535
 * overflows it, which is undefined behaviour. */
static void default_multiply_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u16.x = 1u * a->un.u16.x * b->un.u16.x
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Integer reciprocal (1 / x, truncated toward zero) for lanes a..d of the u16
 * view: 1 for a lane equal to 1, 0 for anything larger.
 *
 * A zero lane yields 0; the previous powf(x, -1) form converted +inf to an
 * integer for zero, which is undefined behaviour. */
static void default_reciprocal_u16(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.u16.x = a->un.u16.x ? 1 / a->un.u16.x : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Square root for lanes a..d of the u16 view: sqrt() is evaluated in double
 * precision and the result is converted back to the lane type on store. */
static void default_squareRoot_u16(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.u16.x = sqrt(a->un.u16.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Per-lane logical right shift for the u16 view: out.lane = a.lane >> b.lane.
 *
 * A count of 16 or more clears the lane. The shift is performed on promoted
 * ints, so an unchecked count of 32 or more would be undefined behaviour.
 * The signed i16 table reuses this function, so its right shift is logical. */
static void default_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u16.x = (b->un.u16.x < 16) ? (a->un.u16.x >> b->un.u16.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Per-lane left shift for the u16 view: out.lane = a.lane << b.lane, keeping
 * only the bits that fit the lane.
 *
 * A count of 16 or more clears the lane. Without the guard the promoted int
 * shift can overflow a signed int from a count of 16 upward and is undefined
 * for counts >= 32. */
static void default_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u16.x = (b->un.u16.x < 16) ? (a->un.u16.x << b->un.u16.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Per-lane equality for the u16 view: each lane a..d of `out` becomes 1 when
 * the matching lanes of `a` and `b` are equal and 0 otherwise.
 * NOTE(review): the MMX replacement writes all-ones for "true" - confirm the
 * encoding callers expect. */
static void default_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u16.x = (a->un.u16.x == b->un.u16.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Per-lane "greater than" for the u16 view: each lane a..d of `out` becomes 1
 * when a.lane > b.lane and 0 otherwise.
 *
 * The comparison is strict; the earlier `>=` also reported equal lanes as
 * greater, unlike the MMX compare that replaces this function on x86.
 * NOTE(review): the MMX path writes all-ones for "true" instead of 1. */
static void default_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u16.x = (a->un.u16.x > b->un.u16.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Portable operation table for the U16x4 vector type. Not const: mmx_apply()
 * overwrites individual entries at run time. And, Or and LesserThen are not
 * named here and therefore stay NULL. */
static MwLLMathVTable table_u16 = {
	.Add         = default_add_u16,
	.Multiply    = default_multiply_u16,
	.Sub         = default_sub_u16,
	.Reciprocal  = default_reciprocal_u16,
	.SquareRoot  = default_squareRoot_u16,
	.ShiftRight  = default_shiftRight_u16,
	.ShiftLeft   = default_shiftLeft_u16,
	.Equal       = default_equal_u16,
	.GreaterThen = default_greaterThen_u16,
};
/* Portable add for the two-lane u32 view: out.lane = a.lane + b.lane for
 * lanes a and b. */
static void default_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = a->un.u32.x + b->un.u32.x
	LANE(a); LANE(b);
#undef LANE
}
/* Portable subtract for the two-lane u32 view: out.lane = a.lane - b.lane for
 * lanes a and b. */
static void default_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = a->un.u32.x - b->un.u32.x
	LANE(a); LANE(b);
#undef LANE
}
/* Portable multiply for the two-lane u32 view: out.lane = a.lane * b.lane for
 * lanes a and b. */
static void default_multiply_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = a->un.u32.x * b->un.u32.x
	LANE(a); LANE(b);
#undef LANE
}
/* Integer reciprocal (1 / x, truncated toward zero) for lanes a and b of the
 * u32 view: 1 for a lane equal to 1, 0 for anything larger.
 *
 * A zero lane yields 0; the previous powf(x, -1) form converted +inf to an
 * integer for zero, which is undefined behaviour. */
static void default_reciprocal_u32(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.u32.x = a->un.u32.x ? 1 / a->un.u32.x : 0
	LANE(a); LANE(b);
#undef LANE
}
/* Square root for lanes a and b of the u32 view: sqrt() is evaluated in double
 * precision and the result is converted back to the lane type on store. */
static void default_squareRoot_u32(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.u32.x = sqrt(a->un.u32.x)
	LANE(a); LANE(b);
#undef LANE
}
/* Per-lane logical right shift for the u32 view: out.lane = a.lane >> b.lane.
 *
 * A count of 32 or more clears the lane; shifting by at least the operand
 * width is undefined behaviour in C, so the count is checked first.
 * The signed i32 table reuses this function, so its right shift is logical. */
static void default_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = (b->un.u32.x < 32) ? (a->un.u32.x >> b->un.u32.x) : 0
	LANE(a); LANE(b);
#undef LANE
}
/* Per-lane left shift for the u32 view: out.lane = a.lane << b.lane.
 *
 * A count of 32 or more clears the lane; shifting by at least the operand
 * width is undefined behaviour in C, so the count is checked first. */
static void default_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = (b->un.u32.x < 32) ? (a->un.u32.x << b->un.u32.x) : 0
	LANE(a); LANE(b);
#undef LANE
}
/* Per-lane equality for the u32 view: lanes a and b of `out` become 1 when the
 * matching lanes of `a` and `b` are equal and 0 otherwise.
 * NOTE(review): the MMX replacement writes all-ones for "true" - confirm the
 * encoding callers expect. */
static void default_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = (a->un.u32.x == b->un.u32.x)
	LANE(a); LANE(b);
#undef LANE
}
/* Per-lane "greater than" for the u32 view: lanes a and b of `out` become 1
 * when a.lane > b.lane and 0 otherwise.
 *
 * The comparison is strict; the earlier `>=` also reported equal lanes as
 * greater, unlike the MMX compare that replaces this function on x86.
 * NOTE(review): the MMX path writes all-ones for "true" instead of 1. */
static void default_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = (a->un.u32.x > b->un.u32.x)
	LANE(a); LANE(b);
#undef LANE
}
/* Portable operation table for the U32x2 vector type. Not const: mmx_apply()
 * overwrites individual entries at run time. And, Or and LesserThen are not
 * named here and therefore stay NULL. */
static MwLLMathVTable table_u32 = {
	.Add         = default_add_u32,
	.Multiply    = default_multiply_u32,
	.Sub         = default_sub_u32,
	.Reciprocal  = default_reciprocal_u32,
	.SquareRoot  = default_squareRoot_u32,
	.ShiftRight  = default_shiftRight_u32,
	.ShiftLeft   = default_shiftLeft_u32,
	.Equal       = default_equal_u32,
	.GreaterThen = default_greaterThen_u32,
};
/* Portable add for the eight-lane i8 view: out.lane = a.lane + b.lane for
 * lanes a..h, converted to the lane type on store. */
static void default_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i8.x = a->un.i8.x + b->un.i8.x
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Portable subtract for the eight-lane i8 view: out.lane = a.lane - b.lane for
 * lanes a..h, converted to the lane type on store. */
static void default_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i8.x = a->un.i8.x - b->un.i8.x
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Portable multiply for the eight-lane i8 view: out.lane = a.lane * b.lane for
 * lanes a..h, converted to the lane type on store. */
static void default_multiply_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i8.x = a->un.i8.x * b->un.i8.x
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Integer reciprocal (1 / x, truncated toward zero) for lanes a..h of the i8
 * view: 1 for 1, -1 for -1, and 0 for every other non-zero value.
 *
 * A zero lane yields 0; the previous powf(x, -1) form converted +inf to an
 * integer for zero, which is undefined behaviour. Integer division gives the
 * same truncated result for all non-zero inputs. */
static void default_reciprocal_i8(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.i8.x = a->un.i8.x ? 1 / a->un.i8.x : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Square root for lanes a..h of the i8 view, truncated to an integer.
 *
 * Lanes that are zero or negative yield 0. sqrt() of a negative value is NaN
 * and converting NaN to an integer is undefined behaviour, so negative lanes
 * never reach sqrt(). */
static void default_squareRoot_i8(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.i8.x = (a->un.i8.x > 0) ? sqrt(a->un.i8.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Per-lane equality for the i8 view: each lane a..h of `out` becomes 1 when the
 * matching lanes of `a` and `b` are equal and 0 otherwise. */
static void default_equal_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i8.x = (a->un.i8.x == b->un.i8.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Per-lane signed "greater than" for the i8 view: each lane a..h of `out`
 * becomes 1 when a.lane > b.lane and 0 otherwise.
 *
 * The comparison is strict; the earlier `>=` also reported equal lanes as
 * greater, which contradicts the operation's name. */
static void default_greaterThen_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i8.x = (a->un.i8.x > b->un.i8.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
	LANE(e); LANE(f); LANE(g); LANE(h);
#undef LANE
}
/* Portable operation table for the I8x8 vector type. Not const: mmx_apply()
 * overwrites individual entries at run time. And, Or and LesserThen are not
 * named here and therefore stay NULL.
 * NOTE(review): both shifts reuse the unsigned u8 implementations, so a right
 * shift of a negative lane is logical, not arithmetic - confirm this is
 * intended. */
static MwLLMathVTable table_i8 = {
	.Add         = default_add_i8,
	.Multiply    = default_multiply_i8,
	.Sub         = default_sub_i8,
	.Reciprocal  = default_reciprocal_i8,
	.SquareRoot  = default_squareRoot_i8,
	.ShiftRight  = default_shiftRight_u8,
	.ShiftLeft   = default_shiftLeft_u8,
	.Equal       = default_equal_i8,
	.GreaterThen = default_greaterThen_i8,
};
/* Portable add for the four-lane i16 view: out.lane = a.lane + b.lane for
 * lanes a..d, converted to the lane type on store. */
static void default_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i16.x = a->un.i16.x + b->un.i16.x
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Portable subtract for the four-lane i16 view: out.lane = a.lane - b.lane for
 * lanes a..d, converted to the lane type on store. */
static void default_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i16.x = a->un.i16.x - b->un.i16.x
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Portable multiply for the four-lane i16 view: out.lane = a.lane * b.lane for
 * lanes a..d, converted to the lane type on store. */
static void default_multiply_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i16.x = a->un.i16.x * b->un.i16.x
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Integer reciprocal (1 / x, truncated toward zero) for lanes a..d of the i16
 * view: 1 for 1, -1 for -1, and 0 for every other non-zero value.
 *
 * A zero lane yields 0; the previous powf(x, -1) form converted +inf to an
 * integer for zero, which is undefined behaviour. */
static void default_reciprocal_i16(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.i16.x = a->un.i16.x ? 1 / a->un.i16.x : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Square root for lanes a..d of the i16 view, truncated to an integer.
 *
 * Lanes that are zero or negative yield 0. sqrt() of a negative value is NaN
 * and converting NaN to an integer is undefined behaviour, so negative lanes
 * never reach sqrt(). */
static void default_squareRoot_i16(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.i16.x = (a->un.i16.x > 0) ? sqrt(a->un.i16.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Per-lane equality for the i16 view: each lane a..d of `out` becomes 1 when
 * the matching lanes of `a` and `b` are equal and 0 otherwise. */
static void default_equal_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i16.x = (a->un.i16.x == b->un.i16.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Per-lane signed "greater than" for the i16 view: each lane a..d of `out`
 * becomes 1 when a.lane > b.lane and 0 otherwise.
 *
 * The comparison is strict; the earlier `>=` also reported equal lanes as
 * greater, which contradicts the operation's name. */
static void default_greaterThen_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i16.x = (a->un.i16.x > b->un.i16.x)
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Portable operation table for the I16x4 vector type. Not const: mmx_apply()
 * overwrites individual entries at run time. And, Or and LesserThen are not
 * named here and therefore stay NULL.
 * NOTE(review): both shifts reuse the unsigned u16 implementations, so a right
 * shift of a negative lane is logical, not arithmetic - confirm this is
 * intended. */
static MwLLMathVTable table_i16 = {
	.Add         = default_add_i16,
	.Multiply    = default_multiply_i16,
	.Sub         = default_sub_i16,
	.Reciprocal  = default_reciprocal_i16,
	.SquareRoot  = default_squareRoot_i16,
	.ShiftRight  = default_shiftRight_u16,
	.ShiftLeft   = default_shiftLeft_u16,
	.Equal       = default_equal_i16,
	.GreaterThen = default_greaterThen_i16,
};
/* Portable add for the two-lane i32 view, wrapping on overflow.
 *
 * The sum is computed through the u32 view of the same union because signed
 * int overflow is undefined behaviour while unsigned arithmetic wraps; on a
 * two's complement machine the stored bits equal the wrapped signed result.
 * Relies on un.u32 and un.i32 overlaying the same two 32-bit slots, which the
 * i32 table already assumes by reusing the u32 shift functions. */
static void default_add_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = a->un.u32.x + b->un.u32.x
	LANE(a); LANE(b);
#undef LANE
}
/* Portable subtract for the two-lane i32 view, wrapping on overflow.
 *
 * The difference is computed through the u32 view of the same union because
 * signed int overflow is undefined behaviour while unsigned arithmetic wraps;
 * on a two's complement machine the stored bits equal the wrapped signed
 * result. Relies on un.u32 and un.i32 overlaying the same two 32-bit slots,
 * which the i32 table already assumes by reusing the u32 shift functions. */
static void default_sub_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = a->un.u32.x - b->un.u32.x
	LANE(a); LANE(b);
#undef LANE
}
/* Portable multiply for the two-lane i32 view, keeping the low 32 bits.
 *
 * The product is computed through the u32 view of the same union because
 * signed int overflow is undefined behaviour while unsigned arithmetic wraps;
 * the low 32 bits of a product are identical for signed and unsigned operands
 * on a two's complement machine. Relies on un.u32 and un.i32 overlaying the
 * same two 32-bit slots, which the i32 table already assumes by reusing the
 * u32 shift functions. */
static void default_multiply_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.u32.x = a->un.u32.x * b->un.u32.x
	LANE(a); LANE(b);
#undef LANE
}
/* Integer reciprocal (1 / x, truncated toward zero) for lanes a and b of the
 * i32 view: 1 for 1, -1 for -1, and 0 for every other non-zero value.
 *
 * A zero lane yields 0; the previous powf(x, -1) form converted +inf to an
 * integer for zero, which is undefined behaviour. */
static void default_reciprocal_i32(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.i32.x = a->un.i32.x ? 1 / a->un.i32.x : 0
	LANE(a); LANE(b);
#undef LANE
}
/* Square root for lanes a and b of the i32 view, truncated to an integer.
 *
 * Lanes that are zero or negative yield 0. sqrt() of a negative value is NaN
 * and converting NaN to an integer is undefined behaviour, so negative lanes
 * never reach sqrt(). */
static void default_squareRoot_i32(MwLLVec* a, MwLLVec* out) {
#define LANE(x) out->un.i32.x = (a->un.i32.x > 0) ? sqrt(a->un.i32.x) : 0
	LANE(a); LANE(b);
#undef LANE
}
/* Per-lane equality for the i32 view: lanes a and b of `out` become 1 when the
 * matching lanes of `a` and `b` are equal and 0 otherwise. */
static void default_equal_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i32.x = (a->un.i32.x == b->un.i32.x)
	LANE(a); LANE(b);
#undef LANE
}
/* Per-lane signed "greater than" for the i32 view: lanes a and b of `out`
 * become 1 when a.lane > b.lane and 0 otherwise.
 *
 * The comparison is strict; the earlier `>=` also reported equal lanes as
 * greater, which contradicts the operation's name. */
static void default_greaterThen_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
#define LANE(x) out->un.i32.x = (a->un.i32.x > b->un.i32.x)
	LANE(a); LANE(b);
#undef LANE
}
/* Portable operation table for the I32x2 vector type. Not const: mmx_apply()
 * overwrites individual entries at run time. And, Or and LesserThen are not
 * named here and therefore stay NULL.
 * NOTE(review): both shifts reuse the unsigned u32 implementations, so a right
 * shift of a negative lane is logical, not arithmetic - confirm this is
 * intended. */
static MwLLMathVTable table_i32 = {
	.Add         = default_add_i32,
	.Multiply    = default_multiply_i32,
	.Sub         = default_sub_i32,
	.Reciprocal  = default_reciprocal_i32,
	.SquareRoot  = default_squareRoot_i32,
	.ShiftRight  = default_shiftRight_u32,
	.ShiftLeft   = default_shiftLeft_u32,
	.Equal       = default_equal_i32,
	.GreaterThen = default_greaterThen_i32,
};
static MwLLMathVTable* defaultMultiTable[_MwLLVecType_Max] = {
&table_u8, // _MwLLVecTypeU8x8
&table_u16, // _MwLLVecTypeU16x4
&table_u32, // _MwLLVecTypeU32x2
&table_i8, // _MwLLVecTypeI8x8
&table_i16, // _MwLLVecTypeI16x4
&table_i32, // _MwLLVecTypeI32x2
};
/* Returns the array of portable operation tables, indexed by MwLLVecType.
 * The array and the tables are file-static and mutable: mmx_apply() patches
 * entries in place through the returned pointer.
 *
 * Declared with (void) so the definition is a real prototype; an empty
 * parameter list would accept any arguments unchecked. */
MwLLMathVTable** default_multi_table(void) {
	return defaultMultiTable;
}

125
src/math/math.c Normal file
View File

@@ -0,0 +1,125 @@
#include <Mw/LowLevelMath.h>
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <unistd.h>
#include "math.h"
#include "Mw/BaseTypes.h"
#include "x86intrin.h"
/* Builds a vector of lane type `ty` from variadic lane values.
 *
 * The caller must pass exactly as many integer arguments as `ty` has lanes
 * (8 for the 8-bit types, 4 for the 16-bit types, 2 for the 32-bit types), in
 * lane order a, b, c, ... Every lane is fetched as `int` because variadic
 * arguments narrower than int are promoted to int by the caller.
 *
 * For _MwLLVecType_Max or an unknown `ty` no arguments are consumed and the
 * payload is all zero bits. It used to be copied out uninitialised. */
MwLLVec _MwLLVecCreateGeneric(MwLLVecType ty, ...) {
	MwLLVecUnion un;
	MwLLVec      vec;
	va_list      va;

	/* Give the payload a defined value before any lane is filled in. */
	un.all = 0;

	va_start(va, ty);
	// clang-format off
#define LANES_AB(v) un.v.a = va_arg(va, int); un.v.b = va_arg(va, int)
#define LANES_CD(v) un.v.c = va_arg(va, int); un.v.d = va_arg(va, int)
#define LANES_EF(v) un.v.e = va_arg(va, int); un.v.f = va_arg(va, int)
#define LANES_GH(v) un.v.g = va_arg(va, int); un.v.h = va_arg(va, int)
	switch(ty) {
	case _MwLLVecTypeU8x8:  LANES_AB(u8);  LANES_CD(u8);  LANES_EF(u8); LANES_GH(u8); break;
	case _MwLLVecTypeU16x4: LANES_AB(u16); LANES_CD(u16); break;
	case _MwLLVecTypeU32x2: LANES_AB(u32); break;
	case _MwLLVecTypeI8x8:  LANES_AB(i8);  LANES_CD(i8);  LANES_EF(i8); LANES_GH(i8); break;
	case _MwLLVecTypeI16x4: LANES_AB(i16); LANES_CD(i16); break;
	case _MwLLVecTypeI32x2: LANES_AB(i32); break;
	case _MwLLVecType_Max:  break;
	default:                break;
	}
#undef LANES_AB
#undef LANES_CD
#undef LANES_EF
#undef LANES_GH
	// clang-format on
	va_end(va);

	vec.ty = ty;
	vec.un = un;
	return vec;
}
#if defined(__i386__) || defined(__x86_64__)
/* Reports whether the CPU supports MMX by running CPUID leaf 1 and testing
 * bit 23 of EDX.
 *
 * CPUID writes EAX, EBX, ECX and EDX, so all four are declared as outputs;
 * leaving ECX out let the compiler keep a live value in a register that the
 * instruction overwrites. The function only exists on x86 targets, matching
 * the guard around its single call site, so other architectures never see
 * the inline assembly. */
static MwBool hasMMX(void) {
	MwU32 eax, ebx, ecx, edx;

	__asm__ __volatile__("cpuid"
			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
			     : "a"(1), "c"(0));
	(void)eax;
	(void)ebx;
	(void)ecx;
	return (edx & (1u << 23)) != 0;
}
#endif
/* Active array of operation tables, indexed by MwLLVecType. NULL until the
 * first lookup runs multiTableSetupAndGet(). */
static MwLLMathVTable** mwLLMultiTable;
/* First-use lookup: installs the tables, then returns the one for `ty`. */
static MwLLMathVTable* multiTableSetupAndGet(MwLLVecType ty);
/* Steady-state lookup: indexes the already-installed tables. */
static MwLLMathVTable* multiTableGet(MwLLVecType ty);
/* Current lookup routine. Starts as the first-use variant, which replaces it
 * with multiTableGet once setup is done.
 * NOTE(review): the swap is a plain store with no synchronisation - confirm
 * the first call cannot race between threads. */
static MwLLMathVTable* (*mwLLmathFunc)(MwLLVecType ty) = multiTableSetupAndGet;
/* Returns the operation table for `ty` through whichever lookup routine is
 * currently installed in mwLLmathFunc. */
static MwLLMathVTable* getMultiTable(MwLLVecType ty) {
	MwLLMathVTable* (*lookup)(MwLLVecType) = mwLLmathFunc;

	return lookup(ty);
}
/* First-use lookup. Installs the portable tables, lets mmx_apply() patch them
 * when building for x86 and the CPU reports MMX, switches mwLLmathFunc to the
 * plain lookup so this setup runs only once, and returns the table for `ty`.
 * `ty` is used as an array index without a range check. */
static MwLLMathVTable* multiTableSetupAndGet(MwLLVecType ty) {
	MwLLMathVTable** tables = default_multi_table();

	mwLLMultiTable = tables;
#if defined(__i386__) || defined(__x86_64__)
	if(hasMMX()) {
		mmx_apply(tables);
	}
#endif
	mwLLmathFunc = multiTableGet;
	return tables[ty];
}
/* Steady-state lookup: returns the installed table for `ty`. `ty` is used as
 * an array index without a range check. */
static MwLLMathVTable* multiTableGet(MwLLVecType ty) {
	MwLLMathVTable* table = mwLLMultiTable[ty];

	return table;
}
/* Lane-wise add: out = a + b, dispatched through the table selected by a->ty.
 * All three vectors must carry the same lane type; this is checked with
 * assert(), so only in builds without NDEBUG.
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathAdd(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	assert(a->ty == b->ty && a->ty == out->ty);
	getMultiTable(a->ty)->Add(a, b, out);
}
/* Lane-wise subtract: out = a - b, dispatched through the table selected by
 * a->ty. All three vectors must carry the same lane type (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathSub(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	assert(a->ty == b->ty && a->ty == out->ty);
	getMultiTable(a->ty)->Sub(a, b, out);
}
/* Lane-wise multiply: out = a * b, dispatched through the table selected by
 * a->ty. All three vectors must carry the same lane type (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathMultiply(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	assert(a->ty == b->ty && a->ty == out->ty);
	getMultiTable(a->ty)->Multiply(a, b, out);
}
/* Lane-wise reciprocal of `a` into `out`, dispatched through the table
 * selected by a->ty. Both vectors must carry the same lane type
 * (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathReciprocal(MwLLVec* a, MwLLVec* out) {
	assert(a->ty == out->ty);
	getMultiTable(a->ty)->Reciprocal(a, out);
}
/* Lane-wise square root of `a` into `out`, dispatched through the table
 * selected by a->ty. Both vectors must carry the same lane type
 * (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathSquareRoot(MwLLVec* a, MwLLVec* out) {
	assert(a->ty == out->ty);
	getMultiTable(a->ty)->SquareRoot(a, out);
}
/* Lane-wise right shift of `a` by the counts in `b`, dispatched through the
 * table selected by a->ty. All three vectors must carry the same lane type
 * (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathShiftRight(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	assert(a->ty == b->ty && a->ty == out->ty);
	getMultiTable(a->ty)->ShiftRight(a, b, out);
}
/* Lane-wise left shift of `a` by the counts in `b`, dispatched through the
 * table selected by a->ty. All three vectors must carry the same lane type
 * (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathShiftLeft(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	assert(a->ty == b->ty && a->ty == out->ty);
	getMultiTable(a->ty)->ShiftLeft(a, b, out);
}
/* Lane-wise equality test of `a` and `b` into `out`, dispatched through the
 * table selected by a->ty. All three vectors must carry the same lane type
 * (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathEqual(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	assert(a->ty == b->ty && a->ty == out->ty);
	getMultiTable(a->ty)->Equal(a, b, out);
}
/* Lane-wise "a greater than b" test into `out`, dispatched through the table
 * selected by a->ty. All three vectors must carry the same lane type
 * (assert-checked).
 *
 * The call is a plain statement: `return expr;` with a void expression is not
 * allowed in a void function in ISO C. */
void MwLLMathGreaterThen(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	assert(a->ty == b->ty && a->ty == out->ty);
	getMultiTable(a->ty)->GreaterThen(a, b, out);
}
/* Bitwise AND of the whole payloads (`un.all`) of `a` and `b` into `out`.
 * Lane types are neither checked nor consulted, and out->ty is left as is. */
void MwLLMathAnd(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	out->un.all = (b->un.all) & (a->un.all);
}
/* Bitwise OR of the whole payloads (`un.all`) of `a` and `b` into `out`.
 * Lane types are neither checked nor consulted, and out->ty is left as is. */
void MwLLMathOr(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	out->un.all = (b->un.all) | (a->un.all);
}

28
src/math/math.h Normal file
View File

@@ -0,0 +1,28 @@
/* $Id$ */
#ifndef __MW_LOWLEVEL_INTERNAL_MATH_H__
#define __MW_LOWLEVEL_INTERNAL_MATH_H__
#include <Mw/LowLevelMath.h>
/* Per-lane-type table of vector operations. One instance exists per
 * MwLLVecType; entries are plain function pointers so a backend such as
 * mmx_apply() can replace individual operations at run time.
 * Binary operations take (a, b, out); unary ones take (a, out). */
struct _MwLLMathVTable {
void (*Add)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*Multiply)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*Sub)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*Reciprocal)(MwLLVec* a, MwLLVec* out);
void (*SquareRoot)(MwLLVec* a, MwLLVec* out);
/* NOTE(review): And and Or are not assigned by any table initialiser or by
 * mmx_apply() in this change; the public MwLLMathAnd/MwLLMathOr operate on
 * un.all directly instead of going through these slots. */
void (*And)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*Or)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*ShiftRight)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*ShiftLeft)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*Equal)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
void (*GreaterThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
/* NOTE(review): LesserThen is likewise never assigned in this change. */
void (*LesserThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
};
typedef struct _MwLLMathVTable MwLLMathVTable;

/* Returns the array of portable operation tables, indexed by MwLLVecType.
 * Declared with (void) so that calls are checked against a real prototype. */
MwLLMathVTable** default_multi_table(void);

/* Replaces entries of the tables in `t` (indexed by MwLLVecType) with MMX
 * implementations. */
void mmx_apply(MwLLMathVTable** t);
#endif

103
src/math/mmx.c Normal file
View File

@@ -0,0 +1,103 @@
#include <Mw/LowLevelMath.h>
#include "math.h"
#include <assert.h>
#include <mmintrin.h>
#include <stdio.h>
#include <x86intrin.h>
/* Applies a two-operand MMX intrinsic to the 64-bit payloads of `a` and `b`
 * (names taken from the enclosing function) and stores the result in `out`.
 *   intrin - the intrinsic, called as intrin(__m64, __m64)
 *   _ty    - union member of a/b to read
 *   _rty   - union member of out to write
 *   _tyn   - struct tag of that member, used to reinterpret the result
 *
 * Wrapped in do { } while(0) so the expansion is a single statement, and
 * finished with EMMS (_m_empty): MMX registers alias the x87 register stack,
 * so floating-point code running afterwards misbehaves unless the MMX state
 * is cleared. */
#define DO_MMX_INTRINSIC(intrin, _ty, _rty, _tyn) \
	do { \
		__m64 m = intrin(*(__m64*)&a->un._ty, *(__m64*)&b->un._ty); \
		out->un._rty = *(struct _tyn*)&m; \
		_m_empty(); \
	} while(0)
/* MMX add for the u8 view (PADDUSB).
 * NOTE(review): PADDUSB saturates at the lane maximum, whereas the portable
 * default_add_u8 stores the plain `+` result - confirm which overflow
 * behaviour callers expect. */
static void mmx_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_paddusb,
	    u8, u8,
	    _MwLLVecDataU8x8);
}
/* MMX subtract for the u8 view (PSUBUSB).
 * NOTE(review): PSUBUSB saturates at zero, whereas the portable
 * default_sub_u8 stores the plain `-` result - confirm which underflow
 * behaviour callers expect. */
static void mmx_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_psubusb,
	    u8, u8,
	    _MwLLVecDataU8x8);
}
/* MMX equality for the u8 view (PCMPEQB): a lane of `out` is all ones where
 * the lanes of `a` and `b` match and zero elsewhere.
 * NOTE(review): the portable default_equal_u8 writes 1 rather than all ones
 * for a match - confirm which encoding callers expect. */
static void mmx_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_pcmpeqb,
	    u8, u8,
	    _MwLLVecDataU8x8);
}
/* MMX unsigned "greater than" for the u8 view: a lane of `out` is all ones
 * where a.lane > b.lane and zero elsewhere.
 *
 * PCMPGTB compares lanes as signed bytes, so using it directly would rank any
 * value of 128 or more below every smaller one. Flipping the top bit of both
 * operands first maps unsigned order onto signed order, after which the
 * signed compare gives the unsigned answer. EMMS is issued before returning
 * so later x87 floating-point code is not disturbed. */
static void mmx_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	__m64 bias = _mm_set1_pi8((char)-128);
	__m64 lhs  = _m_pxor(*(__m64*)&a->un.u8, bias);
	__m64 rhs  = _m_pxor(*(__m64*)&b->un.u8, bias);
	__m64 m    = _m_pcmpgtb(lhs, rhs);

	out->un.u8 = *(struct _MwLLVecDataU8x8*)&m;
	_m_empty();
}
/* MMX add for the u16 view (PADDUSW).
 * NOTE(review): PADDUSW saturates at the lane maximum, whereas the portable
 * default_add_u16 stores the plain `+` result - confirm which overflow
 * behaviour callers expect. */
static void mmx_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_paddusw,
	    u16, u16,
	    _MwLLVecDataU16x4);
}
/* MMX subtract for the u16 view (PSUBUSW).
 * NOTE(review): PSUBUSW saturates at zero, whereas the portable
 * default_sub_u16 stores the plain `-` result - confirm which underflow
 * behaviour callers expect. */
static void mmx_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_psubusw,
	    u16, u16,
	    _MwLLVecDataU16x4);
}
/* Logical right shift for the u16 view with per-lane counts taken from `b`,
 * matching the portable default_shiftRight_u16. Counts of 16 or more clear
 * the lane. mmx_apply() also installs this function for the i16 table.
 *
 * PSRLW shifts every lane by one count: the whole 64-bit value of its second
 * operand. Passing the vector `b` directly therefore made almost any non-zero
 * `b` look like a huge count and zeroed the result. The instruction is now
 * used only when all four lanes ask for the same count, which is passed as a
 * scalar; mixed counts are shifted lane by lane. */
static void mmx_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	unsigned int n = b->un.u16.a;

	if(b->un.u16.b == n && b->un.u16.c == n && b->un.u16.d == n) {
		__m64 m = _m_psrlw(*(__m64*)&a->un.u16, _m_from_int((int)n));

		out->un.u16 = *(struct _MwLLVecDataU16x4*)&m;
		_m_empty();
		return;
	}
#define LANE(x) out->un.u16.x = (b->un.u16.x < 16) ? (a->un.u16.x >> b->un.u16.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* Left shift for the u16 view with per-lane counts taken from `b`, matching
 * the portable default_shiftLeft_u16. Counts of 16 or more clear the lane.
 * mmx_apply() also installs this function for the i16 table.
 *
 * PSLLW shifts every lane by one count: the whole 64-bit value of its second
 * operand. Passing the vector `b` directly therefore made almost any non-zero
 * `b` look like a huge count and zeroed the result. The instruction is now
 * used only when all four lanes ask for the same count, which is passed as a
 * scalar; mixed counts are shifted lane by lane. */
static void mmx_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	unsigned int n = b->un.u16.a;

	if(b->un.u16.b == n && b->un.u16.c == n && b->un.u16.d == n) {
		__m64 m = _m_psllw(*(__m64*)&a->un.u16, _m_from_int((int)n));

		out->un.u16 = *(struct _MwLLVecDataU16x4*)&m;
		_m_empty();
		return;
	}
#define LANE(x) out->un.u16.x = (b->un.u16.x < 16) ? (a->un.u16.x << b->un.u16.x) : 0
	LANE(a); LANE(b); LANE(c); LANE(d);
#undef LANE
}
/* MMX equality for the u16 view (PCMPEQW): a lane of `out` is all ones where
 * the lanes of `a` and `b` match and zero elsewhere.
 * NOTE(review): the portable default_equal_u16 writes 1 rather than all ones
 * for a match - confirm which encoding callers expect. */
static void mmx_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_pcmpeqw,
	    u16, u16,
	    _MwLLVecDataU16x4);
}
/* MMX unsigned "greater than" for the u16 view: a lane of `out` is all ones
 * where a.lane > b.lane and zero elsewhere.
 *
 * PCMPGTW compares lanes as signed words, so using it directly would rank any
 * value of 32768 or more below every smaller one. Flipping the top bit of
 * both operands first maps unsigned order onto signed order. EMMS is issued
 * before returning so later x87 floating-point code is not disturbed. */
static void mmx_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	__m64 bias = _mm_set1_pi16((short)-32768);
	__m64 lhs  = _m_pxor(*(__m64*)&a->un.u16, bias);
	__m64 rhs  = _m_pxor(*(__m64*)&b->un.u16, bias);
	__m64 m    = _m_pcmpgtw(lhs, rhs);

	out->un.u16 = *(struct _MwLLVecDataU16x4*)&m;
	_m_empty();
}
/* MMX add for the u32 view (PADDD), applied to both 32-bit lanes at once. */
static void mmx_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_paddd,
	    u32, u32,
	    _MwLLVecDataU32x2);
}
/* MMX subtract for the u32 view (PSUBD), applied to both 32-bit lanes at
 * once. */
static void mmx_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_psubd,
	    u32, u32,
	    _MwLLVecDataU32x2);
}
/* Logical right shift for the u32 view with per-lane counts taken from `b`,
 * matching the portable default_shiftRight_u32. Counts of 32 or more clear
 * the lane. mmx_apply() also installs this function for the i32 table.
 *
 * PSRLD shifts both lanes by one count: the whole 64-bit value of its second
 * operand. Passing the vector `b` directly therefore made almost any non-zero
 * `b` look like a huge count and zeroed the result. The instruction is now
 * used only when both lanes ask for the same count, which is passed as a
 * scalar (clamped to 32, which already clears the lanes); differing counts
 * are shifted lane by lane. */
static void mmx_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	if(b->un.u32.a == b->un.u32.b) {
		int   n = (b->un.u32.a < 32) ? (int)b->un.u32.a : 32;
		__m64 m = _m_psrld(*(__m64*)&a->un.u32, _m_from_int(n));

		out->un.u32 = *(struct _MwLLVecDataU32x2*)&m;
		_m_empty();
		return;
	}
#define LANE(x) out->un.u32.x = (b->un.u32.x < 32) ? (a->un.u32.x >> b->un.u32.x) : 0
	LANE(a); LANE(b);
#undef LANE
}
/* Left shift for the u32 view with per-lane counts taken from `b`, matching
 * the portable default_shiftLeft_u32. Counts of 32 or more clear the lane.
 * mmx_apply() also installs this function for the i32 table.
 *
 * PSLLD shifts both lanes by one count: the whole 64-bit value of its second
 * operand. Passing the vector `b` directly therefore made almost any non-zero
 * `b` look like a huge count and zeroed the result. The instruction is now
 * used only when both lanes ask for the same count, which is passed as a
 * scalar (clamped to 32, which already clears the lanes); differing counts
 * are shifted lane by lane. */
static void mmx_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	if(b->un.u32.a == b->un.u32.b) {
		int   n = (b->un.u32.a < 32) ? (int)b->un.u32.a : 32;
		__m64 m = _m_pslld(*(__m64*)&a->un.u32, _m_from_int(n));

		out->un.u32 = *(struct _MwLLVecDataU32x2*)&m;
		_m_empty();
		return;
	}
#define LANE(x) out->un.u32.x = (b->un.u32.x < 32) ? (a->un.u32.x << b->un.u32.x) : 0
	LANE(a); LANE(b);
#undef LANE
}
/* MMX equality for the u32 view: a lane of `out` is all ones where the 32-bit
 * lanes of `a` and `b` match and zero elsewhere.
 *
 * Uses the doubleword compare PCMPEQD. The word compare PCMPEQW used before
 * tested each 16-bit half on its own, so two values sharing only one half
 * produced a half-set mask instead of "not equal". */
static void mmx_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(_m_pcmpeqd, u32, u32, _MwLLVecDataU32x2);
}
/* MMX unsigned "greater than" for the u32 view: a lane of `out` is all ones
 * where a.lane > b.lane and zero elsewhere.
 *
 * Two defects are fixed. The compare is the doubleword PCMPGTD, not the word
 * compare PCMPGTW, which judged each 16-bit half separately. And because
 * PCMPGTD is a signed compare, the top bit of both operands is flipped first
 * so that unsigned order maps onto signed order. EMMS is issued before
 * returning so later x87 floating-point code is not disturbed. */
static void mmx_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	__m64 bias = _mm_set1_pi32(-2147483647 - 1);
	__m64 lhs  = _m_pxor(*(__m64*)&a->un.u32, bias);
	__m64 rhs  = _m_pxor(*(__m64*)&b->un.u32, bias);
	__m64 m    = _m_pcmpgtd(lhs, rhs);

	out->un.u32 = *(struct _MwLLVecDataU32x2*)&m;
	_m_empty();
}
/* MMX add for the i8 view (PADDSB).
 * NOTE(review): PADDSB saturates at the signed lane limits, whereas the
 * portable default_add_i8 stores the plain `+` result - confirm which
 * overflow behaviour callers expect. */
static void mmx_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_paddsb,
	    i8, i8,
	    _MwLLVecDataI8x8);
}
/* MMX subtract for the i8 view (PSUBSB).
 * NOTE(review): PSUBSB saturates at the signed lane limits, whereas the
 * portable default_sub_i8 stores the plain `-` result - confirm which
 * overflow behaviour callers expect. */
static void mmx_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_psubsb,
	    i8, i8,
	    _MwLLVecDataI8x8);
}
/* MMX add for the i16 view (PADDSW).
 * NOTE(review): PADDSW saturates at the signed lane limits, whereas the
 * portable default_add_i16 stores the plain `+` result - confirm which
 * overflow behaviour callers expect. */
static void mmx_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_paddsw,
	    i16, i16,
	    _MwLLVecDataI16x4);
}
/* MMX subtract for the i16 view (PSUBSW).
 * NOTE(review): PSUBSW saturates at the signed lane limits, whereas the
 * portable default_sub_i16 stores the plain `-` result - confirm which
 * overflow behaviour callers expect. */
static void mmx_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
	DO_MMX_INTRINSIC(
	    _m_psubsw,
	    i16, i16,
	    _MwLLVecDataI16x4);
}
/* Overwrites entries of the operation tables in `t` (indexed by MwLLVecType)
 * with the MMX implementations defined in this file. Operations not listed
 * keep whatever the tables held before. The signed 16- and 32-bit tables
 * receive the unsigned shift functions. */
void mmx_apply(MwLLMathVTable** t) {
	MwLLMathVTable* u8  = t[_MwLLVecTypeU8x8];
	MwLLMathVTable* u16 = t[_MwLLVecTypeU16x4];
	MwLLMathVTable* u32 = t[_MwLLVecTypeU32x2];
	MwLLMathVTable* i8  = t[_MwLLVecTypeI8x8];
	MwLLMathVTable* i16 = t[_MwLLVecTypeI16x4];
	MwLLMathVTable* i32 = t[_MwLLVecTypeI32x2];

	/* unsigned 8-bit lanes */
	u8->Add         = mmx_add_u8;
	u8->Sub         = mmx_sub_u8;
	u8->Equal       = mmx_equal_u8;
	u8->GreaterThen = mmx_greaterThen_u8;

	/* unsigned 16-bit lanes */
	u16->Add         = mmx_add_u16;
	u16->Sub         = mmx_sub_u16;
	u16->ShiftRight  = mmx_shiftRight_u16;
	u16->ShiftLeft   = mmx_shiftLeft_u16;
	u16->Equal       = mmx_equal_u16;
	u16->GreaterThen = mmx_greaterThen_u16;

	/* unsigned 32-bit lanes */
	u32->Add         = mmx_add_u32;
	u32->Sub         = mmx_sub_u32;
	u32->ShiftRight  = mmx_shiftRight_u32;
	u32->ShiftLeft   = mmx_shiftLeft_u32;
	u32->Equal       = mmx_equal_u32;
	u32->GreaterThen = mmx_greaterThen_u32;

	/* signed 8-bit lanes */
	i8->Add = mmx_add_i8;
	i8->Sub = mmx_sub_i8;

	/* signed 16-bit lanes */
	i16->Add        = mmx_add_i16;
	i16->Sub        = mmx_sub_i16;
	i16->ShiftRight = mmx_shiftRight_u16;
	i16->ShiftLeft  = mmx_shiftLeft_u16;

	/* signed 32-bit lanes */
	i32->ShiftRight = mmx_shiftRight_u32;
	i32->ShiftLeft  = mmx_shiftLeft_u32;
}

6
src/math/mmx_guard.c Normal file
View File

@@ -0,0 +1,6 @@
/* Build guard for the MMX backend: x86 targets compile the real
 * implementation, every other target gets a no-op mmx_apply() so the symbol
 * always exists. */
#if defined(__WATCOMC__) || defined(__i386__) || defined(__amd64__) || defined(__x86_64__)
#include "mmx.c"
#else
/* The stub needs the MwLLMathVTable declaration; without this include the
 * fallback branch did not compile. */
#include "math.h"
void mmx_apply(MwLLMathVTable** t) {
	(void)t; /* nothing to patch without MMX */
}
#endif