merge pr #7 (take 2)

git-svn-id: http://svn2.nishi.boats/svn/milsko/trunk@540 b9cfdab3-6d41-4d17-bbe4-086880011989
2026-01-17 22:54:07 +00:00 · 2025-11-01 03:37:40 +00:00
parent 141ac076e3
commit 3719f17250
10 changed files with 1053 additions and 0 deletions
--- a/src/math/default.c
+++ b/src/math/default.c
@@ -0,0 +1,412 @@
+#include <Mw/LowLevelMath.h>
+#include "math.h"
+#include <assert.h>
+#include <math.h>
+
+static void default_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a + b->un.u8.a;
+	out->un.u8.b = a->un.u8.b + b->un.u8.b;
+	out->un.u8.c = a->un.u8.c + b->un.u8.c;
+	out->un.u8.d = a->un.u8.d + b->un.u8.d;
+	out->un.u8.e = a->un.u8.e + b->un.u8.e;
+	out->un.u8.f = a->un.u8.f + b->un.u8.f;
+	out->un.u8.g = a->un.u8.g + b->un.u8.g;
+	out->un.u8.h = a->un.u8.h + b->un.u8.h;
+};
+static void default_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a - b->un.u8.a;
+	out->un.u8.b = a->un.u8.b - b->un.u8.b;
+	out->un.u8.c = a->un.u8.c - b->un.u8.c;
+	out->un.u8.d = a->un.u8.d - b->un.u8.d;
+	out->un.u8.e = a->un.u8.e - b->un.u8.e;
+	out->un.u8.f = a->un.u8.f - b->un.u8.f;
+	out->un.u8.g = a->un.u8.g - b->un.u8.g;
+	out->un.u8.h = a->un.u8.h - b->un.u8.h;
+};
+static void default_multiply_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a * b->un.u8.a;
+	out->un.u8.b = a->un.u8.b * b->un.u8.b;
+	out->un.u8.c = a->un.u8.c * b->un.u8.c;
+	out->un.u8.d = a->un.u8.d * b->un.u8.d;
+	out->un.u8.e = a->un.u8.e * b->un.u8.e;
+	out->un.u8.f = a->un.u8.f * b->un.u8.f;
+	out->un.u8.g = a->un.u8.g * b->un.u8.g;
+	out->un.u8.h = a->un.u8.h * b->un.u8.h;
+};
+static void default_reciprocal_u8(MwLLVec* a, MwLLVec* out) {
+	out->un.u8.a = powf(a->un.u8.a, -1);
+	out->un.u8.b = powf(a->un.u8.b, -1);
+	out->un.u8.c = powf(a->un.u8.c, -1);
+	out->un.u8.d = powf(a->un.u8.d, -1);
+	out->un.u8.e = powf(a->un.u8.e, -1);
+	out->un.u8.f = powf(a->un.u8.f, -1);
+	out->un.u8.g = powf(a->un.u8.g, -1);
+	out->un.u8.h = powf(a->un.u8.h, -1);
+};
+static void default_squareRoot_u8(MwLLVec* a, MwLLVec* out) {
+	out->un.u8.a = sqrt(a->un.u8.a);
+	out->un.u8.b = sqrt(a->un.u8.b);
+	out->un.u8.c = sqrt(a->un.u8.c);
+	out->un.u8.d = sqrt(a->un.u8.d);
+	out->un.u8.e = sqrt(a->un.u8.e);
+	out->un.u8.f = sqrt(a->un.u8.f);
+	out->un.u8.g = sqrt(a->un.u8.g);
+	out->un.u8.h = sqrt(a->un.u8.h);
+}
+static void default_shiftRight_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a >> b->un.u8.a;
+	out->un.u8.b = a->un.u8.b >> b->un.u8.b;
+	out->un.u8.c = a->un.u8.c >> b->un.u8.c;
+	out->un.u8.d = a->un.u8.d >> b->un.u8.d;
+	out->un.u8.e = a->un.u8.e >> b->un.u8.e;
+	out->un.u8.f = a->un.u8.f >> b->un.u8.f;
+	out->un.u8.g = a->un.u8.g >> b->un.u8.g;
+	out->un.u8.h = a->un.u8.h >> b->un.u8.h;
+};
+static void default_shiftLeft_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a << b->un.u8.a;
+	out->un.u8.b = a->un.u8.b << b->un.u8.b;
+	out->un.u8.c = a->un.u8.c << b->un.u8.c;
+	out->un.u8.d = a->un.u8.d << b->un.u8.d;
+	out->un.u8.e = a->un.u8.e << b->un.u8.e;
+	out->un.u8.f = a->un.u8.f << b->un.u8.f;
+	out->un.u8.g = a->un.u8.g << b->un.u8.g;
+	out->un.u8.h = a->un.u8.h << b->un.u8.h;
+}
+static void default_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a == b->un.u8.a;
+	out->un.u8.b = a->un.u8.b == b->un.u8.b;
+	out->un.u8.c = a->un.u8.c == b->un.u8.c;
+	out->un.u8.d = a->un.u8.d == b->un.u8.d;
+	out->un.u8.e = a->un.u8.e == b->un.u8.e;
+	out->un.u8.f = a->un.u8.f == b->un.u8.f;
+	out->un.u8.g = a->un.u8.g == b->un.u8.g;
+	out->un.u8.h = a->un.u8.h == b->un.u8.h;
+};
+static void default_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a >= b->un.u8.a;
+	out->un.u8.b = a->un.u8.b >= b->un.u8.b;
+	out->un.u8.c = a->un.u8.c >= b->un.u8.c;
+	out->un.u8.d = a->un.u8.d >= b->un.u8.d;
+	out->un.u8.e = a->un.u8.e >= b->un.u8.e;
+	out->un.u8.f = a->un.u8.f >= b->un.u8.f;
+	out->un.u8.g = a->un.u8.g >= b->un.u8.g;
+	out->un.u8.h = a->un.u8.h >= b->un.u8.h;
+};
+/*
+ * Portable vtable for u8x8 vectors, listed in the member order of
+ * struct _MwLLMathVTable. And, Or and LesserThen are not listed and therefore
+ * stay null. Not const: mmx_apply() overwrites entries at run time.
+ */
+static MwLLMathVTable table_u8 = {
+    .Add = default_add_u8,
+    .Multiply = default_multiply_u8,
+    .Sub = default_sub_u8,
+    .Reciprocal = default_reciprocal_u8,
+    .SquareRoot = default_squareRoot_u8,
+    .ShiftRight = default_shiftRight_u8,
+    .ShiftLeft = default_shiftLeft_u8,
+    .Equal = default_equal_u8,
+    .GreaterThen = default_greaterThen_u8,
+};
+static void default_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u16.a = a->un.u16.a + b->un.u16.a;
+	out->un.u16.b = a->un.u16.b + b->un.u16.b;
+	out->un.u16.c = a->un.u16.c + b->un.u16.c;
+	out->un.u16.d = a->un.u16.d + b->un.u16.d;
+}
+static void default_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u16.a = a->un.u16.a - b->un.u16.a;
+	out->un.u16.b = a->un.u16.b - b->un.u16.b;
+	out->un.u16.c = a->un.u16.c - b->un.u16.c;
+	out->un.u16.d = a->un.u16.d - b->un.u16.d;
+}
+static void default_multiply_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u16.a = a->un.u16.a * b->un.u16.a;
+	out->un.u16.b = a->un.u16.b * b->un.u16.b;
+	out->un.u16.c = a->un.u16.c * b->un.u16.c;
+	out->un.u16.d = a->un.u16.d * b->un.u16.d;
+}
+static void default_reciprocal_u16(MwLLVec* a, MwLLVec* out) {
+	out->un.u16.a = powf(a->un.u16.a, -1);
+	out->un.u16.b = powf(a->un.u16.b, -1);
+	out->un.u16.c = powf(a->un.u16.c, -1);
+	out->un.u16.d = powf(a->un.u16.d, -1);
+};
+static void default_squareRoot_u16(MwLLVec* a, MwLLVec* out) {
+	out->un.u16.a = sqrt(a->un.u16.a);
+	out->un.u16.b = sqrt(a->un.u16.b);
+	out->un.u16.c = sqrt(a->un.u16.c);
+	out->un.u16.d = sqrt(a->un.u16.d);
+};
+
+static void default_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u16.a = a->un.u16.a >> b->un.u16.a;
+	out->un.u16.b = a->un.u16.b >> b->un.u16.b;
+	out->un.u16.c = a->un.u16.c >> b->un.u16.c;
+	out->un.u16.d = a->un.u16.d >> b->un.u16.d;
+};
+static void default_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u16.a = a->un.u16.a << b->un.u16.a;
+	out->un.u16.b = a->un.u16.b << b->un.u16.b;
+	out->un.u16.c = a->un.u16.c << b->un.u16.c;
+	out->un.u16.d = a->un.u16.d << b->un.u16.d;
+}
+static void default_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u16.a = a->un.u16.a == b->un.u16.a;
+	out->un.u16.b = a->un.u16.b == b->un.u16.b;
+	out->un.u16.c = a->un.u16.c == b->un.u16.c;
+	out->un.u16.d = a->un.u16.d == b->un.u16.d;
+}
+static void default_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u16.a = a->un.u16.a >= b->un.u16.a;
+	out->un.u16.b = a->un.u16.b >= b->un.u16.b;
+	out->un.u16.c = a->un.u16.c >= b->un.u16.c;
+	out->un.u16.d = a->un.u16.d >= b->un.u16.d;
+}
+/*
+ * Portable vtable for u16x4 vectors, listed in the member order of
+ * struct _MwLLMathVTable. And, Or and LesserThen are not listed and therefore
+ * stay null. Not const: mmx_apply() overwrites entries at run time.
+ */
+static MwLLMathVTable table_u16 = {
+    .Add = default_add_u16,
+    .Multiply = default_multiply_u16,
+    .Sub = default_sub_u16,
+    .Reciprocal = default_reciprocal_u16,
+    .SquareRoot = default_squareRoot_u16,
+    .ShiftRight = default_shiftRight_u16,
+    .ShiftLeft = default_shiftLeft_u16,
+    .Equal = default_equal_u16,
+    .GreaterThen = default_greaterThen_u16,
+};
+
+static void default_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a + b->un.u32.a;
+	out->un.u32.b = a->un.u32.b + b->un.u32.b;
+}
+static void default_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a - b->un.u32.a;
+	out->un.u32.b = a->un.u32.b - b->un.u32.b;
+}
+static void default_multiply_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a * b->un.u32.a;
+	out->un.u32.b = a->un.u32.b * b->un.u32.b;
+}
+static void default_reciprocal_u32(MwLLVec* a, MwLLVec* out) {
+	out->un.u32.a = powf(a->un.u32.a, -1);
+	out->un.u32.b = powf(a->un.u32.b, -1);
+};
+static void default_squareRoot_u32(MwLLVec* a, MwLLVec* out) {
+	out->un.u32.a = sqrt(a->un.u32.a);
+	out->un.u32.b = sqrt(a->un.u32.b);
+};
+
+static void default_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a >> b->un.u32.a;
+	out->un.u32.b = a->un.u32.b >> b->un.u32.b;
+};
+static void default_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a << b->un.u32.a;
+	out->un.u32.b = a->un.u32.b << b->un.u32.b;
+}
+static void default_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a == b->un.u32.a;
+	out->un.u32.b = a->un.u32.b == b->un.u32.b;
+}
+static void default_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a >= b->un.u32.a;
+	out->un.u32.b = a->un.u32.b >= b->un.u32.b;
+}
+/*
+ * Portable vtable for u32x2 vectors, listed in the member order of
+ * struct _MwLLMathVTable. And, Or and LesserThen are not listed and therefore
+ * stay null. Not const: mmx_apply() overwrites entries at run time.
+ */
+static MwLLMathVTable table_u32 = {
+    .Add = default_add_u32,
+    .Multiply = default_multiply_u32,
+    .Sub = default_sub_u32,
+    .Reciprocal = default_reciprocal_u32,
+    .SquareRoot = default_squareRoot_u32,
+    .ShiftRight = default_shiftRight_u32,
+    .ShiftLeft = default_shiftLeft_u32,
+    .Equal = default_equal_u32,
+    .GreaterThen = default_greaterThen_u32,
+};
+
+static void default_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i8.a = a->un.i8.a + b->un.i8.a;
+	out->un.i8.b = a->un.i8.b + b->un.i8.b;
+	out->un.i8.c = a->un.i8.c + b->un.i8.c;
+	out->un.i8.d = a->un.i8.d + b->un.i8.d;
+	out->un.i8.e = a->un.i8.e + b->un.i8.e;
+	out->un.i8.f = a->un.i8.f + b->un.i8.f;
+	out->un.i8.g = a->un.i8.g + b->un.i8.g;
+	out->un.i8.h = a->un.i8.h + b->un.i8.h;
+};
+static void default_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i8.a = a->un.i8.a - b->un.i8.a;
+	out->un.i8.b = a->un.i8.b - b->un.i8.b;
+	out->un.i8.c = a->un.i8.c - b->un.i8.c;
+	out->un.i8.d = a->un.i8.d - b->un.i8.d;
+	out->un.i8.e = a->un.i8.e - b->un.i8.e;
+	out->un.i8.f = a->un.i8.f - b->un.i8.f;
+	out->un.i8.g = a->un.i8.g - b->un.i8.g;
+	out->un.i8.h = a->un.i8.h - b->un.i8.h;
+};
+static void default_multiply_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i8.a = a->un.i8.a * b->un.i8.a;
+	out->un.i8.b = a->un.i8.b * b->un.i8.b;
+	out->un.i8.c = a->un.i8.c * b->un.i8.c;
+	out->un.i8.d = a->un.i8.d * b->un.i8.d;
+	out->un.i8.e = a->un.i8.e * b->un.i8.e;
+	out->un.i8.f = a->un.i8.f * b->un.i8.f;
+	out->un.i8.g = a->un.i8.g * b->un.i8.g;
+	out->un.i8.h = a->un.i8.h * b->un.i8.h;
+};
+static void default_reciprocal_i8(MwLLVec* a, MwLLVec* out) {
+	out->un.i8.a = powf(a->un.i8.a, -1);
+	out->un.i8.b = powf(a->un.i8.b, -1);
+	out->un.i8.c = powf(a->un.i8.c, -1);
+	out->un.i8.d = powf(a->un.i8.d, -1);
+	out->un.i8.e = powf(a->un.i8.e, -1);
+	out->un.i8.f = powf(a->un.i8.f, -1);
+	out->un.i8.g = powf(a->un.i8.g, -1);
+	out->un.i8.h = powf(a->un.i8.h, -1);
+};
+static void default_squareRoot_i8(MwLLVec* a, MwLLVec* out) {
+	out->un.i8.a = sqrt(a->un.i8.a);
+	out->un.i8.b = sqrt(a->un.i8.b);
+	out->un.i8.c = sqrt(a->un.i8.c);
+	out->un.i8.d = sqrt(a->un.i8.d);
+	out->un.i8.e = sqrt(a->un.i8.e);
+	out->un.i8.f = sqrt(a->un.i8.f);
+	out->un.i8.g = sqrt(a->un.i8.g);
+	out->un.i8.h = sqrt(a->un.i8.h);
+}
+
+static void default_equal_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i8.a = a->un.i8.a == b->un.i8.a;
+	out->un.i8.b = a->un.i8.b == b->un.i8.b;
+	out->un.i8.c = a->un.i8.c == b->un.i8.c;
+	out->un.i8.d = a->un.i8.d == b->un.i8.d;
+	out->un.i8.e = a->un.i8.e == b->un.i8.e;
+	out->un.i8.f = a->un.i8.f == b->un.i8.f;
+	out->un.i8.g = a->un.i8.g == b->un.i8.g;
+	out->un.i8.h = a->un.i8.h == b->un.i8.h;
+};
+static void default_greaterThen_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i8.a = a->un.i8.a >= b->un.i8.a;
+	out->un.i8.b = a->un.i8.b >= b->un.i8.b;
+	out->un.i8.c = a->un.i8.c >= b->un.i8.c;
+	out->un.i8.d = a->un.i8.d >= b->un.i8.d;
+	out->un.i8.e = a->un.i8.e >= b->un.i8.e;
+	out->un.i8.f = a->un.i8.f >= b->un.i8.f;
+	out->un.i8.g = a->un.i8.g >= b->un.i8.g;
+	out->un.i8.h = a->un.i8.h >= b->un.i8.h;
+};
+/*
+ * Portable vtable for i8x8 vectors, listed in the member order of
+ * struct _MwLLMathVTable. Both shifts reuse the u8 routines, which operate on
+ * the `u8` view of the union. And, Or and LesserThen are not listed and
+ * therefore stay null. Not const: mmx_apply() overwrites entries at run time.
+ */
+static MwLLMathVTable table_i8 = {
+    .Add = default_add_i8,
+    .Multiply = default_multiply_i8,
+    .Sub = default_sub_i8,
+    .Reciprocal = default_reciprocal_i8,
+    .SquareRoot = default_squareRoot_i8,
+    .ShiftRight = default_shiftRight_u8,
+    .ShiftLeft = default_shiftLeft_u8,
+    .Equal = default_equal_i8,
+    .GreaterThen = default_greaterThen_i8,
+};
+static void default_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i16.a = a->un.i16.a + b->un.i16.a;
+	out->un.i16.b = a->un.i16.b + b->un.i16.b;
+	out->un.i16.c = a->un.i16.c + b->un.i16.c;
+	out->un.i16.d = a->un.i16.d + b->un.i16.d;
+}
+static void default_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i16.a = a->un.i16.a - b->un.i16.a;
+	out->un.i16.b = a->un.i16.b - b->un.i16.b;
+	out->un.i16.c = a->un.i16.c - b->un.i16.c;
+	out->un.i16.d = a->un.i16.d - b->un.i16.d;
+}
+static void default_multiply_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i16.a = a->un.i16.a * b->un.i16.a;
+	out->un.i16.b = a->un.i16.b * b->un.i16.b;
+	out->un.i16.c = a->un.i16.c * b->un.i16.c;
+	out->un.i16.d = a->un.i16.d * b->un.i16.d;
+}
+static void default_reciprocal_i16(MwLLVec* a, MwLLVec* out) {
+	out->un.i16.a = powf(a->un.i16.a, -1);
+	out->un.i16.b = powf(a->un.i16.b, -1);
+	out->un.i16.c = powf(a->un.i16.c, -1);
+	out->un.i16.d = powf(a->un.i16.d, -1);
+};
+static void default_squareRoot_i16(MwLLVec* a, MwLLVec* out) {
+	out->un.i16.a = sqrt(a->un.i16.a);
+	out->un.i16.b = sqrt(a->un.i16.b);
+	out->un.i16.c = sqrt(a->un.i16.c);
+	out->un.i16.d = sqrt(a->un.i16.d);
+};
+
+static void default_equal_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i16.a = a->un.i16.a == b->un.i16.a;
+	out->un.i16.b = a->un.i16.b == b->un.i16.b;
+	out->un.i16.c = a->un.i16.c == b->un.i16.c;
+	out->un.i16.d = a->un.i16.d == b->un.i16.d;
+}
+static void default_greaterThen_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i16.a = a->un.i16.a >= b->un.i16.a;
+	out->un.i16.b = a->un.i16.b >= b->un.i16.b;
+	out->un.i16.c = a->un.i16.c >= b->un.i16.c;
+	out->un.i16.d = a->un.i16.d >= b->un.i16.d;
+}
+/*
+ * Portable vtable for i16x4 vectors, listed in the member order of
+ * struct _MwLLMathVTable. Both shifts reuse the u16 routines, which operate on
+ * the `u16` view of the union. And, Or and LesserThen are not listed and
+ * therefore stay null. Not const: mmx_apply() overwrites entries at run time.
+ */
+static MwLLMathVTable table_i16 = {
+    .Add = default_add_i16,
+    .Multiply = default_multiply_i16,
+    .Sub = default_sub_i16,
+    .Reciprocal = default_reciprocal_i16,
+    .SquareRoot = default_squareRoot_i16,
+    .ShiftRight = default_shiftRight_u16,
+    .ShiftLeft = default_shiftLeft_u16,
+    .Equal = default_equal_i16,
+    .GreaterThen = default_greaterThen_i16,
+};
+
+static void default_add_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a + b->un.i32.a;
+	out->un.i32.b = a->un.i32.b + b->un.i32.b;
+}
+static void default_sub_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a - b->un.i32.a;
+	out->un.i32.b = a->un.i32.b - b->un.i32.b;
+}
+static void default_multiply_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a * b->un.i32.a;
+	out->un.i32.b = a->un.i32.b * b->un.i32.b;
+}
+static void default_reciprocal_i32(MwLLVec* a, MwLLVec* out) {
+	out->un.i32.a = powf(a->un.i32.a, -1);
+	out->un.i32.b = powf(a->un.i32.b, -1);
+};
+static void default_squareRoot_i32(MwLLVec* a, MwLLVec* out) {
+	out->un.i32.a = sqrt(a->un.i32.a);
+	out->un.i32.b = sqrt(a->un.i32.b);
+};
+
+static void default_equal_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a == b->un.i32.a;
+	out->un.i32.b = a->un.i32.b == b->un.i32.b;
+}
+static void default_greaterThen_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a >= b->un.i32.a;
+	out->un.i32.b = a->un.i32.b >= b->un.i32.b;
+}
+/*
+ * Portable vtable for i32x2 vectors, listed in the member order of
+ * struct _MwLLMathVTable. Both shifts reuse the u32 routines, which operate on
+ * the `u32` view of the union. And, Or and LesserThen are not listed and
+ * therefore stay null. Not const: mmx_apply() overwrites entries at run time.
+ */
+static MwLLMathVTable table_i32 = {
+    .Add = default_add_i32,
+    .Multiply = default_multiply_i32,
+    .Sub = default_sub_i32,
+    .Reciprocal = default_reciprocal_i32,
+    .SquareRoot = default_squareRoot_i32,
+    .ShiftRight = default_shiftRight_u32,
+    .ShiftLeft = default_shiftLeft_u32,
+    .Equal = default_equal_i32,
+    .GreaterThen = default_greaterThen_i32,
+};
+
+/*
+ * Default vtable for every vector type, indexed by MwLLVecType.
+ *
+ * Entries are placed with designated indices so that each slot is tied to its
+ * enumerator (the way mmx_apply() and the dispatcher index this array) instead
+ * of relying on the enumerators being declared in this exact order.
+ */
+static MwLLMathVTable* defaultMultiTable[_MwLLVecType_Max] = {
+    [_MwLLVecTypeU8x8]	= &table_u8,
+    [_MwLLVecTypeU16x4] = &table_u16,
+    [_MwLLVecTypeU32x2] = &table_u32,
+    [_MwLLVecTypeI8x8]	= &table_i8,
+    [_MwLLVecTypeI16x4] = &table_i16,
+    [_MwLLVecTypeI32x2] = &table_i32,
+};
+
+/*
+ * Returns the array of portable vtables, indexed by MwLLVecType.
+ * The array itself is returned, not a copy, so changes made through the
+ * result (as mmx_apply() does) are visible to every later caller.
+ */
+MwLLMathVTable** default_multi_table(void) {
+	return defaultMultiTable;
+}
--- a/src/math/math.c
+++ b/src/math/math.c
@@ -0,0 +1,125 @@
+#include <Mw/LowLevelMath.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "math.h"
+#include "Mw/BaseTypes.h"
+#include "x86intrin.h"
+
+/*
+ * Builds a vector of type `ty` from the variadic lane values that follow it.
+ *
+ * The number of values read depends on `ty`: eight for the 8-bit types, four
+ * for the 16-bit types and two for the 32-bit types. Every value is fetched
+ * with va_arg(va, int), so callers must pass int-compatible arguments.
+ * For _MwLLVecType_Max, or any value not listed, no arguments are read and the
+ * lanes are left zeroed.
+ *
+ * Returns the vector by value with `ty` and the lane union filled in.
+ */
+MwLLVec _MwLLVecCreateGeneric(MwLLVecType ty, ...) {
+	MwLLVecUnion un;
+	MwLLVec	     vec;
+	va_list	     va;
+
+	/*
+	 * Start from zero so that a `ty` without lane arguments never copies
+	 * indeterminate bytes into the result.
+	 * NOTE(review): assumes the `all` member spans the whole union - confirm.
+	 */
+	un.all = 0;
+
+	va_start(va, ty);
+
+	// clang-format off
+#define SET_AB(lanes) un.lanes.a = va_arg(va, int); un.lanes.b = va_arg(va, int);
+#define SET_CD(lanes) un.lanes.c = va_arg(va, int); un.lanes.d = va_arg(va, int);
+#define SET_EF(lanes) un.lanes.e = va_arg(va, int); un.lanes.f = va_arg(va, int);
+#define SET_GH(lanes) un.lanes.g = va_arg(va, int); un.lanes.h = va_arg(va, int);
+switch(ty) {
+	case _MwLLVecTypeU8x8:  SET_AB(u8);   SET_CD(u8);   SET_EF(u8);   SET_GH(u8);   break;
+	case _MwLLVecTypeU16x4: SET_AB(u16);  SET_CD(u16);                              break;
+	case _MwLLVecTypeU32x2: SET_AB(u32);                                            break;
+	case _MwLLVecTypeI8x8:  SET_AB(i8);   SET_CD(i8);   SET_EF(i8);   SET_GH(i8);   break;
+	case _MwLLVecTypeI16x4: SET_AB(i16);  SET_CD(i16);                              break;
+	case _MwLLVecTypeI32x2: SET_AB(i32);                                            break;
+	case _MwLLVecType_Max: break;
+	default: break;
+}
+#undef SET_AB
+#undef SET_CD
+#undef SET_EF
+#undef SET_GH
+	// clang-format on
+
+	va_end(va);
+
+	vec.ty = ty;
+	vec.un = un;
+
+	return vec;
+}
+
+/*
+ * Reports whether the CPU advertises MMX: runs CPUID leaf 1 and tests bit 23
+ * of EDX.
+ *
+ * CPUID writes EAX, EBX, ECX and EDX. All four are declared as outputs so the
+ * compiler knows ECX is overwritten too; leaving it out lets the compiler keep
+ * a live value in a register the instruction destroys.
+ */
+static MwBool hasMMX(void) {
+	MwU32 eax, ebx, ecx, edx;
+
+	__asm__ __volatile__(
+	    "cpuid"
+	    : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+	    : "a"(1));
+
+	/* Only EDX is inspected; the other outputs exist to describe the clobbers. */
+	(void)eax;
+	(void)ebx;
+	(void)ecx;
+
+	return (edx & (1 << 23)) == (1 << 23);
+}
+
+/* Array of per-type vtables used for dispatch; assigned by multiTableSetupAndGet(). */
+static MwLLMathVTable** mwLLMultiTable;
+/* The two lookup routines, declared here so the pointer below can name them. */
+static MwLLMathVTable*	multiTableSetupAndGet(MwLLVecType ty);
+static MwLLMathVTable*	multiTableGet(MwLLVecType ty);
+
+/*
+ * Lookup routine currently in effect. It starts as the setup variant, which
+ * replaces it with multiTableGet once the tables have been prepared.
+ */
+static MwLLMathVTable* (*mwLLmathFunc)(MwLLVecType ty) = multiTableSetupAndGet;
+
+/* Returns the vtable for `ty` by calling whichever lookup routine mwLLmathFunc currently points at. */
+static MwLLMathVTable* getMultiTable(MwLLVecType ty) {
+	MwLLMathVTable* (*lookup)(MwLLVecType) = mwLLmathFunc;
+
+	return lookup(ty);
+}
+
+/*
+ * First-use lookup: fetches the default tables, lets mmx_apply() override
+ * entries when building for x86 and hasMMX() reports support, switches
+ * mwLLmathFunc over to multiTableGet, and returns the vtable for `ty`.
+ * `ty` is used as an array index without a range check.
+ */
+static MwLLMathVTable* multiTableSetupAndGet(MwLLVecType ty) {
+	MwLLMathVTable** tables = default_multi_table();
+
+	mwLLMultiTable = tables;
+
+#if defined(__i386__) || defined(__x86_64__)
+	if(hasMMX()) {
+		mmx_apply(tables);
+	}
+#endif
+
+	/* Later lookups skip the setup work. */
+	mwLLmathFunc = multiTableGet;
+
+	return tables[ty];
+}
+/* Plain lookup used after setup: returns entry `ty` of mwLLMultiTable (no range check on `ty`). */
+static MwLLMathVTable* multiTableGet(MwLLVecType ty) {
+	MwLLMathVTable* table = mwLLMultiTable[ty];
+
+	return table;
+}
+
+/*
+ * Adds `a` and `b` lane by lane into `out`, dispatching through the vtable for
+ * their type. All three vectors must have the same `ty` (checked by assert).
+ */
+void MwLLMathAdd(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->Add(a, b, out);
+}
+/*
+ * Subtracts `b` from `a` lane by lane into `out`, dispatching through the
+ * vtable for their type. All three vectors must have the same `ty` (checked by
+ * assert).
+ */
+void MwLLMathSub(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->Sub(a, b, out);
+}
+/*
+ * Multiplies `a` and `b` lane by lane into `out`, dispatching through the
+ * vtable for their type. All three vectors must have the same `ty` (checked by
+ * assert).
+ */
+void MwLLMathMultiply(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->Multiply(a, b, out);
+}
+/*
+ * Stores the per-lane reciprocal of `a` in `out`, dispatching through the
+ * vtable for their type. `a` and `out` must have the same `ty` (checked by
+ * assert).
+ */
+void MwLLMathReciprocal(MwLLVec* a, MwLLVec* out) {
+	assert(a->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->Reciprocal(a, out);
+}
+/*
+ * Stores the per-lane square root of `a` in `out`, dispatching through the
+ * vtable for their type. `a` and `out` must have the same `ty` (checked by
+ * assert).
+ */
+void MwLLMathSquareRoot(MwLLVec* a, MwLLVec* out) {
+	assert(a->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->SquareRoot(a, out);
+}
+
+/*
+ * Shifts `a` right by the counts in `b` into `out`, dispatching through the
+ * vtable for their type. All three vectors must have the same `ty` (checked by
+ * assert).
+ */
+void MwLLMathShiftRight(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->ShiftRight(a, b, out);
+}
+/*
+ * Shifts `a` left by the counts in `b` into `out`, dispatching through the
+ * vtable for their type. All three vectors must have the same `ty` (checked by
+ * assert).
+ */
+void MwLLMathShiftLeft(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->ShiftLeft(a, b, out);
+}
+/*
+ * Compares `a` and `b` for equality lane by lane into `out`, dispatching
+ * through the vtable for their type. All three vectors must have the same `ty`
+ * (checked by assert).
+ */
+void MwLLMathEqual(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->Equal(a, b, out);
+}
+/*
+ * Runs the per-lane "greater than" comparison of `a` against `b` into `out`,
+ * dispatching through the vtable for their type. All three vectors must have
+ * the same `ty` (checked by assert).
+ */
+void MwLLMathGreaterThen(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty);
+	/* Plain call: `return expr;` is not allowed in a function returning void. */
+	getMultiTable(a->ty)->GreaterThen(a, b, out);
+}
+/*
+ * Bitwise AND of the `all` views of `a` and `b`, written to `out`.
+ * Works on the raw bits: no vtable is consulted, and the `ty` fields are
+ * neither checked nor modified.
+ */
+void MwLLMathAnd(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.all = b->un.all & a->un.all;
+}
+/*
+ * Bitwise OR of the `all` views of `a` and `b`, written to `out`.
+ * Works on the raw bits: no vtable is consulted, and the `ty` fields are
+ * neither checked nor modified.
+ */
+void MwLLMathOr(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.all = b->un.all | a->un.all;
+}
--- a/src/math/math.h
+++ b/src/math/math.h
@@ -0,0 +1,28 @@
+/* $Id$ */
+
+#ifndef __MW_LOWLEVEL_INTERNAL_MATH_H__
+#define __MW_LOWLEVEL_INTERNAL_MATH_H__
+
+#include <Mw/LowLevelMath.h>
+
+/*
+ * Table of per-vector-type operations. One instance exists per MwLLVecType;
+ * the public MwLLMath* functions look up the instance for the operands' type
+ * and call the matching member. Binary operations take (a, b, out), unary
+ * operations take (a, out).
+ */
+struct _MwLLMathVTable {
+	void (*Add)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Multiply)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Sub)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Reciprocal)(MwLLVec* a, MwLLVec* out);
+	void (*SquareRoot)(MwLLVec* a, MwLLVec* out);
+	/* And / Or: not assigned by the default tables or by mmx_apply(). */
+	void (*And)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Or)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*ShiftRight)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*ShiftLeft)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Equal)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*GreaterThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	/* LesserThen: not assigned by the default tables or by mmx_apply(). */
+	void (*LesserThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+};
+
+typedef struct _MwLLMathVTable MwLLMathVTable;
+
+/* Returns the array of portable vtables, indexed by MwLLVecType. */
+MwLLMathVTable** default_multi_table(void);
+/* Replaces selected entries of the vtables in `t` with MMX implementations. */
+void		 mmx_apply(MwLLMathVTable** t);
+
+#endif
--- a/src/math/mmx.c
+++ b/src/math/mmx.c
@@ -0,0 +1,103 @@
+#include <Mw/LowLevelMath.h>
+#include "math.h"
+#include <assert.h>
+#include <mmintrin.h>
+#include <stdio.h>
+#include <x86intrin.h>
+
+#define DO_MMX_INTRINSIC(intrin, _ty, _rty, _tyn) \
+	__m64 m	     = intrin(*(__m64*)&a->un._ty, *(__m64*)&b->un._ty); \
+	out->un._rty = *(struct _tyn*)&m;
+
+/*
+ * u8x8 add using PADDUSB, the unsigned saturating byte add.
+ * NOTE(review): default_add_u8 stores `a + b` without clamping, so the two
+ * backends can disagree on overflow - confirm which behaviour is intended.
+ */
+static void mmx_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_paddusb, u8, u8, _MwLLVecDataU8x8);
+};
+/*
+ * u8x8 subtract using PSUBUSB, the unsigned saturating byte subtract.
+ * NOTE(review): default_sub_u8 stores `a - b` without clamping, so the two
+ * backends can disagree on underflow - confirm which behaviour is intended.
+ */
+static void mmx_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psubusb, u8, u8, _MwLLVecDataU8x8);
+};
+/*
+ * u8x8 equality using PCMPEQB: a lane of `out` becomes all ones when the lanes
+ * are equal and zero otherwise.
+ * NOTE(review): default_equal_u8 stores the C result of `==` (1 or 0) instead
+ * of a mask - confirm which truth value callers expect.
+ */
+static void mmx_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_pcmpeqb, u8, u8, _MwLLVecDataU8x8);
+};
+/*
+ * u8x8 "greater than": a lane of `out` becomes all ones when the lane of `a`
+ * is greater than the lane of `b`, zero otherwise.
+ *
+ * PCMPGTB compares SIGNED bytes, so using it directly misorders every value of
+ * 0x80 and above. Flipping the top bit of each lane in both operands maps
+ * unsigned order onto signed order, after which the signed compare gives the
+ * unsigned answer.
+ */
+static void mmx_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	__m64 bias = _mm_set1_pi8((char)-128);
+	__m64 lhs  = _mm_xor_si64(*(__m64*)&a->un.u8, bias);
+	__m64 rhs  = _mm_xor_si64(*(__m64*)&b->un.u8, bias);
+	__m64 m	   = _m_pcmpgtb(lhs, rhs);
+
+	out->un.u8 = *(struct _MwLLVecDataU8x8*)&m;
+	/* Clear the MMX state so later x87 floating-point code is not corrupted. */
+	_mm_empty();
+}
+
+/*
+ * u16x4 add using PADDUSW, the unsigned saturating word add.
+ * NOTE(review): default_add_u16 stores `a + b` without clamping, so the two
+ * backends can disagree on overflow - confirm which behaviour is intended.
+ */
+static void mmx_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_paddusw, u16, u16, _MwLLVecDataU16x4);
+}
+/*
+ * u16x4 subtract using PSUBUSW, the unsigned saturating word subtract.
+ * NOTE(review): default_sub_u16 stores `a - b` without clamping, so the two
+ * backends can disagree on underflow - confirm which behaviour is intended.
+ */
+static void mmx_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psubusw, u16, u16, _MwLLVecDataU16x4);
+}
+static void mmx_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psrlw, u16, u16, _MwLLVecDataU16x4);
+};
+static void mmx_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psllw, u16, u16, _MwLLVecDataU16x4);
+}
+/*
+ * u16x4 equality using PCMPEQW: a lane of `out` becomes all ones when the
+ * lanes are equal and zero otherwise.
+ * NOTE(review): default_equal_u16 stores the C result of `==` (1 or 0) instead
+ * of a mask - confirm which truth value callers expect.
+ */
+static void mmx_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_pcmpeqw, u16, u16, _MwLLVecDataU16x4);
+}
+/*
+ * u16x4 "greater than": a lane of `out` becomes all ones when the lane of `a`
+ * is greater than the lane of `b`, zero otherwise.
+ *
+ * PCMPGTW compares SIGNED words, so using it directly misorders every value of
+ * 0x8000 and above. Flipping the top bit of each lane in both operands maps
+ * unsigned order onto signed order, after which the signed compare gives the
+ * unsigned answer.
+ */
+static void mmx_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	__m64 bias = _mm_set1_pi16((short)-32768);
+	__m64 lhs  = _mm_xor_si64(*(__m64*)&a->un.u16, bias);
+	__m64 rhs  = _mm_xor_si64(*(__m64*)&b->un.u16, bias);
+	__m64 m	   = _m_pcmpgtw(lhs, rhs);
+
+	out->un.u16 = *(struct _MwLLVecDataU16x4*)&m;
+	/* Clear the MMX state so later x87 floating-point code is not corrupted. */
+	_mm_empty();
+}
+
+/* u32x2 add using PADDD, the packed 32-bit add (wraps on overflow; there is no saturating form). */
+static void mmx_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_paddd, u32, u32, _MwLLVecDataU32x2);
+}
+/* u32x2 subtract using PSUBD, the packed 32-bit subtract (wraps on underflow; there is no saturating form). */
+static void mmx_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psubd, u32, u32, _MwLLVecDataU32x2);
+}
+static void mmx_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psrld, u32, u32, _MwLLVecDataU32x2);
+};
+static void mmx_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_pslld, u32, u32, _MwLLVecDataU32x2);
+}
+/*
+ * u32x2 equality: a lane of `out` becomes all ones when the lanes are equal
+ * and zero otherwise.
+ *
+ * Uses PCMPEQD, the 32-bit compare. The 16-bit PCMPEQW compared each half of
+ * a lane separately, so two values sharing only one half produced a partly
+ * set, non-zero result.
+ * NOTE(review): default_equal_u32 stores the C result of `==` (1 or 0) instead
+ * of a mask - confirm which truth value callers expect.
+ */
+static void mmx_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_pcmpeqd, u32, u32, _MwLLVecDataU32x2);
+}
+/*
+ * u32x2 "greater than": a lane of `out` becomes all ones when the lane of `a`
+ * is greater than the lane of `b`, zero otherwise.
+ *
+ * Uses PCMPGTD, the 32-bit compare; the 16-bit PCMPGTW compared each half of a
+ * lane separately. PCMPGTD is a SIGNED compare, so the top bit of each lane is
+ * flipped in both operands first, which maps unsigned order onto signed order.
+ */
+static void mmx_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	__m64 bias = _mm_set1_pi32(-2147483647 - 1);
+	__m64 lhs  = _mm_xor_si64(*(__m64*)&a->un.u32, bias);
+	__m64 rhs  = _mm_xor_si64(*(__m64*)&b->un.u32, bias);
+	__m64 m	   = _m_pcmpgtd(lhs, rhs);
+
+	out->un.u32 = *(struct _MwLLVecDataU32x2*)&m;
+	/* Clear the MMX state so later x87 floating-point code is not corrupted. */
+	_mm_empty();
+}
+
+/*
+ * i8x8 add using PADDSB, the signed saturating byte add.
+ * NOTE(review): default_add_i8 stores `a + b` without clamping, so the two
+ * backends can disagree on overflow - confirm which behaviour is intended.
+ */
+static void mmx_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_paddsb, i8, i8, _MwLLVecDataI8x8);
+};
+/*
+ * i8x8 subtract using PSUBSB, the signed saturating byte subtract.
+ * NOTE(review): default_sub_i8 stores `a - b` without clamping, so the two
+ * backends can disagree on overflow - confirm which behaviour is intended.
+ */
+static void mmx_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psubsb, i8, i8, _MwLLVecDataI8x8);
+};
+
+/*
+ * i16x4 add using PADDSW, the signed saturating word add.
+ * NOTE(review): default_add_i16 stores `a + b` without clamping, so the two
+ * backends can disagree on overflow - confirm which behaviour is intended.
+ */
+static void mmx_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_paddsw, i16, i16, _MwLLVecDataI16x4);
+}
+/*
+ * i16x4 subtract using PSUBSW, the signed saturating word subtract.
+ * NOTE(review): default_sub_i16 stores `a - b` without clamping, so the two
+ * backends can disagree on overflow - confirm which behaviour is intended.
+ */
+static void mmx_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	DO_MMX_INTRINSIC(_m_psubsw, i16, i16, _MwLLVecDataI16x4);
+}
+
+/*
+ * Overwrites selected entries of the per-type vtables in `t` (indexed by
+ * MwLLVecType) with the MMX routines from this file. Entries not assigned here
+ * keep whatever the tables already held. The signed 16- and 32-bit tables
+ * receive the unsigned shift routines; the I8x8 and I32x2 tables get no
+ * compare overrides.
+ */
+void mmx_apply(MwLLMathVTable** t) {
+	MwLLMathVTable* vtU8  = t[_MwLLVecTypeU8x8];
+	MwLLMathVTable* vtU16 = t[_MwLLVecTypeU16x4];
+	MwLLMathVTable* vtU32 = t[_MwLLVecTypeU32x2];
+	MwLLMathVTable* vtI8  = t[_MwLLVecTypeI8x8];
+	MwLLMathVTable* vtI16 = t[_MwLLVecTypeI16x4];
+	MwLLMathVTable* vtI32 = t[_MwLLVecTypeI32x2];
+
+	/* Unsigned types. */
+	vtU8->Add	   = mmx_add_u8;
+	vtU8->Sub	   = mmx_sub_u8;
+	vtU8->GreaterThen  = mmx_greaterThen_u8;
+	vtU8->Equal	   = mmx_equal_u8;
+	vtU16->Add	   = mmx_add_u16;
+	vtU16->Sub	   = mmx_sub_u16;
+	vtU16->ShiftLeft   = mmx_shiftLeft_u16;
+	vtU16->ShiftRight  = mmx_shiftRight_u16;
+	vtU16->GreaterThen = mmx_greaterThen_u16;
+	vtU16->Equal	   = mmx_equal_u16;
+	vtU32->Add	   = mmx_add_u32;
+	vtU32->Sub	   = mmx_sub_u32;
+	vtU32->ShiftLeft   = mmx_shiftLeft_u32;
+	vtU32->ShiftRight  = mmx_shiftRight_u32;
+	vtU32->GreaterThen = mmx_greaterThen_u32;
+	vtU32->Equal	   = mmx_equal_u32;
+
+	/* Signed types. */
+	vtI8->Add	  = mmx_add_i8;
+	vtI8->Sub	  = mmx_sub_i8;
+	vtI16->Add	  = mmx_add_i16;
+	vtI16->Sub	  = mmx_sub_i16;
+	vtI16->ShiftLeft  = mmx_shiftLeft_u16;
+	vtI16->ShiftRight = mmx_shiftRight_u16;
+	vtI32->ShiftLeft  = mmx_shiftLeft_u32;
+	vtI32->ShiftRight = mmx_shiftRight_u32;
+}
--- a/src/math/mmx_guard.c
+++ b/src/math/mmx_guard.c
@@ -0,0 +1,6 @@
+/*
+ * Build guard for the MMX backend: x86 targets compile the real mmx.c, every
+ * other target gets an empty mmx_apply() so the symbol still links.
+ * math.h is included first because the stub needs the MwLLMathVTable type and
+ * the mmx_apply() prototype, which were otherwise undeclared on this path.
+ */
+#include "math.h"
+
+#if defined(__WATCOMC__) || defined(__i386__) || defined(__amd64__)
+#include "mmx.c"
+#else
+/* No MMX on this target: leave the tables in `t` untouched. */
+void mmx_apply(MwLLMathVTable** t) {
+	(void)t;
+}
+#endif