From 3719f17250b2b0de95d23cdd4c54c8c636d5e22e Mon Sep 17 00:00:00 2001
From: IoIxD <ioixd@users.noreply.github.com>
Date: Sat, 1 Nov 2025 03:37:40 +0000
Subject: [PATCH] merge pr #7 (take 2)

git-svn-id: http://svn2.nishi.boats/svn/milsko/trunk@540 b9cfdab3-6d41-4d17-bbe4-086880011989
---
 include/Mw/ColorPicker.h        |  38 +++
 include/Mw/LowLevelMath.h       |  76 ++++++
 mk/math.mk                      |   1 +
 src/color_picker/color_picker.c | 220 +++++++++++++++++
 src/color_picker/color_picker.h |  44 ++++
 src/math/default.c              | 412 ++++++++++++++++++++++++++++++++
 src/math/math.c                 | 125 ++++++++++
 src/math/math.h                 |  28 +++
 src/math/mmx.c                  | 103 ++++++++
 src/math/mmx_guard.c            |   6 +
 10 files changed, 1053 insertions(+)
 create mode 100644 include/Mw/ColorPicker.h
 create mode 100644 include/Mw/LowLevelMath.h
 create mode 100644 mk/math.mk
 create mode 100644 src/color_picker/color_picker.c
 create mode 100644 src/color_picker/color_picker.h
 create mode 100644 src/math/default.c
 create mode 100644 src/math/math.c
 create mode 100644 src/math/math.h
 create mode 100644 src/math/mmx.c
 create mode 100644 src/math/mmx_guard.c

diff --git a/include/Mw/ColorPicker.h b/include/Mw/ColorPicker.h
new file mode 100644
index 0000000..7cea187
--- /dev/null
+++ b/include/Mw/ColorPicker.h
@@ -0,0 +1,38 @@
+/* $Id$ */
+
+/*!
+ * @file Mw/ColorPicker.h
+ * @brief Color picker
+ */
+#ifndef __MW_COLORPICKER_H__
+#define __MW_COLORPICKER_H__
+
+#include <Mw/MachDep.h>
+#include <Mw/TypeDefs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _MwRGB MwRGB; // RGB color triple
+typedef struct _MwHSV MwHSV; // hue / saturation / value color
+
+struct _MwRGB { // color_picker.c multiplies each channel by 255 when it writes pixels
+	double r; // red
+	double g; // green
+	double b; // blue
+};
+
+struct _MwHSV {
+	double h; // hue; color_picker.c passes degrees / 360, i.e. a fraction of a full turn rather than degrees
+	double s; // a fraction between 0 and 1
+	double v; // a fraction between 0 and 1
+};
+
+MWDECL MwWidget MwColorPicker(MwWidget handle, const char* title); // creates a picker window as a child of handle, makes it a popup and returns it
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/Mw/LowLevelMath.h b/include/Mw/LowLevelMath.h
new file mode 100644
index 0000000..d1493b4
--- /dev/null
+++ b/include/Mw/LowLevelMath.h
@@ -0,0 +1,76 @@
+/* $Id$ */
+/*!
+ * @file Mw/LowLevelMath.h
+ * @brief A few portable vector functions that work both with SIMD support and without it
+ * @warning This is mostly used internally. Anything undocumented, and/or anything with an _ prefix (that doesn't have a corresponding typedef) should be avoided.
+ */
+
+#ifndef __MW_LOWLEVEL_MATH_H__
+#define __MW_LOWLEVEL_MATH_H__
+
+#include <Mw/BaseTypes.h>
+
+#if !defined(__i386__) && !defined(__x86_64__)
+#warning LowLevelMath.h does not yet support non-X86 platforms
+#endif
+
+/*!
+ * @brief Generic vector type
+ * @warning Do not try to instantiate this yourself, use the appropriate functions instead.
+ */
+typedef struct _MwLLVec MwLLVec;
+/* The enum is defined before its typedef: ISO C has no forward references to enum types. */
+enum _MwLLVecType {
+	_MwLLVecTypeU8x8  = 0,
+	_MwLLVecTypeU16x4 = 1,
+	_MwLLVecTypeU32x2 = 2,
+	_MwLLVecTypeI8x8  = 3,
+	_MwLLVecTypeI16x4 = 4,
+	_MwLLVecTypeI32x2 = 5,
+
+	_MwLLVecType_Max, /* number of layouts; used to size the per-type tables */
+};
+typedef enum _MwLLVecType   MwLLVecType;
+typedef union _MwLLVecUnion MwLLVecUnion;
+
+// clang-format off
+struct _MwLLVecDataU8x8  { MwU8 a; MwU8 b; MwU8 c; MwU8 d; MwU8 e; MwU8 f; MwU8 g; MwU8 h;};
+struct _MwLLVecDataU16x4 { MwU16 a; MwU16 b; MwU16 c; MwU16 d;};
+struct _MwLLVecDataU32x2 { MwU32 a; MwU32 b;};
+struct _MwLLVecDataI8x8  { MwI8 a; MwI8 b; MwI8 c; MwI8 d; MwI8 e; MwI8 f; MwI8 g; MwI8 h;};
+struct _MwLLVecDataI16x4 { MwI16 a; MwI16 b; MwI16 c; MwI16 d;};
+struct _MwLLVecDataI32x2 { MwI32 a; MwI32 b;};
+union _MwLLVecUnion { /* one view per lane layout, plus the raw payload */
+	struct _MwLLVecDataU8x8 u8; struct _MwLLVecDataU16x4 u16; struct _MwLLVecDataU32x2 u32;
+	struct _MwLLVecDataI8x8 i8; struct _MwLLVecDataI16x4 i16; struct _MwLLVecDataI32x2 i32;
+	MwU64 all; /* whole payload at once; MwLLMathAnd / MwLLMathOr operate on this */
+};
+// clang-format on
+struct _MwLLVec {
+	MwLLVecType  ty; /* selects which view of un the math kernels use */
+	MwLLVecUnion un;
+};
+
+MwLLVec _MwLLVecCreateGeneric(MwLLVecType ty, ...); /* reads one int per lane through va_arg */
+/* Lane constructors. Arguments are cast to int because the variadic callee fetches every lane as int. */
+#define MwLLVecU8x8(a, b, c, d, e, f, g, h) _MwLLVecCreateGeneric(_MwLLVecTypeU8x8, (int)(a), (int)(b), (int)(c), (int)(d), (int)(e), (int)(f), (int)(g), (int)(h))
+#define MwLLVecU16x4(a, b, c, d) _MwLLVecCreateGeneric(_MwLLVecTypeU16x4, (int)(a), (int)(b), (int)(c), (int)(d))
+#define MwLLVecU32x2(a, b) _MwLLVecCreateGeneric(_MwLLVecTypeU32x2, (int)(a), (int)(b))
+#define MwLLVecI8x8(a, b, c, d, e, f, g, h) _MwLLVecCreateGeneric(_MwLLVecTypeI8x8, (int)(a), (int)(b), (int)(c), (int)(d), (int)(e), (int)(f), (int)(g), (int)(h))
+#define MwLLVecI16x4(a, b, c, d) _MwLLVecCreateGeneric(_MwLLVecTypeI16x4, (int)(a), (int)(b), (int)(c), (int)(d))
+#define MwLLVecI32x2(a, b) _MwLLVecCreateGeneric(_MwLLVecTypeI32x2, (int)(a), (int)(b))
+
+void MwLLMathAdd(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* dispatches to the Add kernel for a->ty; a, b and out must share one ty (asserted) */
+void MwLLMathMultiply(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* Multiply kernel for a->ty; same ty rule */
+void MwLLMathSub(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* Sub kernel for a->ty; same ty rule */
+void MwLLMathReciprocal(MwLLVec* a, MwLLVec* out); /* Reciprocal kernel for a->ty; a and out must share one ty (asserted) */
+void MwLLMathSquareRoot(MwLLVec* a, MwLLVec* out); /* SquareRoot kernel for a->ty; a and out must share one ty (asserted) */
+void MwLLMathAnd(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* bitwise AND of the whole 64-bit payload; ty is not checked */
+void MwLLMathOr(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* bitwise OR of the whole 64-bit payload; ty is not checked */
+void MwLLMathShiftRight(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* ShiftRight kernel for a->ty; b holds the per-lane counts */
+void MwLLMathShiftLeft(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* ShiftLeft kernel for a->ty; b holds the per-lane counts */
+void MwLLMathEqual(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* Equal kernel for a->ty; same ty rule */
+void MwLLMathGreaterThen(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* GreaterThen kernel for a->ty; same ty rule */
+void MwLLMathLesserThen(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* presumably the per-lane "a less than b" counterpart — TODO confirm */
+
+#endif
diff --git a/mk/math.mk b/mk/math.mk
new file mode 100644
index 0000000..4e009a5
--- /dev/null
+++ b/mk/math.mk
@@ -0,0 +1 @@
+L_OBJS += src/math/default.o src/math/math.o src/math/mmx_guard.o
diff --git a/src/color_picker/color_picker.c b/src/color_picker/color_picker.c
new file mode 100644
index 0000000..43814ef
--- /dev/null
+++ b/src/color_picker/color_picker.c
@@ -0,0 +1,220 @@
+/* $Id$ */
+
+#include "color_picker.h"
+#include "Mw/Core.h"
+#include <stdlib.h>
+#include <string.h>
+
+#include <Mw/LowLevelMath.h>
+
+/*
+ * Convert HSV to RGB. in.h is a fraction of a full turn (sector = floor(in.h * 6)),
+ * in.s and in.v are fractions. Negative hues wrap instead of leaving out unset.
+ */
+static MwRGB hsv2rgb(MwHSV in) {
+	MwRGB out;
+
+	double h6 = in.h * 6;
+	int    i  = (int)(floor(h6));
+	double f  = h6 - i; /* position inside the sector */
+	double p  = in.v * (1 - in.s);
+	double q  = in.v * (1 - f * in.s);
+	double t  = in.v * (1 - (1 - f) * in.s);
+	int sector = ((i % 6) + 6) % 6; /* C's % keeps the sign of i; fold it into 0..5 */
+
+	switch(sector) {
+	case 0:
+		out.r = in.v; out.g = t; out.b = p; break;
+	case 1:
+		out.r = q; out.g = in.v; out.b = p; break;
+	case 2:
+		out.r = p; out.g = in.v; out.b = t; break;
+	case 3:
+		out.r = p; out.g = q; out.b = in.v; break;
+	case 4:
+		out.r = t; out.g = p; out.b = in.v; break;
+	default: /* sector 5 */
+		out.r = in.v; out.g = p; out.b = q; break;
+	}
+
+	return out;
+}
+
+/* Repaint the RGBA wheel buffer for the current value, then rebuild the pixmap and apply it to the image widget. */
+static void color_picker_wheel_image_update(color_picker* picker) {
+	unsigned char* pixels = picker->inner.color_picker_image_data;
+	int row, col;
+
+	for(row = 0; row < PICKER_SIZE; row++) {
+		for(col = 0; col < PICKER_SIZE; col++) {
+			unsigned char* px = pixels + ((row * PICKER_SIZE) + col) * 4;
+			int dx = col - (PICKER_SIZE / 2);
+			int dy = row - (PICKER_SIZE / 2);
+			double dist = sqrt(dx * dx + dy * dy);
+			double xd, yd;
+			float angle, hue;
+			MwHSV hsv_v;
+			MwRGB color;
+
+			/* Too far from the centre: all four bytes are cleared. */
+			if(dist >= 180.) {
+				px[0] = px[1] = px[2] = px[3] = 0;
+				continue;
+			}
+
+			xd = (M_PI / 180.) * ((double)dx);
+			yd = (M_PI / 180.) * ((double)dy);
+
+			/* Angle around the centre, shifted by -pi, then converted to degrees. */
+			angle = atan2(yd, xd) - M_PI;
+			hue   = (angle * 180.) / M_PI;
+			if(hue < 0.0) hue += 360;
+
+			hsv_v.h = hue / 360.;
+			hsv_v.s = (dist / 179.61);
+			hsv_v.v = picker->inner.value;
+			color = hsv2rgb(hsv_v);
+
+			px[0] = color.r * 255;
+			px[1] = color.g * 255;
+			px[2] = color.b * 255;
+			px[3] = 255;
+		}
+	}
+
+	/* Destroy the previous pixmap, if there is one, before loading the fresh buffer. */
+	if(picker->inner.color_picker_pixmap != NULL) MwLLDestroyPixmap(picker->inner.color_picker_pixmap);
+	picker->inner.color_picker_pixmap =
+	    MwLoadRaw(picker->inner.parent, picker->inner.color_picker_image_data, PICKER_SIZE, PICKER_SIZE);
+	MwVaApply(picker->inner.color_picker_img, MwNpixmap, picker->inner.color_picker_pixmap, NULL);
+}
+
+/*
+ * Mouse-down handler for the wheel image. user is the color_picker, call is the
+ * MwLLMouse event. Reads the pixel under the pointer and shows it as "#RRGGBB"
+ * on the color display frame and its label, with a contrasting text color.
+ */
+static void color_picker_click(MwWidget handle, void* user, void* call) {
+	color_picker* picker = (color_picker*)user;
+	MwLLMouse*    mouse  = (MwLLMouse*)call;
+	/* "#RRGGBB" plus NUL, on the stack so nothing leaks; assumes MwSetText copies its argument — TODO confirm */
+	char hexColor[8];
+	char fgColor[8];
+	int  i, r, g, b;
+
+	(void)handle;
+
+	/* A point outside the PICKER_SIZE square would index past the pixel buffer. */
+	if(mouse->point.x < 0 || mouse->point.x >= PICKER_SIZE ||
+	   mouse->point.y < 0 || mouse->point.y >= PICKER_SIZE) {
+		return;
+	}
+
+	color_picker_wheel_image_update(picker);
+
+	i = ((mouse->point.y * PICKER_SIZE) + mouse->point.x) * 4;
+	r = picker->inner.color_picker_image_data[i];
+	g = picker->inner.color_picker_image_data[i + 1];
+	b = picker->inner.color_picker_image_data[i + 2];
+
+	snprintf(hexColor, sizeof(hexColor), "#%02X%02X%02X", r, g, b);
+	/* Per channel, take the opposite extreme so the label text stays readable. */
+	snprintf(fgColor, sizeof(fgColor), "#%02X%02X%02X", r > 128 ? 0 : 255, g > 128 ? 0 : 255, b > 128 ? 0 : 255);
+
+	MwSetText(picker->inner.color_display, MwNbackground, hexColor);
+	MwSetText(picker->inner.color_display_text, MwNforeground, fgColor);
+	MwSetText(picker->inner.color_display_text, MwNbackground, hexColor);
+	MwSetText(picker->inner.color_display_text, MwNtext, hexColor);
+}
+/* Changed-handler of the value slider: slider value 0 maps to 1.0 and 1024 to 0.0; the wheel is then repainted. */
+static void color_picker_on_change_value(MwWidget handle, void* user, void* call) {
+	color_picker* self = (color_picker*)user;
+	int position, delta;
+
+	(void)call;
+
+	position = MwGetInteger(handle, MwNvalue);
+	delta    = MwGetInteger(handle, MwNchangedBy); /* fetched but not used */
+	(void)delta;
+
+	self->inner.value = 1.0 - ((double)position / 1024.);
+	color_picker_wheel_image_update(self);
+}
+/*
+ * Build the picker widgets under parent, centring the wheel in a w x h area.
+ * Returns a heap-allocated color_picker that the registered handlers keep
+ * using, or NULL if an allocation fails.
+ */
+color_picker* color_picker_setup(MwWidget parent, int w, int h) {
+	color_picker* picker = calloc(1, sizeof(*picker)); /* zero-filled, like the former malloc + memset */
+	if(picker == NULL) return NULL;
+
+	picker->inner.parent = parent;
+	picker->inner.value = 1;
+	picker->inner.color_picker_pixmap = NULL;
+	picker->inner.color_display_pixmap = NULL;
+
+	/* Allocate both buffers before any widget exists so a failure leaves nothing half-built. */
+	picker->inner.color_picker_image_data = malloc(PICKER_SIZE * PICKER_SIZE * 4);
+	picker->inner.color_display_image_data = malloc(PICKER_SIZE * COLOR_DISPLAY_HEIGHT * 4);
+	if(picker->inner.color_picker_image_data == NULL || picker->inner.color_display_image_data == NULL) {
+		free(picker->inner.color_picker_image_data);
+		free(picker->inner.color_display_image_data);
+		free(picker);
+		return NULL;
+	}
+
+	/* The wheel image; its first frame is rendered right away. */
+	picker->inner.color_picker_img =
+	    MwVaCreateWidget(MwImageClass, "image", picker->inner.parent, IMG_POS_X(w), IMG_POS_Y(h),
+			     PICKER_SIZE, PICKER_SIZE, NULL);
+	color_picker_wheel_image_update(picker);
+	MwAddUserHandler(picker->inner.color_picker_img, MwNmouseDownHandler, color_picker_click, picker);
+
+	/* Frame that previews the picked color. */
+	picker->inner.color_display = MwCreateWidget(
+	    MwFrameClass, "colorDisplayFrame", picker->inner.parent, IMG_POS_X(w),
+	    IMG_POS_Y(h) + PICKER_SIZE + MARGIN, PICKER_SIZE, PICKER_SIZE / 16);
+	MwSetText(picker->inner.color_display, MwNbackground, "#000000");
+	MwSetInteger(picker->inner.color_display, MwnhasBorder, 1); /* NOTE(review): lowercase n, unlike the other MwN* names — confirm */
+	MwSetInteger(picker->inner.color_display, MwNinverted, 1);
+
+	/* Label inside the frame that shows the hex string. */
+	picker->inner.color_display_text = MwCreateWidget(
+	    MwLabelClass, "colorDisplayFrameText", picker->inner.color_display,
+	    MwDefaultBorderWidth(parent), MwDefaultBorderWidth(parent),
+	    PICKER_SIZE - MwDefaultBorderWidth(parent),
+	    (PICKER_SIZE / 16) - (MwDefaultBorderWidth(parent) * 2));
+
+	/* Vertical slider beside the wheel, range 0..1024; its handler sets the HSV value. */
+	picker->inner.value_slider = MwVaCreateWidget(
+	    MwScrollBarClass, "value-slider", picker->inner.parent,
+	    IMG_POS_X(w) + PICKER_SIZE + MARGIN, /* x */
+	    IMG_POS_Y(h), /* y */
+	    SCROLL_BAR_WIDTH, /* width */
+	    PICKER_SIZE, /* height */
+	    MwNorientation, MwVERTICAL, MwNminValue, 0, MwNmaxValue, 1024, NULL);
+	MwAddUserHandler(picker->inner.value_slider, MwNchangedHandler, color_picker_on_change_value, picker);
+
+	return picker;
+}
+
+/*
+ * Create a color picker window with handle's width and height as a child of
+ * handle, call MwLLDetach on it with the point (0, 0) and make it a popup of
+ * handle. Returns the new window widget.
+ */
+MwWidget MwColorPicker(MwWidget handle, const char* title) {
+	MwPoint	      p;
+	color_picker* wheel;
+	MwWidget      window;
+	int	      ww = MwGetInteger(handle, MwNwidth);
+	int	      wh = MwGetInteger(handle, MwNheight);
+	p.x = p.y = 0;
+
+	window = MwVaCreateWidget(MwWindowClass, "main", handle, MwDEFAULT, MwDEFAULT, ww, wh, MwNtitle, title, NULL);
+	wheel  = color_picker_setup(window, ww, wh);
+	(void)wheel; /* used by the widget handlers from here on; NOTE(review): nothing frees it yet */
+	MwLLDetach(window->lowlevel, &p);
+	MwLLMakePopup(window->lowlevel, handle->lowlevel);
+	return window;
+}
diff --git a/src/color_picker/color_picker.h b/src/color_picker/color_picker.h
new file mode 100644
index 0000000..065dbb2
--- /dev/null
+++ b/src/color_picker/color_picker.h
@@ -0,0 +1,44 @@
+/* $Id$ */
+
+#ifndef __MW_INTERNAL_COLOR_PICKER_H
+#define __MW_INTERNAL_COLOR_PICKER_H
+
+#include <Mw/ColorPicker.h>
+#include <Mw/Milsko.h>
+
+#define PICKER_SIZE 360 /* the wheel image is PICKER_SIZE x PICKER_SIZE pixels */
+#define IMG_POS_X(w) (((w) - PICKER_SIZE) / 2) /* x that centres the wheel in width w; argument parenthesized for safe expansion */
+#define IMG_POS_Y(h) (((h) - PICKER_SIZE) / 2) /* y that centres the wheel in height h */
+#define SCROLL_BAR_WIDTH 12
+#define MARGIN (PICKER_SIZE / 32)
+#define COLOR_DISPLAY_HEIGHT 12
+
+typedef struct _color_picker	    color_picker;
+typedef struct _color_picker_inner  color_picker_inner;
+typedef struct _color_picker_vtable color_picker_vtable;
+
+struct _color_picker_inner { // state shared by the picker's setup code and event handlers
+	MwWidget       parent; // widget the picker's children are created under
+	MwWidget       color_picker_img; // image widget showing the wheel
+	MwWidget       value_slider; // vertical scroll bar with range 0..1024
+	MwWidget       color_display; // frame whose background is set to the picked color
+	MwWidget       color_display_text; // label inside the frame showing "#RRGGBB"
+	MwLLPixmap     color_picker_pixmap; // pixmap built from color_picker_image_data; NULL until first render
+	MwLLPixmap     color_display_pixmap; // only ever set to NULL in color_picker.c
+	double	       value; // HSV value for the wheel: 1.0 - slider / 1024
+	unsigned char* color_picker_image_data; // PICKER_SIZE * PICKER_SIZE * 4 bytes, four per pixel
+	unsigned char* color_display_image_data; // PICKER_SIZE * COLOR_DISPLAY_HEIGHT * 4 bytes; allocated but not otherwise used in color_picker.c
+	MwPoint	       point; // not referenced in color_picker.c
+};
+
+/* No operations are defined yet; the placeholder member exists because ISO C has no member-less structs. */
+struct _color_picker_vtable { char reserved; };
+
+struct _color_picker { // complete picker state; allocated by color_picker_setup()
+	color_picker_inner  inner; // widgets, pixmaps and pixel buffers
+	color_picker_vtable vtable; // no operations are defined for it yet
+};
+
+color_picker* color_picker_setup(MwWidget parent, int w, int h); // creates the picker widgets under parent; w and h feed IMG_POS_X / IMG_POS_Y
+
+#endif
diff --git a/src/math/default.c b/src/math/default.c
new file mode 100644
index 0000000..5233178
--- /dev/null
+++ b/src/math/default.c
@@ -0,0 +1,412 @@
+#include <Mw/LowLevelMath.h>
+#include "math.h"
+#include <assert.h>
+#include <math.h>
+
+static void default_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a + b->un.u8.a;
+	out->un.u8.b = a->un.u8.b + b->un.u8.b;
+	out->un.u8.c = a->un.u8.c + b->un.u8.c;
+	out->un.u8.d = a->un.u8.d + b->un.u8.d;
+	out->un.u8.e = a->un.u8.e + b->un.u8.e;
+	out->un.u8.f = a->un.u8.f + b->un.u8.f;
+	out->un.u8.g = a->un.u8.g + b->un.u8.g;
+	out->un.u8.h = a->un.u8.h + b->un.u8.h;
+};
+static void default_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a - b->un.u8.a;
+	out->un.u8.b = a->un.u8.b - b->un.u8.b;
+	out->un.u8.c = a->un.u8.c - b->un.u8.c;
+	out->un.u8.d = a->un.u8.d - b->un.u8.d;
+	out->un.u8.e = a->un.u8.e - b->un.u8.e;
+	out->un.u8.f = a->un.u8.f - b->un.u8.f;
+	out->un.u8.g = a->un.u8.g - b->un.u8.g;
+	out->un.u8.h = a->un.u8.h - b->un.u8.h;
+};
+static void default_multiply_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a * b->un.u8.a;
+	out->un.u8.b = a->un.u8.b * b->un.u8.b;
+	out->un.u8.c = a->un.u8.c * b->un.u8.c;
+	out->un.u8.d = a->un.u8.d * b->un.u8.d;
+	out->un.u8.e = a->un.u8.e * b->un.u8.e;
+	out->un.u8.f = a->un.u8.f * b->un.u8.f;
+	out->un.u8.g = a->un.u8.g * b->un.u8.g;
+	out->un.u8.h = a->un.u8.h * b->un.u8.h;
+};
+static void default_reciprocal_u8(MwLLVec* a, MwLLVec* out) {
+	out->un.u8.a = powf(a->un.u8.a, -1);
+	out->un.u8.b = powf(a->un.u8.b, -1);
+	out->un.u8.c = powf(a->un.u8.c, -1);
+	out->un.u8.d = powf(a->un.u8.d, -1);
+	out->un.u8.e = powf(a->un.u8.e, -1);
+	out->un.u8.f = powf(a->un.u8.f, -1);
+	out->un.u8.g = powf(a->un.u8.g, -1);
+	out->un.u8.h = powf(a->un.u8.h, -1);
+};
+static void default_squareRoot_u8(MwLLVec* a, MwLLVec* out) {
+	out->un.u8.a = sqrt(a->un.u8.a);
+	out->un.u8.b = sqrt(a->un.u8.b);
+	out->un.u8.c = sqrt(a->un.u8.c);
+	out->un.u8.d = sqrt(a->un.u8.d);
+	out->un.u8.e = sqrt(a->un.u8.e);
+	out->un.u8.f = sqrt(a->un.u8.f);
+	out->un.u8.g = sqrt(a->un.u8.g);
+	out->un.u8.h = sqrt(a->un.u8.h);
+}
+static void default_shiftRight_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a >> b->un.u8.a;
+	out->un.u8.b = a->un.u8.b >> b->un.u8.b;
+	out->un.u8.c = a->un.u8.c >> b->un.u8.c;
+	out->un.u8.d = a->un.u8.d >> b->un.u8.d;
+	out->un.u8.e = a->un.u8.e >> b->un.u8.e;
+	out->un.u8.f = a->un.u8.f >> b->un.u8.f;
+	out->un.u8.g = a->un.u8.g >> b->un.u8.g;
+	out->un.u8.h = a->un.u8.h >> b->un.u8.h;
+};
+static void default_shiftLeft_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a << b->un.u8.a;
+	out->un.u8.b = a->un.u8.b << b->un.u8.b;
+	out->un.u8.c = a->un.u8.c << b->un.u8.c;
+	out->un.u8.d = a->un.u8.d << b->un.u8.d;
+	out->un.u8.e = a->un.u8.e << b->un.u8.e;
+	out->un.u8.f = a->un.u8.f << b->un.u8.f;
+	out->un.u8.g = a->un.u8.g << b->un.u8.g;
+	out->un.u8.h = a->un.u8.h << b->un.u8.h;
+}
+static void default_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a == b->un.u8.a;
+	out->un.u8.b = a->un.u8.b == b->un.u8.b;
+	out->un.u8.c = a->un.u8.c == b->un.u8.c;
+	out->un.u8.d = a->un.u8.d == b->un.u8.d;
+	out->un.u8.e = a->un.u8.e == b->un.u8.e;
+	out->un.u8.f = a->un.u8.f == b->un.u8.f;
+	out->un.u8.g = a->un.u8.g == b->un.u8.g;
+	out->un.u8.h = a->un.u8.h == b->un.u8.h;
+};
+static void default_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u8.a = a->un.u8.a >= b->un.u8.a;
+	out->un.u8.b = a->un.u8.b >= b->un.u8.b;
+	out->un.u8.c = a->un.u8.c >= b->un.u8.c;
+	out->un.u8.d = a->un.u8.d >= b->un.u8.d;
+	out->un.u8.e = a->un.u8.e >= b->un.u8.e;
+	out->un.u8.f = a->un.u8.f >= b->un.u8.f;
+	out->un.u8.g = a->un.u8.g >= b->un.u8.g;
+	out->un.u8.h = a->un.u8.h >= b->un.u8.h;
+};
+static MwLLMathVTable table_u8 = {
+    .Add	 = default_add_u8,
+    .Sub	 = default_sub_u8,
+    .Multiply	 = default_multiply_u8,
+    .Reciprocal	 = default_reciprocal_u8,
+    .SquareRoot	 = default_squareRoot_u8,
+    .ShiftRight	 = default_shiftRight_u8,
+    .ShiftLeft	 = default_shiftLeft_u8,
+    .Equal	 = default_equal_u8,
+    .GreaterThen = default_greaterThen_u8,
+};
+static void default_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a + b, truncated to 16 bits */
+	out->un.u16.a = a->un.u16.a + b->un.u16.a;
+	out->un.u16.b = a->un.u16.b + b->un.u16.b;
+	out->un.u16.c = a->un.u16.c + b->un.u16.c;
+	out->un.u16.d = a->un.u16.d + b->un.u16.d;
+}
+static void default_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a - b, truncated to 16 bits */
+	out->un.u16.a = a->un.u16.a - b->un.u16.a;
+	out->un.u16.b = a->un.u16.b - b->un.u16.b;
+	out->un.u16.c = a->un.u16.c - b->un.u16.c;
+	out->un.u16.d = a->un.u16.d - b->un.u16.d;
+}
+static void default_multiply_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* multiplied as MwU32: the promoted int product could overflow */
+	out->un.u16.a = (MwU32)a->un.u16.a * b->un.u16.a;
+	out->un.u16.b = (MwU32)a->un.u16.b * b->un.u16.b;
+	out->un.u16.c = (MwU32)a->un.u16.c * b->un.u16.c;
+	out->un.u16.d = (MwU32)a->un.u16.d * b->un.u16.d;
+}
+static void default_reciprocal_u16(MwLLVec* a, MwLLVec* out) { /* integer 1 / a; a zero lane gives 0 instead of converting infinity */
+	out->un.u16.a = a->un.u16.a ? 1 / a->un.u16.a : 0;
+	out->un.u16.b = a->un.u16.b ? 1 / a->un.u16.b : 0;
+	out->un.u16.c = a->un.u16.c ? 1 / a->un.u16.c : 0;
+	out->un.u16.d = a->un.u16.d ? 1 / a->un.u16.d : 0;
+}
+static void default_squareRoot_u16(MwLLVec* a, MwLLVec* out) { /* per-lane sqrt, truncated toward zero */
+	out->un.u16.a = sqrt(a->un.u16.a);
+	out->un.u16.b = sqrt(a->un.u16.b);
+	out->un.u16.c = sqrt(a->un.u16.c);
+	out->un.u16.d = sqrt(a->un.u16.d);
+}
+
+static void default_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* a >> b; counts of 16 or more give 0 */
+	out->un.u16.a = b->un.u16.a < 16 ? a->un.u16.a >> b->un.u16.a : 0;
+	out->un.u16.b = b->un.u16.b < 16 ? a->un.u16.b >> b->un.u16.b : 0;
+	out->un.u16.c = b->un.u16.c < 16 ? a->un.u16.c >> b->un.u16.c : 0;
+	out->un.u16.d = b->un.u16.d < 16 ? a->un.u16.d >> b->un.u16.d : 0;
+}
+static void default_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* a << b; counts of 16 or more give 0 */
+	out->un.u16.a = b->un.u16.a < 16 ? a->un.u16.a << b->un.u16.a : 0;
+	out->un.u16.b = b->un.u16.b < 16 ? a->un.u16.b << b->un.u16.b : 0;
+	out->un.u16.c = b->un.u16.c < 16 ? a->un.u16.c << b->un.u16.c : 0;
+	out->un.u16.d = b->un.u16.d < 16 ? a->un.u16.d << b->un.u16.d : 0;
+}
+static void default_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* 1 where equal, else 0 */
+	out->un.u16.a = a->un.u16.a == b->un.u16.a;
+	out->un.u16.b = a->un.u16.b == b->un.u16.b;
+	out->un.u16.c = a->un.u16.c == b->un.u16.c;
+	out->un.u16.d = a->un.u16.d == b->un.u16.d;
+}
+static void default_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* 1 where a is strictly greater than b, else 0 */
+	out->un.u16.a = a->un.u16.a > b->un.u16.a;
+	out->un.u16.b = a->un.u16.b > b->un.u16.b;
+	out->un.u16.c = a->un.u16.c > b->un.u16.c;
+	out->un.u16.d = a->un.u16.d > b->un.u16.d;
+}
+static MwLLMathVTable table_u16 = { /* And, Or and LesserThen are left unset (NULL) */
+    .Add = default_add_u16,
+    .Sub = default_sub_u16,
+    .Multiply = default_multiply_u16,
+    .Reciprocal = default_reciprocal_u16,
+    .SquareRoot = default_squareRoot_u16,
+    .ShiftRight = default_shiftRight_u16,
+    .ShiftLeft = default_shiftLeft_u16,
+    .Equal = default_equal_u16,
+    .GreaterThen = default_greaterThen_u16,
+};
+
+static void default_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a + b->un.u32.a;
+	out->un.u32.b = a->un.u32.b + b->un.u32.b;
+}
+static void default_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a - b->un.u32.a;
+	out->un.u32.b = a->un.u32.b - b->un.u32.b;
+}
+static void default_multiply_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a * b->un.u32.a;
+	out->un.u32.b = a->un.u32.b * b->un.u32.b;
+}
+static void default_reciprocal_u32(MwLLVec* a, MwLLVec* out) {
+	out->un.u32.a = powf(a->un.u32.a, -1);
+	out->un.u32.b = powf(a->un.u32.b, -1);
+};
+static void default_squareRoot_u32(MwLLVec* a, MwLLVec* out) {
+	out->un.u32.a = sqrt(a->un.u32.a);
+	out->un.u32.b = sqrt(a->un.u32.b);
+};
+
+static void default_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a >> b->un.u32.a;
+	out->un.u32.b = a->un.u32.b >> b->un.u32.b;
+};
+static void default_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a << b->un.u32.a;
+	out->un.u32.b = a->un.u32.b << b->un.u32.b;
+}
+static void default_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a == b->un.u32.a;
+	out->un.u32.b = a->un.u32.b == b->un.u32.b;
+}
+static void default_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.u32.a = a->un.u32.a >= b->un.u32.a;
+	out->un.u32.b = a->un.u32.b >= b->un.u32.b;
+}
+static MwLLMathVTable table_u32 = {
+    .Add	 = default_add_u32,
+    .Sub	 = default_sub_u32,
+    .Multiply	 = default_multiply_u32,
+    .Reciprocal	 = default_reciprocal_u32,
+    .SquareRoot	 = default_squareRoot_u32,
+    .ShiftRight	 = default_shiftRight_u32,
+    .ShiftLeft	 = default_shiftLeft_u32,
+    .Equal	 = default_equal_u32,
+    .GreaterThen = default_greaterThen_u32,
+};
+
+static void default_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a + b, converted back to 8 bits */
+	out->un.i8.a = a->un.i8.a + b->un.i8.a;
+	out->un.i8.b = a->un.i8.b + b->un.i8.b;
+	out->un.i8.c = a->un.i8.c + b->un.i8.c;
+	out->un.i8.d = a->un.i8.d + b->un.i8.d;
+	out->un.i8.e = a->un.i8.e + b->un.i8.e;
+	out->un.i8.f = a->un.i8.f + b->un.i8.f;
+	out->un.i8.g = a->un.i8.g + b->un.i8.g;
+	out->un.i8.h = a->un.i8.h + b->un.i8.h;
+}
+static void default_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a - b, converted back to 8 bits */
+	out->un.i8.a = a->un.i8.a - b->un.i8.a;
+	out->un.i8.b = a->un.i8.b - b->un.i8.b;
+	out->un.i8.c = a->un.i8.c - b->un.i8.c;
+	out->un.i8.d = a->un.i8.d - b->un.i8.d;
+	out->un.i8.e = a->un.i8.e - b->un.i8.e;
+	out->un.i8.f = a->un.i8.f - b->un.i8.f;
+	out->un.i8.g = a->un.i8.g - b->un.i8.g;
+	out->un.i8.h = a->un.i8.h - b->un.i8.h;
+}
+static void default_multiply_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a * b, converted back to 8 bits */
+	out->un.i8.a = a->un.i8.a * b->un.i8.a;
+	out->un.i8.b = a->un.i8.b * b->un.i8.b;
+	out->un.i8.c = a->un.i8.c * b->un.i8.c;
+	out->un.i8.d = a->un.i8.d * b->un.i8.d;
+	out->un.i8.e = a->un.i8.e * b->un.i8.e;
+	out->un.i8.f = a->un.i8.f * b->un.i8.f;
+	out->un.i8.g = a->un.i8.g * b->un.i8.g;
+	out->un.i8.h = a->un.i8.h * b->un.i8.h;
+}
+static void default_reciprocal_i8(MwLLVec* a, MwLLVec* out) { /* integer 1 / a; a zero lane gives 0 instead of converting infinity */
+	out->un.i8.a = a->un.i8.a ? 1 / a->un.i8.a : 0;
+	out->un.i8.b = a->un.i8.b ? 1 / a->un.i8.b : 0;
+	out->un.i8.c = a->un.i8.c ? 1 / a->un.i8.c : 0;
+	out->un.i8.d = a->un.i8.d ? 1 / a->un.i8.d : 0;
+	out->un.i8.e = a->un.i8.e ? 1 / a->un.i8.e : 0;
+	out->un.i8.f = a->un.i8.f ? 1 / a->un.i8.f : 0;
+	out->un.i8.g = a->un.i8.g ? 1 / a->un.i8.g : 0;
+	out->un.i8.h = a->un.i8.h ? 1 / a->un.i8.h : 0;
+}
+static void default_squareRoot_i8(MwLLVec* a, MwLLVec* out) { /* per-lane sqrt; a negative lane gives 0 instead of converting NaN */
+	out->un.i8.a = a->un.i8.a < 0 ? 0 : sqrt(a->un.i8.a);
+	out->un.i8.b = a->un.i8.b < 0 ? 0 : sqrt(a->un.i8.b);
+	out->un.i8.c = a->un.i8.c < 0 ? 0 : sqrt(a->un.i8.c);
+	out->un.i8.d = a->un.i8.d < 0 ? 0 : sqrt(a->un.i8.d);
+	out->un.i8.e = a->un.i8.e < 0 ? 0 : sqrt(a->un.i8.e);
+	out->un.i8.f = a->un.i8.f < 0 ? 0 : sqrt(a->un.i8.f);
+	out->un.i8.g = a->un.i8.g < 0 ? 0 : sqrt(a->un.i8.g);
+	out->un.i8.h = a->un.i8.h < 0 ? 0 : sqrt(a->un.i8.h);
+}
+
+static void default_equal_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* 1 where equal, else 0 */
+	out->un.i8.a = a->un.i8.a == b->un.i8.a;
+	out->un.i8.b = a->un.i8.b == b->un.i8.b;
+	out->un.i8.c = a->un.i8.c == b->un.i8.c;
+	out->un.i8.d = a->un.i8.d == b->un.i8.d;
+	out->un.i8.e = a->un.i8.e == b->un.i8.e;
+	out->un.i8.f = a->un.i8.f == b->un.i8.f;
+	out->un.i8.g = a->un.i8.g == b->un.i8.g;
+	out->un.i8.h = a->un.i8.h == b->un.i8.h;
+}
+static void default_greaterThen_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* 1 where a is strictly greater than b, else 0 */
+	out->un.i8.a = a->un.i8.a > b->un.i8.a;
+	out->un.i8.b = a->un.i8.b > b->un.i8.b;
+	out->un.i8.c = a->un.i8.c > b->un.i8.c;
+	out->un.i8.d = a->un.i8.d > b->un.i8.d;
+	out->un.i8.e = a->un.i8.e > b->un.i8.e;
+	out->un.i8.f = a->un.i8.f > b->un.i8.f;
+	out->un.i8.g = a->un.i8.g > b->un.i8.g;
+	out->un.i8.h = a->un.i8.h > b->un.i8.h;
+}
+static MwLLMathVTable table_i8 = { /* And, Or and LesserThen are left unset (NULL) */
+    .Add = default_add_i8,
+    .Sub = default_sub_i8,
+    .Multiply = default_multiply_i8,
+    .Reciprocal = default_reciprocal_i8,
+    .SquareRoot = default_squareRoot_i8,
+    .ShiftRight = default_shiftRight_u8, /* NOTE(review): unsigned kernel, so the lanes are shifted as unsigned bytes — confirm */
+    .ShiftLeft = default_shiftLeft_u8,
+    .Equal = default_equal_i8,
+    .GreaterThen = default_greaterThen_i8,
+};
+static void default_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a + b, converted back to 16 bits */
+	out->un.i16.a = a->un.i16.a + b->un.i16.a;
+	out->un.i16.b = a->un.i16.b + b->un.i16.b;
+	out->un.i16.c = a->un.i16.c + b->un.i16.c;
+	out->un.i16.d = a->un.i16.d + b->un.i16.d;
+}
+static void default_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a - b, converted back to 16 bits */
+	out->un.i16.a = a->un.i16.a - b->un.i16.a;
+	out->un.i16.b = a->un.i16.b - b->un.i16.b;
+	out->un.i16.c = a->un.i16.c - b->un.i16.c;
+	out->un.i16.d = a->un.i16.d - b->un.i16.d;
+}
+static void default_multiply_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per-lane a * b, converted back to 16 bits */
+	out->un.i16.a = a->un.i16.a * b->un.i16.a;
+	out->un.i16.b = a->un.i16.b * b->un.i16.b;
+	out->un.i16.c = a->un.i16.c * b->un.i16.c;
+	out->un.i16.d = a->un.i16.d * b->un.i16.d;
+}
+static void default_reciprocal_i16(MwLLVec* a, MwLLVec* out) { /* integer 1 / a; a zero lane gives 0 instead of converting infinity */
+	out->un.i16.a = a->un.i16.a ? 1 / a->un.i16.a : 0;
+	out->un.i16.b = a->un.i16.b ? 1 / a->un.i16.b : 0;
+	out->un.i16.c = a->un.i16.c ? 1 / a->un.i16.c : 0;
+	out->un.i16.d = a->un.i16.d ? 1 / a->un.i16.d : 0;
+}
+static void default_squareRoot_i16(MwLLVec* a, MwLLVec* out) { /* per-lane sqrt; a negative lane gives 0 instead of converting NaN */
+	out->un.i16.a = a->un.i16.a < 0 ? 0 : sqrt(a->un.i16.a);
+	out->un.i16.b = a->un.i16.b < 0 ? 0 : sqrt(a->un.i16.b);
+	out->un.i16.c = a->un.i16.c < 0 ? 0 : sqrt(a->un.i16.c);
+	out->un.i16.d = a->un.i16.d < 0 ? 0 : sqrt(a->un.i16.d);
+}
+
+static void default_equal_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* 1 where equal, else 0 */
+	out->un.i16.a = a->un.i16.a == b->un.i16.a;
+	out->un.i16.b = a->un.i16.b == b->un.i16.b;
+	out->un.i16.c = a->un.i16.c == b->un.i16.c;
+	out->un.i16.d = a->un.i16.d == b->un.i16.d;
+}
+static void default_greaterThen_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* 1 where a is strictly greater than b, else 0 */
+	out->un.i16.a = a->un.i16.a > b->un.i16.a;
+	out->un.i16.b = a->un.i16.b > b->un.i16.b;
+	out->un.i16.c = a->un.i16.c > b->un.i16.c;
+	out->un.i16.d = a->un.i16.d > b->un.i16.d;
+}
+static MwLLMathVTable table_i16 = { /* And, Or and LesserThen are left unset (NULL) */
+    .Add = default_add_i16,
+    .Sub = default_sub_i16,
+    .Multiply = default_multiply_i16,
+    .Reciprocal = default_reciprocal_i16,
+    .SquareRoot = default_squareRoot_i16,
+    .ShiftRight = default_shiftRight_u16, /* NOTE(review): unsigned kernel, so the lanes are shifted as unsigned words — confirm */
+    .ShiftLeft = default_shiftLeft_u16,
+    .Equal = default_equal_i16,
+    .GreaterThen = default_greaterThen_i16,
+};
+
+static void default_add_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a + b->un.i32.a;
+	out->un.i32.b = a->un.i32.b + b->un.i32.b;
+}
+static void default_sub_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a - b->un.i32.a;
+	out->un.i32.b = a->un.i32.b - b->un.i32.b;
+}
+static void default_multiply_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a * b->un.i32.a;
+	out->un.i32.b = a->un.i32.b * b->un.i32.b;
+}
+static void default_reciprocal_i32(MwLLVec* a, MwLLVec* out) {
+	out->un.i32.a = powf(a->un.i32.a, -1);
+	out->un.i32.b = powf(a->un.i32.b, -1);
+};
+static void default_squareRoot_i32(MwLLVec* a, MwLLVec* out) {
+	out->un.i32.a = sqrt(a->un.i32.a);
+	out->un.i32.b = sqrt(a->un.i32.b);
+};
+
+static void default_equal_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a == b->un.i32.a;
+	out->un.i32.b = a->un.i32.b == b->un.i32.b;
+}
+static void default_greaterThen_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	out->un.i32.a = a->un.i32.a >= b->un.i32.a;
+	out->un.i32.b = a->un.i32.b >= b->un.i32.b;
+}
+static MwLLMathVTable table_i32 = {
+    .Add	 = default_add_i32,
+    .Sub	 = default_sub_i32,
+    .Multiply	 = default_multiply_i32,
+    .Reciprocal	 = default_reciprocal_i32,
+    .SquareRoot	 = default_squareRoot_i32,
+    .ShiftRight	 = default_shiftRight_u32,
+    .ShiftLeft	 = default_shiftLeft_u32,
+    .Equal	 = default_equal_i32,
+    .GreaterThen = default_greaterThen_i32,
+};
+
+static MwLLMathVTable* defaultMultiTable[_MwLLVecType_Max] = { /* portable C kernels, indexed by MwLLVecType */
+    [_MwLLVecTypeU8x8]  = &table_u8,
+    [_MwLLVecTypeU16x4] = &table_u16,
+    [_MwLLVecTypeU32x2] = &table_u32,
+    [_MwLLVecTypeI8x8]  = &table_i8,
+    [_MwLLVecTypeI16x4] = &table_i16,
+    [_MwLLVecTypeI32x2] = &table_i32,
+};
+
+MwLLMathVTable** default_multi_table(void) { /* (void) makes this a real prototype; returns the array above */
+	return defaultMultiTable;
+}
diff --git a/src/math/math.c b/src/math/math.c
new file mode 100644
index 0000000..152dea8
--- /dev/null
+++ b/src/math/math.c
@@ -0,0 +1,125 @@
+#include <Mw/LowLevelMath.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "math.h"
+#include "Mw/BaseTypes.h"
+#include "x86intrin.h"
+
+/*
+ * Build a vector of layout ty from the variadic lane values. Every lane is read with
+ * va_arg(va, int). A ty with no matching case leaves the payload zero, not indeterminate.
+ */
+MwLLVec _MwLLVecCreateGeneric(MwLLVecType ty, ...) {
+	MwLLVec vec;
+	va_list va;
+
+	vec.ty	   = ty;
+	vec.un.all = 0;
+	va_start(va, ty);
+	// clang-format off
+#define LANES_AB(lanes) vec.un.lanes.a = va_arg(va, int); vec.un.lanes.b = va_arg(va, int);
+#define LANES_CD(lanes) vec.un.lanes.c = va_arg(va, int); vec.un.lanes.d = va_arg(va, int);
+#define LANES_EF(lanes) vec.un.lanes.e = va_arg(va, int); vec.un.lanes.f = va_arg(va, int);
+#define LANES_GH(lanes) vec.un.lanes.g = va_arg(va, int); vec.un.lanes.h = va_arg(va, int);
+	switch(ty) {
+	case _MwLLVecTypeU8x8:  LANES_AB(u8)  LANES_CD(u8)  LANES_EF(u8)  LANES_GH(u8)  break;
+	case _MwLLVecTypeU16x4: LANES_AB(u16) LANES_CD(u16)                             break;
+	case _MwLLVecTypeU32x2: LANES_AB(u32)                                           break;
+	case _MwLLVecTypeI8x8:  LANES_AB(i8)  LANES_CD(i8)  LANES_EF(i8)  LANES_GH(i8)  break;
+	case _MwLLVecTypeI16x4: LANES_AB(i16) LANES_CD(i16)                             break;
+	case _MwLLVecTypeI32x2: LANES_AB(i32)                                           break;
+	default: /* _MwLLVecType_Max or an out-of-range value: no lanes are read */     break;
+	}
+#undef LANES_AB
+#undef LANES_CD
+#undef LANES_EF
+#undef LANES_GH
+	// clang-format on
+	va_end(va);
+
+	return vec;
+}
+
+/* Report whether the CPU advertises MMX: CPUID leaf 1, EDX bit 23. */
+static MwBool hasMMX(void) {
+	MwU32 eax, ebx, ecx, edx;
+
+	/* CPUID overwrites EAX, EBX, ECX and EDX; each one must be declared as an
+	 * output, or the compiler may keep a live value in a register it destroys. */
+	__asm__ __volatile__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(1));
+
+	return (edx & (1u << 23)) != 0;
+}
+
+/* Per-type vtables in use; assigned by the first lookup. */
+static MwLLMathVTable** mwLLMultiTable;
+
+/* Steady-state lookup: index the already selected table set. */
+static MwLLMathVTable* multiTableGet(MwLLVecType ty) {
+	return mwLLMultiTable[ty];
+}
+
+static MwLLMathVTable* multiTableSetupAndGet(MwLLVecType ty);
+
+/* Lookup routine currently in effect; starts as the one-time setup variant. */
+static MwLLMathVTable* (*mwLLmathFunc)(MwLLVecType ty) = multiTableSetupAndGet;
+
+/* First-call lookup: fetch the default tables, pass them to mmx_apply() when
+ * hasMMX() reports support, then route later lookups to multiTableGet(). */
+static MwLLMathVTable* multiTableSetupAndGet(MwLLVecType ty) {
+	mwLLMultiTable = default_multi_table();
+#if defined(__i386__) || defined(__x86_64__)
+	if(hasMMX()) mmx_apply(mwLLMultiTable);
+#endif
+	mwLLmathFunc = multiTableGet;
+	return multiTableGet(ty);
+}
+
+/* Entry point used by the MwLLMath* wrappers. */
+static MwLLMathVTable* getMultiTable(MwLLVecType ty) { return mwLLmathFunc(ty); }
+
+/* Public entry points: assert that the operands share one layout, then call the kernel registered for it. */
+void MwLLMathAdd(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->Add(a, b, out);
+}
+void MwLLMathSub(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->Sub(a, b, out);
+}
+void MwLLMathMultiply(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->Multiply(a, b, out);
+}
+void MwLLMathReciprocal(MwLLVec* a, MwLLVec* out) {
+	assert(a->ty == out->ty);
+	getMultiTable(a->ty)->Reciprocal(a, out);
+}
+void MwLLMathSquareRoot(MwLLVec* a, MwLLVec* out) {
+	assert(a->ty == out->ty);
+	getMultiTable(a->ty)->SquareRoot(a, out);
+}
+void MwLLMathShiftRight(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->ShiftRight(a, b, out);
+}
+void MwLLMathShiftLeft(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->ShiftLeft(a, b, out);
+}
+void MwLLMathEqual(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->Equal(a, b, out);
+}
+void MwLLMathGreaterThen(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->GreaterThen(a, b, out);
+}
+void MwLLMathLesserThen(MwLLVec* a, MwLLVec* b, MwLLVec* out) {
+	assert(a->ty == b->ty && a->ty == out->ty);
+	getMultiTable(a->ty)->GreaterThen(b, a, out); /* the default tables leave LesserThen unset, so use GreaterThen with the operands swapped */
+}
+void MwLLMathAnd(MwLLVec* a, MwLLVec* b, MwLLVec* out) { out->un.all = a->un.all & b->un.all; } /* whole 64-bit payload; ty is not checked */
+void MwLLMathOr(MwLLVec* a, MwLLVec* b, MwLLVec* out) { out->un.all = a->un.all | b->un.all; } /* whole 64-bit payload; ty is not checked */
diff --git a/src/math/math.h b/src/math/math.h
new file mode 100644
index 0000000..b637c42
--- /dev/null
+++ b/src/math/math.h
@@ -0,0 +1,28 @@
+/* $Id$ */
+
+#ifndef __MW_LOWLEVEL_INTERNAL_MATH_H__
+#define __MW_LOWLEVEL_INTERNAL_MATH_H__
+
+#include <Mw/LowLevelMath.h>
+
+struct _MwLLMathVTable { /* Table of operation implementations for one vector type; math.c picks a table by the vector's ty field. */
+	void (*Add)(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* binary ops take two inputs and write the result into out */
+	void (*Multiply)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Sub)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Reciprocal)(MwLLVec* a, MwLLVec* out); /* unary ops take one input and write the result into out */
+	void (*SquareRoot)(MwLLVec* a, MwLLVec* out);
+	void (*And)(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* NOTE(review): MwLLMathAnd in math.c works on un.all directly and does not call this slot */
+	void (*Or)(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* NOTE(review): MwLLMathOr in math.c works on un.all directly and does not call this slot */
+	void (*ShiftRight)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*ShiftLeft)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*Equal)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*GreaterThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out);
+	void (*LesserThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out); /* NOTE(review): no dispatcher for this slot was seen in the reviewed part of math.c -- confirm */
+};
+
+typedef struct _MwLLMathVTable MwLLMathVTable; /* short name used by the implementation files */
+
+MwLLMathVTable** default_multi_table(void); /* Returns an array of per-type vtable pointers (presumably the portable implementations -- confirm in default.c). */
+void		 mmx_apply(MwLLMathVTable** t); /* Overwrites selected entries of t[type] with MMX implementations; a no-op stub on non-x86 builds. */
+
+#endif
diff --git a/src/math/mmx.c b/src/math/mmx.c
new file mode 100644
index 0000000..e45c399
--- /dev/null
+++ b/src/math/mmx.c
@@ -0,0 +1,103 @@
+#include <Mw/LowLevelMath.h>
+#include "math.h"
+#include <assert.h>
+#include <mmintrin.h>
+#include <stdio.h>
+#include <x86intrin.h>
+
+#define DO_MMX_INTRINSIC(intrin, _ty, _rty, _tyn) /* out->un._rty = intrin(a->un._ty, b->un._ty); needs a, b, out in the enclosing scope and declares a local m */ \
+	__m64 m	     = intrin(*(__m64*)&a->un._ty, *(__m64*)&b->un._ty); \
+	out->un._rty = *(struct _tyn*)&m; _m_empty() /* EMMS after the store so later x87 floating-point code sees a clean FPU tag word; the invocation supplies the final ';' */
+
+static void mmx_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.u8 = a.u8 + b.u8 per byte with unsigned saturation (PADDUSB) */
+	DO_MMX_INTRINSIC(_m_paddusb, u8, u8, _MwLLVecDataU8x8);
+}
+static void mmx_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.u8 = a.u8 - b.u8 per byte with unsigned saturation, clamping at 0 (PSUBUSB) */
+	DO_MMX_INTRINSIC(_m_psubusb, u8, u8, _MwLLVecDataU8x8);
+}
+static void mmx_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per byte: out lane = 0xFF where a == b, else 0x00 (PCMPEQB) */
+	DO_MMX_INTRINSIC(_m_pcmpeqb, u8, u8, _MwLLVecDataU8x8);
+}
+static void mmx_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per byte: out lane = 0xFF where a > b, else 0x00 (PCMPGTB) */
+	DO_MMX_INTRINSIC(_m_pcmpgtb, u8, u8, _MwLLVecDataU8x8); /* NOTE(review): PCMPGTB compares lanes as SIGNED bytes, so u8 values >= 0x80 order as negative -- confirm or bias the inputs */
+}
+
+static void mmx_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.u16 = a.u16 + b.u16 per 16-bit lane with unsigned saturation (PADDUSW) */
+	DO_MMX_INTRINSIC(_m_paddusw, u16, u16, _MwLLVecDataU16x4);
+}
+static void mmx_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.u16 = a.u16 - b.u16 per 16-bit lane with unsigned saturation, clamping at 0 (PSUBUSW) */
+	DO_MMX_INTRINSIC(_m_psubusw, u16, u16, _MwLLVecDataU16x4);
+}
+static void mmx_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* logical (zero-fill) right shift of each 16-bit lane of a (PSRLW) */
+	DO_MMX_INTRINSIC(_m_psrlw, u16, u16, _MwLLVecDataU16x4); /* NOTE(review): PSRLW reads ONE count from all 64 bits of b and applies it to every lane; it is not a per-lane count -- confirm callers pass b that way */
+}
+static void mmx_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* left shift of each 16-bit lane of a (PSLLW) */
+	DO_MMX_INTRINSIC(_m_psllw, u16, u16, _MwLLVecDataU16x4); /* NOTE(review): PSLLW reads ONE count from all 64 bits of b and applies it to every lane; it is not a per-lane count -- confirm callers pass b that way */
+}
+static void mmx_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per 16-bit lane: out lane = 0xFFFF where a == b, else 0 (PCMPEQW) */
+	DO_MMX_INTRINSIC(_m_pcmpeqw, u16, u16, _MwLLVecDataU16x4);
+}
+static void mmx_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per 16-bit lane: out lane = 0xFFFF where a > b, else 0 (PCMPGTW) */
+	DO_MMX_INTRINSIC(_m_pcmpgtw, u16, u16, _MwLLVecDataU16x4); /* NOTE(review): PCMPGTW compares lanes as SIGNED words, so u16 values >= 0x8000 order as negative -- confirm or bias the inputs */
+}
+
+static void mmx_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.u32 = a.u32 + b.u32 per 32-bit lane, wrapping (PADDD); unlike the u8/u16 variants there is no saturation */
+	DO_MMX_INTRINSIC(_m_paddd, u32, u32, _MwLLVecDataU32x2);
+}
+static void mmx_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.u32 = a.u32 - b.u32 per 32-bit lane, wrapping (PSUBD); unlike the u8/u16 variants there is no saturation */
+	DO_MMX_INTRINSIC(_m_psubd, u32, u32, _MwLLVecDataU32x2);
+}
+static void mmx_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* logical (zero-fill) right shift of each 32-bit lane of a (PSRLD) */
+	DO_MMX_INTRINSIC(_m_psrld, u32, u32, _MwLLVecDataU32x2); /* NOTE(review): PSRLD reads ONE count from all 64 bits of b and applies it to every lane; it is not a per-lane count -- confirm callers pass b that way */
+}
+static void mmx_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* left shift of each 32-bit lane of a (PSLLD) */
+	DO_MMX_INTRINSIC(_m_pslld, u32, u32, _MwLLVecDataU32x2); /* NOTE(review): PSLLD reads ONE count from all 64 bits of b and applies it to every lane; it is not a per-lane count -- confirm callers pass b that way */
+}
+static void mmx_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per 32-bit lane: out lane = 0xFFFFFFFF where a == b, else 0 (PCMPEQD) */
+	DO_MMX_INTRINSIC(_m_pcmpeqd, u32, u32, _MwLLVecDataU32x2); /* dword compare: the word-sized PCMPEQW would report half-matching lanes as 0xFFFF0000 / 0x0000FFFF */
+}
+static void mmx_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* per 32-bit lane: out lane = 0xFFFFFFFF where a > b, else 0 (PCMPGTD) */
+	DO_MMX_INTRINSIC(_m_pcmpgtd, u32, u32, _MwLLVecDataU32x2); /* dword compare instead of the word-sized PCMPGTW; NOTE(review): still a SIGNED compare, so u32 values >= 0x80000000 order as negative */
+}
+
+static void mmx_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.i8 = a.i8 + b.i8 per byte with signed saturation (PADDSB) */
+	DO_MMX_INTRINSIC(_m_paddsb, i8, i8, _MwLLVecDataI8x8);
+}
+static void mmx_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.i8 = a.i8 - b.i8 per byte with signed saturation (PSUBSB) */
+	DO_MMX_INTRINSIC(_m_psubsb, i8, i8, _MwLLVecDataI8x8);
+}
+
+static void mmx_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.i16 = a.i16 + b.i16 per 16-bit lane with signed saturation (PADDSW) */
+	DO_MMX_INTRINSIC(_m_paddsw, i16, i16, _MwLLVecDataI16x4);
+}
+static void mmx_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { /* out.i16 = a.i16 - b.i16 per 16-bit lane with signed saturation (PSUBSW) */
+	DO_MMX_INTRINSIC(_m_psubsw, i16, i16, _MwLLVecDataI16x4);
+}
+
+void mmx_apply(MwLLMathVTable** t) { /* Replaces selected entries of the per-type vtables in t with the MMX routines above; slots not listed here keep whatever t already held. */
+	t[_MwLLVecTypeU8x8]->Add	  = mmx_add_u8; /* every t[type] is dereferenced, so each listed entry must already point at a vtable */
+	t[_MwLLVecTypeU8x8]->Sub	  = mmx_sub_u8;
+	t[_MwLLVecTypeU8x8]->GreaterThen  = mmx_greaterThen_u8;
+	t[_MwLLVecTypeU8x8]->Equal	  = mmx_equal_u8;
+	t[_MwLLVecTypeU16x4]->Add	  = mmx_add_u16;
+	t[_MwLLVecTypeU16x4]->Sub	  = mmx_sub_u16;
+	t[_MwLLVecTypeU16x4]->ShiftLeft	  = mmx_shiftLeft_u16;
+	t[_MwLLVecTypeU16x4]->ShiftRight  = mmx_shiftRight_u16;
+	t[_MwLLVecTypeU16x4]->GreaterThen = mmx_greaterThen_u16;
+	t[_MwLLVecTypeU16x4]->Equal	  = mmx_equal_u16;
+	t[_MwLLVecTypeU32x2]->Add	  = mmx_add_u32;
+	t[_MwLLVecTypeU32x2]->Sub	  = mmx_sub_u32;
+	t[_MwLLVecTypeU32x2]->ShiftLeft	  = mmx_shiftLeft_u32;
+	t[_MwLLVecTypeU32x2]->ShiftRight  = mmx_shiftRight_u32;
+	t[_MwLLVecTypeU32x2]->GreaterThen = mmx_greaterThen_u32;
+	t[_MwLLVecTypeU32x2]->Equal	  = mmx_equal_u32;
+
+	t[_MwLLVecTypeI8x8]->Add	 = mmx_add_i8; /* signed types: only Add/Sub get signed-specific routines */
+	t[_MwLLVecTypeI8x8]->Sub	 = mmx_sub_i8;
+	t[_MwLLVecTypeI16x4]->Add	 = mmx_add_i16;
+	t[_MwLLVecTypeI16x4]->Sub	 = mmx_sub_i16;
+	t[_MwLLVecTypeI16x4]->ShiftLeft	 = mmx_shiftLeft_u16; /* the unsigned shift routines are reused for the signed types */
+	t[_MwLLVecTypeI16x4]->ShiftRight = mmx_shiftRight_u16; /* NOTE(review): this is a logical (zero-fill) shift; a sign-preserving shift would be PSRAW -- confirm intent */
+	t[_MwLLVecTypeI32x2]->ShiftLeft	 = mmx_shiftLeft_u32;
+	t[_MwLLVecTypeI32x2]->ShiftRight = mmx_shiftRight_u32; /* NOTE(review): this is a logical (zero-fill) shift; a sign-preserving shift would be PSRAD -- confirm intent */
+}
diff --git a/src/math/mmx_guard.c b/src/math/mmx_guard.c
new file mode 100644
index 0000000..7d2fdd3
--- /dev/null
+++ b/src/math/mmx_guard.c
@@ -0,0 +1,6 @@
+#if defined(__WATCOMC__) || defined(__i386__) || defined(__amd64__) /* x86 targets: build the real MMX implementation */
+#include "mmx.c"
+#else /* other targets: no-op stub matching the mmx_apply prototype in math.h */
+#include "math.h"
+void mmx_apply(MwLLMathVTable** t) { (void)t; }
+#endif