From 3719f17250b2b0de95d23cdd4c54c8c636d5e22e Mon Sep 17 00:00:00 2001 From: IoIxD Date: Sat, 1 Nov 2025 03:37:40 +0000 Subject: [PATCH] merge pr #7 (take 2) git-svn-id: http://svn2.nishi.boats/svn/milsko/trunk@540 b9cfdab3-6d41-4d17-bbe4-086880011989 --- include/Mw/ColorPicker.h | 38 +++ include/Mw/LowLevelMath.h | 76 ++++++ mk/math.mk | 1 + src/color_picker/color_picker.c | 220 +++++++++++++++++ src/color_picker/color_picker.h | 44 ++++ src/math/default.c | 412 ++++++++++++++++++++++++++++++++ src/math/math.c | 125 ++++++++++ src/math/math.h | 28 +++ src/math/mmx.c | 103 ++++++++ src/math/mmx_guard.c | 6 + 10 files changed, 1053 insertions(+) create mode 100644 include/Mw/ColorPicker.h create mode 100644 include/Mw/LowLevelMath.h create mode 100644 mk/math.mk create mode 100644 src/color_picker/color_picker.c create mode 100644 src/color_picker/color_picker.h create mode 100644 src/math/default.c create mode 100644 src/math/math.c create mode 100644 src/math/math.h create mode 100644 src/math/mmx.c create mode 100644 src/math/mmx_guard.c diff --git a/include/Mw/ColorPicker.h b/include/Mw/ColorPicker.h new file mode 100644 index 0000000..7cea187 --- /dev/null +++ b/include/Mw/ColorPicker.h @@ -0,0 +1,38 @@ +/* $Id$ */ + +/*! + * @file Mw/ColorPicker.h + * @brief Color picker + */ +#ifndef __MW_COLORPICKER_H__ +#define __MW_COLORPICKER_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _MwRGB MwRGB; +typedef struct _MwHSV MwHSV; + +struct _MwRGB { + double r; + double g; + double b; +}; + +struct _MwHSV { + double h; // angle in degrees + double s; // a fraction between 0 and 1 + double v; // a fraction between 0 and 1 +}; + +MWDECL MwWidget MwColorPicker(MwWidget handle, const char* title); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/Mw/LowLevelMath.h b/include/Mw/LowLevelMath.h new file mode 100644 index 0000000..d1493b4 --- /dev/null +++ b/include/Mw/LowLevelMath.h @@ -0,0 +1,76 @@ +/* $Id$ */ +/*! + * @file Mw/LowLevelMath.h + * @brief A few portable functions for supporting simultaneously supporting SIMD and not supporting it + * @warning This is mostly used internally. Anything undocumented, and/or anything with an _ prefix (that doesn't have a corresponding typedef) should be avoided. + */ + +#ifndef __MW_LOWLEVEL_MATH_H__ +#define __MW_LOWLEVEL_MATH_H__ + +#include + +#if !defined(__i386__) && !defined(__x86_64__) +#warning LowLevelMath.h does not yet support non-X86 platforms +#endif + +/*! + * @brief Generic vector type + * @warning Do not try to instantiate this yourself, use the appropriate functions instead. + */ +typedef struct _MwLLVec MwLLVec; + +typedef enum _MwLLVecType MwLLVecType; +typedef union _MwLLVecUnion MwLLVecUnion; + +// clang-format off +struct _MwLLVecDataU8x8 { MwU8 a; MwU8 b; MwU8 c; MwU8 d; MwU8 e; MwU8 f; MwU8 g; MwU8 h;}; +struct _MwLLVecDataU16x4 { MwU16 a; MwU16 b; MwU16 c; MwU16 d;}; +struct _MwLLVecDataU32x2 { MwU32 a; MwU32 b;}; +struct _MwLLVecDataI8x8 { MwI8 a; MwI8 b; MwI8 c; MwI8 d; MwI8 e; MwI8 f; MwI8 g; MwI8 h;}; +struct _MwLLVecDataI16x4 { MwI16 a; MwI16 b; MwI16 c; MwI16 d;}; +struct _MwLLVecDataI32x2 { MwI32 a; MwI32 b;}; +union _MwLLVecUnion { + struct _MwLLVecDataU8x8 u8; struct _MwLLVecDataU16x4 u16; struct _MwLLVecDataU32x2 u32; + struct _MwLLVecDataI8x8 i8; struct _MwLLVecDataI16x4 i16; struct _MwLLVecDataI32x2 i32; + MwU64 all; +}; +// clang-format on +enum _MwLLVecType { + _MwLLVecTypeU8x8 = 0, + _MwLLVecTypeU16x4 = 1, + _MwLLVecTypeU32x2 = 2, + _MwLLVecTypeI8x8 = 3, + _MwLLVecTypeI16x4 = 4, + _MwLLVecTypeI32x2 = 5, + + _MwLLVecType_Max, +}; +struct _MwLLVec { + MwLLVecType ty; + MwLLVecUnion un; +}; + +MwLLVec _MwLLVecCreateGeneric(MwLLVecType ty, ...); + +#define MwLLVecU8x8(a, b, c, d, e, f, g, h) _MwLLVecCreateGeneric(_MwLLVecTypeU8x8, a, b, c, d, e, f, g, h) +#define MwLLVecU16x4(a, b, c, d) _MwLLVecCreateGeneric(_MwLLVecTypeU16x4, a, b, c, d) +#define MwLLVecU32x2(a, b) _MwLLVecCreateGeneric(_MwLLVecTypeU32x2, a, b) +#define MwLLVecI8x8(a, b, c, d, e, f, g, h) _MwLLVecCreateGeneric(_MwLLVecTypeI8x8, a, b, c, d, e, f, g, h) +#define MwLLVecI16x4(a, b, c, d) _MwLLVecCreateGeneric(_MwLLVecTypeI16x4, a, b, c, d) +#define MwLLVecI32x2(a, b) _MwLLVecCreateGeneric(_MwLLVecTypeI32x2, a, b) + +void MwLLMathAdd(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathMultiply(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathSub(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathReciprocal(MwLLVec* a, MwLLVec* out); +void MwLLMathSquareRoot(MwLLVec* a, MwLLVec* out); +void MwLLMathAnd(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathOr(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathShiftRight(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathShiftLeft(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathEqual(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathGreaterThen(MwLLVec* a, MwLLVec* b, MwLLVec* out); +void MwLLMathLesserThen(MwLLVec* a, MwLLVec* b, MwLLVec* out); + +#endif diff --git a/mk/math.mk b/mk/math.mk new file mode 100644 index 0000000..4e009a5 --- /dev/null +++ b/mk/math.mk @@ -0,0 +1 @@ +L_OBJS += src/math/default.o src/math/math.o src/math/mmx_guard.o diff --git a/src/color_picker/color_picker.c b/src/color_picker/color_picker.c new file mode 100644 index 0000000..43814ef --- /dev/null +++ b/src/color_picker/color_picker.c @@ -0,0 +1,220 @@ +/* $Id$ */ + +#include "color_picker.h" +#include "Mw/Core.h" +#include +#include + +#include + +static MwRGB hsv2rgb(MwHSV in) { + MwRGB out; + + int i = (int)(floor(in.h * 6)); + double f = in.h * 6 - i; + double p = in.v * (1 - in.s); + double q = in.v * (1 - f * in.s); + double t = in.v * (1 - (1 - f) * in.s); + + switch(i % 6) { + case 0: + out.r = in.v, out.g = t, out.b = p; + break; + case 1: + out.r = q, out.g = in.v, out.b = p; + break; + case 2: + out.r = p, out.g = in.v, out.b = t; + break; + case 3: + out.r = p, out.g = q, out.b = in.v; + break; + case 4: + out.r = t, out.g = p, out.b = in.v; + break; + case 5: + out.r = in.v, out.g = p, out.b = q; + break; + } + + return out; +} + +static void color_picker_wheel_image_update(color_picker* picker) { + int y, x; + for(y = 0; y < PICKER_SIZE; y++) { + for(x = 0; x < PICKER_SIZE; x++) { + int i = ((y * PICKER_SIZE) + x) * 4; + int _x = x - (PICKER_SIZE / 2); + int _y = y - (PICKER_SIZE / 2); + + double dist = sqrt(_x * _x + _y * _y); + + if(dist >= 180.) { + picker->inner.color_picker_image_data[i] = 0; + picker->inner.color_picker_image_data[i + 1] = 0; + picker->inner.color_picker_image_data[i + 2] = 0; + picker->inner.color_picker_image_data[i + 3] = 0; + } else { + double xd = (M_PI / 180.) * ((double)_x); + double yd = (M_PI / 180.) * ((double)_y); + + float angle = atan2(yd, xd) - M_PI; + float hue = (angle * 180.) / M_PI; + + MwHSV hsv_v; + MwRGB color; + + if(hue < 0.0) { + hue += 360; + } + hsv_v.h = hue / 360.; + hsv_v.s = (dist / 179.61); + hsv_v.v = picker->inner.value; + color = hsv2rgb(hsv_v); + + picker->inner.color_picker_image_data[i] = color.r * 255; + picker->inner.color_picker_image_data[i + 1] = color.g * 255; + picker->inner.color_picker_image_data[i + 2] = color.b * 255; + + picker->inner.color_picker_image_data[i + 3] = 255; + } + } + } + if(picker->inner.color_picker_pixmap != NULL) { + MwLLDestroyPixmap(picker->inner.color_picker_pixmap); + } + picker->inner.color_picker_pixmap = MwLoadRaw( + picker->inner.parent, picker->inner.color_picker_image_data, PICKER_SIZE, PICKER_SIZE); + MwVaApply(picker->inner.color_picker_img, MwNpixmap, picker->inner.color_picker_pixmap, NULL); +} + +static void color_picker_click(MwWidget handle, void* user, void* call) { + color_picker* picker = (color_picker*)user; + MwLLMouse* mouse = (MwLLMouse*)call; + char* hexColor; + int i, r, g, b, a; + + (void)handle; + (void)user; + (void)call; + + color_picker_wheel_image_update(picker); + + i = ((mouse->point.y * PICKER_SIZE) + mouse->point.x) * 4; + + r = picker->inner.color_picker_image_data[i]; + g = picker->inner.color_picker_image_data[i + 1]; + b = picker->inner.color_picker_image_data[i + 2]; + a = picker->inner.color_picker_image_data[i + 3]; + + (void)a; + + hexColor = malloc(8); + char* fgColor = malloc(8); + snprintf(hexColor, 8, "#%02X%02X%02X", r, g, b); + + int fr = r > 128 ? 0 : 255; + int fg = g > 128 ? 0 : 255; + int fb = b > 128 ? 0 : 255; + + snprintf(fgColor, 8, "#%02X%02X%02X", fr, fg, fb); + MwSetText(picker->inner.color_display, MwNbackground, hexColor); + MwSetText(picker->inner.color_display_text, MwNforeground, fgColor); + + MwSetText(picker->inner.color_display_text, MwNbackground, hexColor); + MwSetText(picker->inner.color_display_text, MwNtext, hexColor); + free(hexColor); +} +static void color_picker_on_change_value(MwWidget handle, void* user, + void* call) { + color_picker* picker = (color_picker*)user; + + int value = MwGetInteger(handle, MwNvalue); + int diff = MwGetInteger(handle, MwNchangedBy); + + (void)diff; + (void)call; + + picker->inner.value = 1.0 - ((double)value / 1024.); + + color_picker_wheel_image_update(picker); +} +color_picker* color_picker_setup(MwWidget parent, int w, int h) { + color_picker* picker = malloc(sizeof(color_picker)); + memset(picker, 0, sizeof(color_picker)); + + picker->inner.parent = parent; + + picker->inner.color_picker_img = + MwVaCreateWidget(MwImageClass, "image", picker->inner.parent, IMG_POS_X(w), IMG_POS_Y(h), + PICKER_SIZE, PICKER_SIZE, NULL); + + picker->inner.color_picker_image_data = malloc(PICKER_SIZE * PICKER_SIZE * 4); + picker->inner.color_display_image_data = + malloc(PICKER_SIZE * COLOR_DISPLAY_HEIGHT * 4); + + picker->inner.color_picker_pixmap = NULL; + picker->inner.color_display_pixmap = NULL; + picker->inner.value = 1; + + color_picker_wheel_image_update(picker); + + MwAddUserHandler(picker->inner.color_picker_img, MwNmouseDownHandler, + color_picker_click, picker); + + picker->inner.color_display = MwCreateWidget( + MwFrameClass, "colorDisplayFrame", picker->inner.parent, IMG_POS_X(w), + IMG_POS_Y(h) + PICKER_SIZE + MARGIN, PICKER_SIZE, PICKER_SIZE / 16); + MwSetText(picker->inner.color_display, MwNbackground, "#000000"); + MwSetInteger(picker->inner.color_display, MwnhasBorder, 1); + MwSetInteger(picker->inner.color_display, MwNinverted, 1); + + picker->inner.color_display_text = MwCreateWidget( + MwLabelClass, "colorDisplayFrameText", picker->inner.color_display, + MwDefaultBorderWidth(parent), MwDefaultBorderWidth(parent), + PICKER_SIZE - MwDefaultBorderWidth(parent), + (PICKER_SIZE / 16) - (MwDefaultBorderWidth(parent) * 2)); + + picker->inner.value_slider = MwVaCreateWidget( + MwScrollBarClass, "value-slider", picker->inner.parent, + // x + IMG_POS_X(w) + PICKER_SIZE + MARGIN, + + // y + IMG_POS_Y(h), + + // width + SCROLL_BAR_WIDTH, + + // height + PICKER_SIZE, + + MwNorientation, MwVERTICAL, MwNminValue, 0, MwNmaxValue, 1024, NULL); + + MwAddUserHandler(picker->inner.value_slider, MwNchangedHandler, + color_picker_on_change_value, picker); + + return picker; +}; + +MwWidget MwColorPicker(MwWidget handle, const char* title) { + MwPoint p; + color_picker* wheel; + MwWidget window; + int ww = MwGetInteger(handle, MwNwidth); + int wh = MwGetInteger(handle, MwNheight); + p.x = p.y = 0; + + window = MwVaCreateWidget(MwWindowClass, "main", handle, MwDEFAULT, MwDEFAULT, + ww, wh, MwNtitle, title, NULL); + + wheel = color_picker_setup(window, ww, wh); + + MwLLDetach(window->lowlevel, &p); + MwLLMakePopup(window->lowlevel, handle->lowlevel); + + MwLLVec v = MwLLVecU8x8(0, 0, 0, 0, 0, 0, 0, 0); + + return window; +} diff --git a/src/color_picker/color_picker.h b/src/color_picker/color_picker.h new file mode 100644 index 0000000..065dbb2 --- /dev/null +++ b/src/color_picker/color_picker.h @@ -0,0 +1,44 @@ +/* $Id$ */ + +#ifndef __MW_INTERNAL_COLOR_PICKER_H +#define __MW_INTERNAL_COLOR_PICKER_H + +#include +#include + +#define PICKER_SIZE 360 +#define IMG_POS_X(w) ((w - PICKER_SIZE) / 2) +#define IMG_POS_Y(h) ((h - PICKER_SIZE) / 2) +#define SCROLL_BAR_WIDTH 12 +#define MARGIN (PICKER_SIZE / 32) +#define COLOR_DISPLAY_HEIGHT 12 + +typedef struct _color_picker color_picker; +typedef struct _color_picker_inner color_picker_inner; +typedef struct _color_picker_vtable color_picker_vtable; + +struct _color_picker_inner { + MwWidget parent; + MwWidget color_picker_img; + MwWidget value_slider; + MwWidget color_display; + MwWidget color_display_text; + MwLLPixmap color_picker_pixmap; + MwLLPixmap color_display_pixmap; + double value; + unsigned char* color_picker_image_data; + unsigned char* color_display_image_data; + MwPoint point; +}; + +struct _color_picker_vtable { +}; + +struct _color_picker { + color_picker_inner inner; + color_picker_vtable vtable; +}; + +color_picker* color_picker_setup(MwWidget parent, int w, int h); + +#endif diff --git a/src/math/default.c b/src/math/default.c new file mode 100644 index 0000000..5233178 --- /dev/null +++ b/src/math/default.c @@ -0,0 +1,412 @@ +#include +#include "math.h" +#include +#include + +static void default_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u8.a = a->un.u8.a + b->un.u8.a; + out->un.u8.b = a->un.u8.b + b->un.u8.b; + out->un.u8.c = a->un.u8.c + b->un.u8.c; + out->un.u8.d = a->un.u8.d + b->un.u8.d; + out->un.u8.e = a->un.u8.e + b->un.u8.e; + out->un.u8.f = a->un.u8.f + b->un.u8.f; + out->un.u8.g = a->un.u8.g + b->un.u8.g; + out->un.u8.h = a->un.u8.h + b->un.u8.h; +}; +static void default_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u8.a = a->un.u8.a - b->un.u8.a; + out->un.u8.b = a->un.u8.b - b->un.u8.b; + out->un.u8.c = a->un.u8.c - b->un.u8.c; + out->un.u8.d = a->un.u8.d - b->un.u8.d; + out->un.u8.e = a->un.u8.e - b->un.u8.e; + out->un.u8.f = a->un.u8.f - b->un.u8.f; + out->un.u8.g = a->un.u8.g - b->un.u8.g; + out->un.u8.h = a->un.u8.h - b->un.u8.h; +}; +static void default_multiply_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u8.a = a->un.u8.a * b->un.u8.a; + out->un.u8.b = a->un.u8.b * b->un.u8.b; + out->un.u8.c = a->un.u8.c * b->un.u8.c; + out->un.u8.d = a->un.u8.d * b->un.u8.d; + out->un.u8.e = a->un.u8.e * b->un.u8.e; + out->un.u8.f = a->un.u8.f * b->un.u8.f; + out->un.u8.g = a->un.u8.g * b->un.u8.g; + out->un.u8.h = a->un.u8.h * b->un.u8.h; +}; +static void default_reciprocal_u8(MwLLVec* a, MwLLVec* out) { + out->un.u8.a = powf(a->un.u8.a, -1); + out->un.u8.b = powf(a->un.u8.b, -1); + out->un.u8.c = powf(a->un.u8.c, -1); + out->un.u8.d = powf(a->un.u8.d, -1); + out->un.u8.e = powf(a->un.u8.e, -1); + out->un.u8.f = powf(a->un.u8.f, -1); + out->un.u8.g = powf(a->un.u8.g, -1); + out->un.u8.h = powf(a->un.u8.h, -1); +}; +static void default_squareRoot_u8(MwLLVec* a, MwLLVec* out) { + out->un.u8.a = sqrt(a->un.u8.a); + out->un.u8.b = sqrt(a->un.u8.b); + out->un.u8.c = sqrt(a->un.u8.c); + out->un.u8.d = sqrt(a->un.u8.d); + out->un.u8.e = sqrt(a->un.u8.e); + out->un.u8.f = sqrt(a->un.u8.f); + out->un.u8.g = sqrt(a->un.u8.g); + out->un.u8.h = sqrt(a->un.u8.h); +} +static void default_shiftRight_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u8.a = a->un.u8.a >> b->un.u8.a; + out->un.u8.b = a->un.u8.b >> b->un.u8.b; + out->un.u8.c = a->un.u8.c >> b->un.u8.c; + out->un.u8.d = a->un.u8.d >> b->un.u8.d; + out->un.u8.e = a->un.u8.e >> b->un.u8.e; + out->un.u8.f = a->un.u8.f >> b->un.u8.f; + out->un.u8.g = a->un.u8.g >> b->un.u8.g; + out->un.u8.h = a->un.u8.h >> b->un.u8.h; +}; +static void default_shiftLeft_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u8.a = a->un.u8.a << b->un.u8.a; + out->un.u8.b = a->un.u8.b << b->un.u8.b; + out->un.u8.c = a->un.u8.c << b->un.u8.c; + out->un.u8.d = a->un.u8.d << b->un.u8.d; + out->un.u8.e = a->un.u8.e << b->un.u8.e; + out->un.u8.f = a->un.u8.f << b->un.u8.f; + out->un.u8.g = a->un.u8.g << b->un.u8.g; + out->un.u8.h = a->un.u8.h << b->un.u8.h; +} +static void default_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u8.a = a->un.u8.a == b->un.u8.a; + out->un.u8.b = a->un.u8.b == b->un.u8.b; + out->un.u8.c = a->un.u8.c == b->un.u8.c; + out->un.u8.d = a->un.u8.d == b->un.u8.d; + out->un.u8.e = a->un.u8.e == b->un.u8.e; + out->un.u8.f = a->un.u8.f == b->un.u8.f; + out->un.u8.g = a->un.u8.g == b->un.u8.g; + out->un.u8.h = a->un.u8.h == b->un.u8.h; +}; +static void default_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u8.a = a->un.u8.a >= b->un.u8.a; + out->un.u8.b = a->un.u8.b >= b->un.u8.b; + out->un.u8.c = a->un.u8.c >= b->un.u8.c; + out->un.u8.d = a->un.u8.d >= b->un.u8.d; + out->un.u8.e = a->un.u8.e >= b->un.u8.e; + out->un.u8.f = a->un.u8.f >= b->un.u8.f; + out->un.u8.g = a->un.u8.g >= b->un.u8.g; + out->un.u8.h = a->un.u8.h >= b->un.u8.h; +}; +static MwLLMathVTable table_u8 = { + .Add = default_add_u8, + .Sub = default_sub_u8, + .Multiply = default_multiply_u8, + .Reciprocal = default_reciprocal_u8, + .SquareRoot = default_squareRoot_u8, + .ShiftRight = default_shiftRight_u8, + .ShiftLeft = default_shiftLeft_u8, + .Equal = default_equal_u8, + .GreaterThen = default_greaterThen_u8, +}; +static void default_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u16.a = a->un.u16.a + b->un.u16.a; + out->un.u16.b = a->un.u16.b + b->un.u16.b; + out->un.u16.c = a->un.u16.c + b->un.u16.c; + out->un.u16.d = a->un.u16.d + b->un.u16.d; +} +static void default_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u16.a = a->un.u16.a - b->un.u16.a; + out->un.u16.b = a->un.u16.b - b->un.u16.b; + out->un.u16.c = a->un.u16.c - b->un.u16.c; + out->un.u16.d = a->un.u16.d - b->un.u16.d; +} +static void default_multiply_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u16.a = a->un.u16.a * b->un.u16.a; + out->un.u16.b = a->un.u16.b * b->un.u16.b; + out->un.u16.c = a->un.u16.c * b->un.u16.c; + out->un.u16.d = a->un.u16.d * b->un.u16.d; +} +static void default_reciprocal_u16(MwLLVec* a, MwLLVec* out) { + out->un.u16.a = powf(a->un.u16.a, -1); + out->un.u16.b = powf(a->un.u16.b, -1); + out->un.u16.c = powf(a->un.u16.c, -1); + out->un.u16.d = powf(a->un.u16.d, -1); +}; +static void default_squareRoot_u16(MwLLVec* a, MwLLVec* out) { + out->un.u16.a = sqrt(a->un.u16.a); + out->un.u16.b = sqrt(a->un.u16.b); + out->un.u16.c = sqrt(a->un.u16.c); + out->un.u16.d = sqrt(a->un.u16.d); +}; + +static void default_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u16.a = a->un.u16.a >> b->un.u16.a; + out->un.u16.b = a->un.u16.b >> b->un.u16.b; + out->un.u16.c = a->un.u16.c >> b->un.u16.c; + out->un.u16.d = a->un.u16.d >> b->un.u16.d; +}; +static void default_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u16.a = a->un.u16.a << b->un.u16.a; + out->un.u16.b = a->un.u16.b << b->un.u16.b; + out->un.u16.c = a->un.u16.c << b->un.u16.c; + out->un.u16.d = a->un.u16.d << b->un.u16.d; +} +static void default_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u16.a = a->un.u16.a == b->un.u16.a; + out->un.u16.b = a->un.u16.b == b->un.u16.b; + out->un.u16.c = a->un.u16.c == b->un.u16.c; + out->un.u16.d = a->un.u16.d == b->un.u16.d; +} +static void default_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u16.a = a->un.u16.a >= b->un.u16.a; + out->un.u16.b = a->un.u16.b >= b->un.u16.b; + out->un.u16.c = a->un.u16.c >= b->un.u16.c; + out->un.u16.d = a->un.u16.d >= b->un.u16.d; +} +static MwLLMathVTable table_u16 = { + .Add = default_add_u16, + .Sub = default_sub_u16, + .Multiply = default_multiply_u16, + .Reciprocal = default_reciprocal_u16, + .SquareRoot = default_squareRoot_u16, + .ShiftRight = default_shiftRight_u16, + .ShiftLeft = default_shiftLeft_u16, + .Equal = default_equal_u16, + .GreaterThen = default_greaterThen_u16, +}; + +static void default_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u32.a = a->un.u32.a + b->un.u32.a; + out->un.u32.b = a->un.u32.b + b->un.u32.b; +} +static void default_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u32.a = a->un.u32.a - b->un.u32.a; + out->un.u32.b = a->un.u32.b - b->un.u32.b; +} +static void default_multiply_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u32.a = a->un.u32.a * b->un.u32.a; + out->un.u32.b = a->un.u32.b * b->un.u32.b; +} +static void default_reciprocal_u32(MwLLVec* a, MwLLVec* out) { + out->un.u32.a = powf(a->un.u32.a, -1); + out->un.u32.b = powf(a->un.u32.b, -1); +}; +static void default_squareRoot_u32(MwLLVec* a, MwLLVec* out) { + out->un.u32.a = sqrt(a->un.u32.a); + out->un.u32.b = sqrt(a->un.u32.b); +}; + +static void default_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u32.a = a->un.u32.a >> b->un.u32.a; + out->un.u32.b = a->un.u32.b >> b->un.u32.b; +}; +static void default_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u32.a = a->un.u32.a << b->un.u32.a; + out->un.u32.b = a->un.u32.b << b->un.u32.b; +} +static void default_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u32.a = a->un.u32.a == b->un.u32.a; + out->un.u32.b = a->un.u32.b == b->un.u32.b; +} +static void default_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.u32.a = a->un.u32.a >= b->un.u32.a; + out->un.u32.b = a->un.u32.b >= b->un.u32.b; +} +static MwLLMathVTable table_u32 = { + .Add = default_add_u32, + .Sub = default_sub_u32, + .Multiply = default_multiply_u32, + .Reciprocal = default_reciprocal_u32, + .SquareRoot = default_squareRoot_u32, + .ShiftRight = default_shiftRight_u32, + .ShiftLeft = default_shiftLeft_u32, + .Equal = default_equal_u32, + .GreaterThen = default_greaterThen_u32, +}; + +static void default_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i8.a = a->un.i8.a + b->un.i8.a; + out->un.i8.b = a->un.i8.b + b->un.i8.b; + out->un.i8.c = a->un.i8.c + b->un.i8.c; + out->un.i8.d = a->un.i8.d + b->un.i8.d; + out->un.i8.e = a->un.i8.e + b->un.i8.e; + out->un.i8.f = a->un.i8.f + b->un.i8.f; + out->un.i8.g = a->un.i8.g + b->un.i8.g; + out->un.i8.h = a->un.i8.h + b->un.i8.h; +}; +static void default_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i8.a = a->un.i8.a - b->un.i8.a; + out->un.i8.b = a->un.i8.b - b->un.i8.b; + out->un.i8.c = a->un.i8.c - b->un.i8.c; + out->un.i8.d = a->un.i8.d - b->un.i8.d; + out->un.i8.e = a->un.i8.e - b->un.i8.e; + out->un.i8.f = a->un.i8.f - b->un.i8.f; + out->un.i8.g = a->un.i8.g - b->un.i8.g; + out->un.i8.h = a->un.i8.h - b->un.i8.h; +}; +static void default_multiply_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i8.a = a->un.i8.a * b->un.i8.a; + out->un.i8.b = a->un.i8.b * b->un.i8.b; + out->un.i8.c = a->un.i8.c * b->un.i8.c; + out->un.i8.d = a->un.i8.d * b->un.i8.d; + out->un.i8.e = a->un.i8.e * b->un.i8.e; + out->un.i8.f = a->un.i8.f * b->un.i8.f; + out->un.i8.g = a->un.i8.g * b->un.i8.g; + out->un.i8.h = a->un.i8.h * b->un.i8.h; +}; +static void default_reciprocal_i8(MwLLVec* a, MwLLVec* out) { + out->un.i8.a = powf(a->un.i8.a, -1); + out->un.i8.b = powf(a->un.i8.b, -1); + out->un.i8.c = powf(a->un.i8.c, -1); + out->un.i8.d = powf(a->un.i8.d, -1); + out->un.i8.e = powf(a->un.i8.e, -1); + out->un.i8.f = powf(a->un.i8.f, -1); + out->un.i8.g = powf(a->un.i8.g, -1); + out->un.i8.h = powf(a->un.i8.h, -1); +}; +static void default_squareRoot_i8(MwLLVec* a, MwLLVec* out) { + out->un.i8.a = sqrt(a->un.i8.a); + out->un.i8.b = sqrt(a->un.i8.b); + out->un.i8.c = sqrt(a->un.i8.c); + out->un.i8.d = sqrt(a->un.i8.d); + out->un.i8.e = sqrt(a->un.i8.e); + out->un.i8.f = sqrt(a->un.i8.f); + out->un.i8.g = sqrt(a->un.i8.g); + out->un.i8.h = sqrt(a->un.i8.h); +} + +static void default_equal_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i8.a = a->un.i8.a == b->un.i8.a; + out->un.i8.b = a->un.i8.b == b->un.i8.b; + out->un.i8.c = a->un.i8.c == b->un.i8.c; + out->un.i8.d = a->un.i8.d == b->un.i8.d; + out->un.i8.e = a->un.i8.e == b->un.i8.e; + out->un.i8.f = a->un.i8.f == b->un.i8.f; + out->un.i8.g = a->un.i8.g == b->un.i8.g; + out->un.i8.h = a->un.i8.h == b->un.i8.h; +}; +static void default_greaterThen_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i8.a = a->un.i8.a >= b->un.i8.a; + out->un.i8.b = a->un.i8.b >= b->un.i8.b; + out->un.i8.c = a->un.i8.c >= b->un.i8.c; + out->un.i8.d = a->un.i8.d >= b->un.i8.d; + out->un.i8.e = a->un.i8.e >= b->un.i8.e; + out->un.i8.f = a->un.i8.f >= b->un.i8.f; + out->un.i8.g = a->un.i8.g >= b->un.i8.g; + out->un.i8.h = a->un.i8.h >= b->un.i8.h; +}; +static MwLLMathVTable table_i8 = { + .Add = default_add_i8, + .Sub = default_sub_i8, + .Multiply = default_multiply_i8, + .Reciprocal = default_reciprocal_i8, + .SquareRoot = default_squareRoot_i8, + .ShiftRight = default_shiftRight_u8, + .ShiftLeft = default_shiftLeft_u8, + .Equal = default_equal_i8, + .GreaterThen = default_greaterThen_i8, +}; +static void default_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i16.a = a->un.i16.a + b->un.i16.a; + out->un.i16.b = a->un.i16.b + b->un.i16.b; + out->un.i16.c = a->un.i16.c + b->un.i16.c; + out->un.i16.d = a->un.i16.d + b->un.i16.d; +} +static void default_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i16.a = a->un.i16.a - b->un.i16.a; + out->un.i16.b = a->un.i16.b - b->un.i16.b; + out->un.i16.c = a->un.i16.c - b->un.i16.c; + out->un.i16.d = a->un.i16.d - b->un.i16.d; +} +static void default_multiply_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i16.a = a->un.i16.a * b->un.i16.a; + out->un.i16.b = a->un.i16.b * b->un.i16.b; + out->un.i16.c = a->un.i16.c * b->un.i16.c; + out->un.i16.d = a->un.i16.d * b->un.i16.d; +} +static void default_reciprocal_i16(MwLLVec* a, MwLLVec* out) { + out->un.i16.a = powf(a->un.i16.a, -1); + out->un.i16.b = powf(a->un.i16.b, -1); + out->un.i16.c = powf(a->un.i16.c, -1); + out->un.i16.d = powf(a->un.i16.d, -1); +}; +static void default_squareRoot_i16(MwLLVec* a, MwLLVec* out) { + out->un.i16.a = sqrt(a->un.i16.a); + out->un.i16.b = sqrt(a->un.i16.b); + out->un.i16.c = sqrt(a->un.i16.c); + out->un.i16.d = sqrt(a->un.i16.d); +}; + +static void default_equal_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i16.a = a->un.i16.a == b->un.i16.a; + out->un.i16.b = a->un.i16.b == b->un.i16.b; + out->un.i16.c = a->un.i16.c == b->un.i16.c; + out->un.i16.d = a->un.i16.d == b->un.i16.d; +} +static void default_greaterThen_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i16.a = a->un.i16.a >= b->un.i16.a; + out->un.i16.b = a->un.i16.b >= b->un.i16.b; + out->un.i16.c = a->un.i16.c >= b->un.i16.c; + out->un.i16.d = a->un.i16.d >= b->un.i16.d; +} +static MwLLMathVTable table_i16 = { + .Add = default_add_i16, + .Sub = default_sub_i16, + .Multiply = default_multiply_i16, + .Reciprocal = default_reciprocal_i16, + .SquareRoot = default_squareRoot_i16, + .ShiftRight = default_shiftRight_u16, + .ShiftLeft = default_shiftLeft_u16, + .Equal = default_equal_i16, + .GreaterThen = default_greaterThen_i16, +}; + +static void default_add_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i32.a = a->un.i32.a + b->un.i32.a; + out->un.i32.b = a->un.i32.b + b->un.i32.b; +} +static void default_sub_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i32.a = a->un.i32.a - b->un.i32.a; + out->un.i32.b = a->un.i32.b - b->un.i32.b; +} +static void default_multiply_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i32.a = a->un.i32.a * b->un.i32.a; + out->un.i32.b = a->un.i32.b * b->un.i32.b; +} +static void default_reciprocal_i32(MwLLVec* a, MwLLVec* out) { + out->un.i32.a = powf(a->un.i32.a, -1); + out->un.i32.b = powf(a->un.i32.b, -1); +}; +static void default_squareRoot_i32(MwLLVec* a, MwLLVec* out) { + out->un.i32.a = sqrt(a->un.i32.a); + out->un.i32.b = sqrt(a->un.i32.b); +}; + +static void default_equal_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i32.a = a->un.i32.a == b->un.i32.a; + out->un.i32.b = a->un.i32.b == b->un.i32.b; +} +static void default_greaterThen_i32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.i32.a = a->un.i32.a >= b->un.i32.a; + out->un.i32.b = a->un.i32.b >= b->un.i32.b; +} +static MwLLMathVTable table_i32 = { + .Add = default_add_i32, + .Sub = default_sub_i32, + .Multiply = default_multiply_i32, + .Reciprocal = default_reciprocal_i32, + .SquareRoot = default_squareRoot_i32, + .ShiftRight = default_shiftRight_u32, + .ShiftLeft = default_shiftLeft_u32, + .Equal = default_equal_i32, + .GreaterThen = default_greaterThen_i32, +}; + +static MwLLMathVTable* defaultMultiTable[_MwLLVecType_Max] = { + &table_u8, // _MwLLVecTypeU8x8 + &table_u16, // _MwLLVecTypeU16x4 + &table_u32, // _MwLLVecTypeU32x2 + &table_i8, // _MwLLVecTypeI8x8 + &table_i16, // _MwLLVecTypeI16x4 + &table_i32, // _MwLLVecTypeI32x2 +}; + +MwLLMathVTable** default_multi_table() { + return defaultMultiTable; +} diff --git a/src/math/math.c b/src/math/math.c new file mode 100644 index 0000000..152dea8 --- /dev/null +++ b/src/math/math.c @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include "math.h" +#include "Mw/BaseTypes.h" +#include "x86intrin.h" + +MwLLVec _MwLLVecCreateGeneric(MwLLVecType ty, ...) { + MwLLVecUnion un; + MwLLVec vec; + va_list va; + + va_start(va, ty); + + // clang-format off +#define _A_B(ty) un.ty.a = va_arg(va, int); un.ty.b = va_arg(va, int); +#define _C_D(ty) un.ty.c = va_arg(va, int); un.ty.d = va_arg(va, int); +#define _E_F(ty) un.ty.e = va_arg(va, int); un.ty.f = va_arg(va, int); +#define _G_H(ty) un.ty.g = va_arg(va, int); un.ty.h = va_arg(va, int); +switch(ty) { + case _MwLLVecTypeU8x8: _A_B(u8); _C_D(u8); _E_F(u8); _G_H(u8); break; + case _MwLLVecTypeU16x4: _A_B(u16); _C_D(u16); break; + case _MwLLVecTypeU32x2: _A_B(u32); break; + case _MwLLVecTypeI8x8: _A_B(i8); _C_D(i8); _E_F(i8); _G_H(i8); break; + case _MwLLVecTypeI16x4: _A_B(i16); _C_D(i16); break; + case _MwLLVecTypeI32x2: _A_B(i32); break; + case _MwLLVecType_Max: break; +} +#undef _A_B +#undef _C_D +#undef _E_F +#undef _G_H + // clang-format on + + va_end(va); + + vec.ty = ty; + vec.un = un; + + return vec; +} + +static MwBool hasMMX(void) { + MwU32 eax = 1; + MwU32 ebx, edx; + + __asm__ __volatile__( + "cpuid" : "=a"(eax), "=b"(ebx), "=d"(edx) + : "a"(1)); + + return (edx & (1 << 23)) == (1 << 23); +} + +static MwLLMathVTable** mwLLMultiTable; +static MwLLMathVTable* multiTableSetupAndGet(MwLLVecType ty); +static MwLLMathVTable* multiTableGet(MwLLVecType ty); + +static MwLLMathVTable* (*mwLLmathFunc)(MwLLVecType ty) = multiTableSetupAndGet; + +static MwLLMathVTable* getMultiTable(MwLLVecType ty) { + return mwLLmathFunc(ty); +} + +static MwLLMathVTable* multiTableSetupAndGet(MwLLVecType ty) { + mwLLMultiTable = default_multi_table(); + +#if defined(__i386__) || defined(__x86_64__) + if(hasMMX()) { + mmx_apply(mwLLMultiTable); + } +#endif + + mwLLmathFunc = multiTableGet; + + return mwLLMultiTable[ty]; +} +static MwLLMathVTable* multiTableGet(MwLLVecType ty) { + return mwLLMultiTable[ty]; +} + +void MwLLMathAdd(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty); + return getMultiTable(a->ty)->Add(a, b, out); +}; +void MwLLMathSub(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty); + return getMultiTable(a->ty)->Sub(a, b, out); +}; +void MwLLMathMultiply(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty); + return getMultiTable(a->ty)->Multiply(a, b, out); +}; +void MwLLMathReciprocal(MwLLVec* a, MwLLVec* out) { + assert(a->ty == out->ty); + return getMultiTable(a->ty)->Reciprocal(a, out); +}; +void MwLLMathSquareRoot(MwLLVec* a, MwLLVec* out) { + assert(a->ty == out->ty); + return getMultiTable(a->ty)->SquareRoot(a, out); +}; + +void MwLLMathShiftRight(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty); + return getMultiTable(a->ty)->ShiftRight(a, b, out); +}; +void MwLLMathShiftLeft(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty); + return getMultiTable(a->ty)->ShiftLeft(a, b, out); +}; +void MwLLMathEqual(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty); + return getMultiTable(a->ty)->Equal(a, b, out); +}; +void MwLLMathGreaterThen(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + assert(a->ty == b->ty && a->ty == out->ty && b->ty == out->ty); + return getMultiTable(a->ty)->GreaterThen(a, b, out); +}; +void MwLLMathAnd(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.all = a->un.all & b->un.all; +}; +void MwLLMathOr(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + out->un.all = a->un.all | b->un.all; +}; diff --git a/src/math/math.h b/src/math/math.h new file mode 100644 index 0000000..b637c42 --- /dev/null +++ b/src/math/math.h @@ -0,0 +1,28 @@ +/* $Id$ */ + +#ifndef __MW_LOWLEVEL_INTERNAL_MATH_H__ +#define __MW_LOWLEVEL_INTERNAL_MATH_H__ + +#include + +struct _MwLLMathVTable { + void (*Add)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*Multiply)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*Sub)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*Reciprocal)(MwLLVec* a, MwLLVec* out); + void (*SquareRoot)(MwLLVec* a, MwLLVec* out); + void (*And)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*Or)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*ShiftRight)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*ShiftLeft)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*Equal)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*GreaterThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out); + void (*LesserThen)(MwLLVec* a, MwLLVec* b, MwLLVec* out); +}; + +typedef struct _MwLLMathVTable MwLLMathVTable; + +MwLLMathVTable** default_multi_table(); +void mmx_apply(MwLLMathVTable**); + +#endif diff --git a/src/math/mmx.c b/src/math/mmx.c new file mode 100644 index 0000000..e45c399 --- /dev/null +++ b/src/math/mmx.c @@ -0,0 +1,103 @@ +#include +#include "math.h" +#include +#include +#include +#include + +#define DO_MMX_INTRINSIC(intrin, _ty, _rty, _tyn) \ + __m64 m = intrin(*(__m64*)&a->un._ty, *(__m64*)&b->un._ty); \ + out->un._rty = *(struct _tyn*)&m; + +static void mmx_add_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_paddusb, u8, u8, _MwLLVecDataU8x8); +}; +static void mmx_sub_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psubusb, u8, u8, _MwLLVecDataU8x8); +}; +static void mmx_equal_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_pcmpeqb, u8, u8, _MwLLVecDataU8x8); +}; +static void mmx_greaterThen_u8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_pcmpgtb, u8, u8, _MwLLVecDataU8x8); +}; + +static void mmx_add_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_paddusw, u16, u16, _MwLLVecDataU16x4); +} +static void mmx_sub_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psubusw, u16, u16, _MwLLVecDataU16x4); +} +static void mmx_shiftRight_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psrlw, u16, u16, _MwLLVecDataU16x4); +}; +static void mmx_shiftLeft_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psllw, u16, u16, _MwLLVecDataU16x4); +} +static void mmx_equal_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_pcmpeqw, u16, u16, _MwLLVecDataU16x4); +} +static void mmx_greaterThen_u16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_pcmpgtw, u16, u16, _MwLLVecDataU16x4); +} + +static void mmx_add_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_paddd, u32, u32, _MwLLVecDataU32x2); +} +static void mmx_sub_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psubd, u32, u32, _MwLLVecDataU32x2); +} +static void mmx_shiftRight_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psrld, u32, u32, _MwLLVecDataU32x2); +}; +static void mmx_shiftLeft_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_pslld, u32, u32, _MwLLVecDataU32x2); +} +static void mmx_equal_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_pcmpeqw, u32, u32, _MwLLVecDataU32x2); +} +static void mmx_greaterThen_u32(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_pcmpgtw, u32, u32, _MwLLVecDataU32x2); +} + +static void mmx_add_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_paddsb, i8, i8, _MwLLVecDataI8x8); +}; +static void mmx_sub_i8(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psubsb, i8, i8, _MwLLVecDataI8x8); +}; + +static void mmx_add_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_paddsw, i16, i16, _MwLLVecDataI16x4); +} +static void mmx_sub_i16(MwLLVec* a, MwLLVec* b, MwLLVec* out) { + DO_MMX_INTRINSIC(_m_psubsw, i16, i16, _MwLLVecDataI16x4); +} + +void mmx_apply(MwLLMathVTable** t) { + t[_MwLLVecTypeU8x8]->Add = mmx_add_u8; + t[_MwLLVecTypeU8x8]->Sub = mmx_sub_u8; + t[_MwLLVecTypeU8x8]->GreaterThen = mmx_greaterThen_u8; + t[_MwLLVecTypeU8x8]->Equal = mmx_equal_u8; + t[_MwLLVecTypeU16x4]->Add = mmx_add_u16; + t[_MwLLVecTypeU16x4]->Sub = mmx_sub_u16; + t[_MwLLVecTypeU16x4]->ShiftLeft = mmx_shiftLeft_u16; + t[_MwLLVecTypeU16x4]->ShiftRight = mmx_shiftRight_u16; + t[_MwLLVecTypeU16x4]->GreaterThen = mmx_greaterThen_u16; + t[_MwLLVecTypeU16x4]->Equal = mmx_equal_u16; + t[_MwLLVecTypeU32x2]->Add = mmx_add_u32; + t[_MwLLVecTypeU32x2]->Sub = mmx_sub_u32; + t[_MwLLVecTypeU32x2]->ShiftLeft = mmx_shiftLeft_u32; + t[_MwLLVecTypeU32x2]->ShiftRight = mmx_shiftRight_u32; + t[_MwLLVecTypeU32x2]->GreaterThen = mmx_greaterThen_u32; + t[_MwLLVecTypeU32x2]->Equal = mmx_equal_u32; + + t[_MwLLVecTypeI8x8]->Add = mmx_add_i8; + t[_MwLLVecTypeI8x8]->Sub = mmx_sub_i8; + t[_MwLLVecTypeI16x4]->Add = mmx_add_i16; + t[_MwLLVecTypeI16x4]->Sub = mmx_sub_i16; + t[_MwLLVecTypeI16x4]->ShiftLeft = mmx_shiftLeft_u16; + t[_MwLLVecTypeI16x4]->ShiftRight = mmx_shiftRight_u16; + t[_MwLLVecTypeI32x2]->ShiftLeft = mmx_shiftLeft_u32; + t[_MwLLVecTypeI32x2]->ShiftRight = mmx_shiftRight_u32; +} diff --git a/src/math/mmx_guard.c b/src/math/mmx_guard.c new file mode 100644 index 0000000..7d2fdd3 --- /dev/null +++ b/src/math/mmx_guard.c @@ -0,0 +1,6 @@ +#if defined(__WATCOMC__) || defined(__i386__) || defined(__amd64__) +#include "mmx.c" +#else +void mmx_apply(MwLLMathVTable** t) { +} +#endif