diff --git a/meson.build b/meson.build index d6c7a6082..8aa493d70 100644 --- a/meson.build +++ b/meson.build @@ -71,6 +71,7 @@ add_project_arguments(global_cpp_args, language: 'cpp') sdl2_dep = dependency('sdl2') thread_dep = dependency('threads') dl_dep = cc.find_library('dl', required: true) +glm_dep = dependency('glm') stb = subproject('stb').get_variable('stb_inc') stb_dep = declare_dependency(include_directories: stb) diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXCollision.h b/targets/app/linux/Stubs/DirectXMath/DirectXCollision.h deleted file mode 100644 index 6605197bd..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXCollision.h +++ /dev/null @@ -1,448 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXCollision.h -- C++ Collision Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -#include "DirectXMath.h" - -namespace DirectX { - -enum ContainmentType { DISJOINT = 0, INTERSECTS = 1, CONTAINS = 2 }; - -enum PlaneIntersectionType { FRONT = 0, INTERSECTING = 1, BACK = 2 }; - -struct BoundingBox; -struct BoundingOrientedBox; -struct BoundingFrustum; - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4324 4820) -// C4324: alignment padding warnings -// C4820: Off by default noise -#endif - -//------------------------------------------------------------------------------------- -// Bounding sphere -//------------------------------------------------------------------------------------- -struct BoundingSphere { - XMFLOAT3 Center; // Center of the sphere. - float Radius; // Radius of the sphere. - - // Creators - BoundingSphere() noexcept : Center(0, 0, 0), Radius(1.f) {} - - BoundingSphere(const BoundingSphere&) = default; - BoundingSphere& operator=(const BoundingSphere&) = default; - - BoundingSphere(BoundingSphere&&) = default; - BoundingSphere& operator=(BoundingSphere&&) = default; - - constexpr BoundingSphere(_In_ const XMFLOAT3& center, - _In_ float radius) noexcept - : Center(center), Radius(radius) {} - - // Methods - void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, - _In_ FXMMATRIX M) const noexcept; - void XM_CALLCONV Transform(_Out_ BoundingSphere& Out, _In_ float Scale, - _In_ FXMVECTOR Rotation, - _In_ FXMVECTOR Translation) const noexcept; - // Transform the sphere - - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept; - ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; - ContainmentType Contains( - _In_ const BoundingOrientedBox& box) const noexcept; - ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; - - bool Intersects(_In_ const BoundingSphere& sh) const noexcept; - bool Intersects(_In_ const BoundingBox& box) const noexcept; - bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; - bool Intersects(_In_ const BoundingFrustum& fr) const noexcept; - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - // Triangle-sphere test - - PlaneIntersectionType XM_CALLCONV - Intersects(_In_ FXMVECTOR Plane) const noexcept; - // Plane-sphere test - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, - _Out_ float& Dist) const noexcept; - // Ray-sphere test - - ContainmentType XM_CALLCONV - ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, - _In_ FXMVECTOR Plane2, _In_ GXMVECTOR Plane3, - _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept; - // Test sphere against six planes (see BoundingFrustum::GetPlanes) - - // Static methods - static void CreateMerged(_Out_ BoundingSphere& Out, - _In_ const BoundingSphere& S1, - _In_ const BoundingSphere& S2) noexcept; - - static void CreateFromBoundingBox(_Out_ BoundingSphere& Out, - _In_ const BoundingBox& box) noexcept; - static void CreateFromBoundingBox( - _Out_ BoundingSphere& Out, - _In_ const BoundingOrientedBox& box) noexcept; - - static void CreateFromPoints(_Out_ BoundingSphere& Out, _In_ size_t Count, - _In_reads_bytes_(sizeof(XMFLOAT3) + - Stride * (Count - 1)) - const XMFLOAT3* pPoints, - _In_ size_t Stride) noexcept; - - static void CreateFromFrustum(_Out_ BoundingSphere& Out, - _In_ const BoundingFrustum& fr) noexcept; -}; - -//------------------------------------------------------------------------------------- -// Axis-aligned bounding box -//------------------------------------------------------------------------------------- -struct BoundingBox { - static constexpr size_t CORNER_COUNT = 8; - - XMFLOAT3 Center; // Center of the box. - XMFLOAT3 Extents; // Distance from the center to each side. - - // Creators - BoundingBox() noexcept : Center(0, 0, 0), Extents(1.f, 1.f, 1.f) {} - - BoundingBox(const BoundingBox&) = default; - BoundingBox& operator=(const BoundingBox&) = default; - - BoundingBox(BoundingBox&&) = default; - BoundingBox& operator=(BoundingBox&&) = default; - - constexpr BoundingBox(_In_ const XMFLOAT3& center, - _In_ const XMFLOAT3& extents) noexcept - : Center(center), Extents(extents) {} - - // Methods - void XM_CALLCONV Transform(_Out_ BoundingBox& Out, - _In_ FXMMATRIX M) const noexcept; - void XM_CALLCONV Transform(_Out_ BoundingBox& Out, _In_ float Scale, - _In_ FXMVECTOR Rotation, - _In_ FXMVECTOR Translation) const noexcept; - - void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept; - // Gets the 8 corners of the box - - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept; - ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; - ContainmentType Contains( - _In_ const BoundingOrientedBox& box) const noexcept; - ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; - - bool Intersects(_In_ const BoundingSphere& sh) const noexcept; - bool Intersects(_In_ const BoundingBox& box) const noexcept; - bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; - bool Intersects(_In_ const BoundingFrustum& fr) const noexcept; - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - // Triangle-Box test - - PlaneIntersectionType XM_CALLCONV - Intersects(_In_ FXMVECTOR Plane) const noexcept; - // Plane-box test - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, - _Out_ float& Dist) const noexcept; - // Ray-Box test - - ContainmentType XM_CALLCONV - ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, - _In_ FXMVECTOR Plane2, _In_ GXMVECTOR Plane3, - _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept; - // Test box against six planes (see BoundingFrustum::GetPlanes) - - // Static methods - static void CreateMerged(_Out_ BoundingBox& Out, _In_ const BoundingBox& b1, - _In_ const BoundingBox& b2) noexcept; - - static void CreateFromSphere(_Out_ BoundingBox& Out, - _In_ const BoundingSphere& sh) noexcept; - - static void XM_CALLCONV CreateFromPoints(_Out_ BoundingBox& Out, - _In_ FXMVECTOR pt1, - _In_ FXMVECTOR pt2) noexcept; - static void CreateFromPoints(_Out_ BoundingBox& Out, _In_ size_t Count, - _In_reads_bytes_(sizeof(XMFLOAT3) + - Stride * (Count - 1)) - const XMFLOAT3* pPoints, - _In_ size_t Stride) noexcept; -}; - -//------------------------------------------------------------------------------------- -// Oriented bounding box -//------------------------------------------------------------------------------------- -struct BoundingOrientedBox { - static constexpr size_t CORNER_COUNT = 8; - - XMFLOAT3 Center; // Center of the box. - XMFLOAT3 Extents; // Distance from the center to each side. - XMFLOAT4 - Orientation; // Unit quaternion representing rotation (box -> world). - - // Creators - BoundingOrientedBox() noexcept - : Center(0, 0, 0), Extents(1.f, 1.f, 1.f), Orientation(0, 0, 0, 1.f) {} - - BoundingOrientedBox(const BoundingOrientedBox&) = default; - BoundingOrientedBox& operator=(const BoundingOrientedBox&) = default; - - BoundingOrientedBox(BoundingOrientedBox&&) = default; - BoundingOrientedBox& operator=(BoundingOrientedBox&&) = default; - - constexpr BoundingOrientedBox(_In_ const XMFLOAT3& center, - _In_ const XMFLOAT3& extents, - _In_ const XMFLOAT4& orientation) noexcept - : Center(center), Extents(extents), Orientation(orientation) {} - - // Methods - void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, - _In_ FXMMATRIX M) const noexcept; - void XM_CALLCONV Transform(_Out_ BoundingOrientedBox& Out, _In_ float Scale, - _In_ FXMVECTOR Rotation, - _In_ FXMVECTOR Translation) const noexcept; - - void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept; - // Gets the 8 corners of the box - - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - ContainmentType Contains(_In_ const BoundingSphere& sh) const noexcept; - ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; - ContainmentType Contains( - _In_ const BoundingOrientedBox& box) const noexcept; - ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; - - bool Intersects(_In_ const BoundingSphere& sh) const noexcept; - bool Intersects(_In_ const BoundingBox& box) const noexcept; - bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; - bool Intersects(_In_ const BoundingFrustum& fr) const noexcept; - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - // Triangle-OrientedBox test - - PlaneIntersectionType XM_CALLCONV - Intersects(_In_ FXMVECTOR Plane) const noexcept; - // Plane-OrientedBox test - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, - _Out_ float& Dist) const noexcept; - // Ray-OrientedBox test - - ContainmentType XM_CALLCONV - ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, - _In_ FXMVECTOR Plane2, _In_ GXMVECTOR Plane3, - _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept; - // Test OrientedBox against six planes (see BoundingFrustum::GetPlanes) - - // Static methods - static void CreateFromBoundingBox(_Out_ BoundingOrientedBox& Out, - _In_ const BoundingBox& box) noexcept; - - static void CreateFromPoints(_Out_ BoundingOrientedBox& Out, - _In_ size_t Count, - _In_reads_bytes_(sizeof(XMFLOAT3) + - Stride * (Count - 1)) - const XMFLOAT3* pPoints, - _In_ size_t Stride) noexcept; -}; - -//------------------------------------------------------------------------------------- -// Bounding frustum -//------------------------------------------------------------------------------------- -struct BoundingFrustum { - static constexpr size_t CORNER_COUNT = 8; - - XMFLOAT3 Origin; // Origin of the frustum (and projection). - XMFLOAT4 Orientation; // Quaternion representing rotation. - - float RightSlope; // Positive X (X/Z) - float LeftSlope; // Negative X - float TopSlope; // Positive Y (Y/Z) - float BottomSlope; // Negative Y - float Near, Far; // Z of the near plane and far plane. - - // Creators - BoundingFrustum() noexcept - : Origin(0, 0, 0), - Orientation(0, 0, 0, 1.f), - RightSlope(1.f), - LeftSlope(-1.f), - TopSlope(1.f), - BottomSlope(-1.f), - Near(0), - Far(1.f) {} - - BoundingFrustum(const BoundingFrustum&) = default; - BoundingFrustum& operator=(const BoundingFrustum&) = default; - - BoundingFrustum(BoundingFrustum&&) = default; - BoundingFrustum& operator=(BoundingFrustum&&) = default; - - constexpr BoundingFrustum(_In_ const XMFLOAT3& origin, - _In_ const XMFLOAT4& orientation, - _In_ float rightSlope, _In_ float leftSlope, - _In_ float topSlope, _In_ float bottomSlope, - _In_ float nearPlane, - _In_ float farPlane) noexcept - : Origin(origin), - Orientation(orientation), - RightSlope(rightSlope), - LeftSlope(leftSlope), - TopSlope(topSlope), - BottomSlope(bottomSlope), - Near(nearPlane), - Far(farPlane) {} - BoundingFrustum(_In_ CXMMATRIX Projection, bool rhcoords = false) noexcept; - - // Methods - void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, - _In_ FXMMATRIX M) const noexcept; - void XM_CALLCONV Transform(_Out_ BoundingFrustum& Out, _In_ float Scale, - _In_ FXMVECTOR Rotation, - _In_ FXMVECTOR Translation) const noexcept; - - void GetCorners(_Out_writes_(8) XMFLOAT3* Corners) const noexcept; - // Gets the 8 corners of the frustum - - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR Point) const noexcept; - ContainmentType XM_CALLCONV Contains(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - ContainmentType Contains(_In_ const BoundingSphere& sp) const noexcept; - ContainmentType Contains(_In_ const BoundingBox& box) const noexcept; - ContainmentType Contains( - _In_ const BoundingOrientedBox& box) const noexcept; - ContainmentType Contains(_In_ const BoundingFrustum& fr) const noexcept; - // Frustum-Frustum test - - bool Intersects(_In_ const BoundingSphere& sh) const noexcept; - bool Intersects(_In_ const BoundingBox& box) const noexcept; - bool Intersects(_In_ const BoundingOrientedBox& box) const noexcept; - bool Intersects(_In_ const BoundingFrustum& fr) const noexcept; - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) const noexcept; - // Triangle-Frustum test - - PlaneIntersectionType XM_CALLCONV - Intersects(_In_ FXMVECTOR Plane) const noexcept; - // Plane-Frustum test - - bool XM_CALLCONV Intersects(_In_ FXMVECTOR rayOrigin, - _In_ FXMVECTOR Direction, - _Out_ float& Dist) const noexcept; - // Ray-Frustum test - - ContainmentType XM_CALLCONV - ContainedBy(_In_ FXMVECTOR Plane0, _In_ FXMVECTOR Plane1, - _In_ FXMVECTOR Plane2, _In_ GXMVECTOR Plane3, - _In_ HXMVECTOR Plane4, _In_ HXMVECTOR Plane5) const noexcept; - // Test frustum against six planes (see BoundingFrustum::GetPlanes) - - void GetPlanes(_Out_opt_ XMVECTOR* NearPlane, _Out_opt_ XMVECTOR* FarPlane, - _Out_opt_ XMVECTOR* RightPlane, - _Out_opt_ XMVECTOR* LeftPlane, _Out_opt_ XMVECTOR* TopPlane, - _Out_opt_ XMVECTOR* BottomPlane) const noexcept; - // Create 6 Planes representation of Frustum - - // Static methods - static void XM_CALLCONV CreateFromMatrix(_Out_ BoundingFrustum& Out, - _In_ FXMMATRIX Projection, - bool rhcoords = false) noexcept; -}; - -//----------------------------------------------------------------------------- -// Triangle intersection testing routines. -//----------------------------------------------------------------------------- -namespace TriangleTests { -bool XM_CALLCONV Intersects(_In_ FXMVECTOR Origin, _In_ FXMVECTOR Direction, - _In_ FXMVECTOR V0, _In_ GXMVECTOR V1, - _In_ HXMVECTOR V2, _Out_ float& Dist) noexcept; -// Ray-Triangle - -bool XM_CALLCONV Intersects(_In_ FXMVECTOR A0, _In_ FXMVECTOR A1, - _In_ FXMVECTOR A2, _In_ GXMVECTOR B0, - _In_ HXMVECTOR B1, _In_ HXMVECTOR B2) noexcept; -// Triangle-Triangle - -PlaneIntersectionType XM_CALLCONV Intersects(_In_ FXMVECTOR V0, - _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2, - _In_ GXMVECTOR Plane) noexcept; -// Plane-Triangle - -ContainmentType XM_CALLCONV -ContainedBy(_In_ FXMVECTOR V0, _In_ FXMVECTOR V1, _In_ FXMVECTOR V2, - _In_ GXMVECTOR Plane0, _In_ HXMVECTOR Plane1, _In_ HXMVECTOR Plane2, - _In_ CXMVECTOR Plane3, _In_ CXMVECTOR Plane4, - _In_ CXMVECTOR Plane5) noexcept; -// Test a triangle against six planes at once (see BoundingFrustum::GetPlanes) -} // namespace TriangleTests - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/**************************************************************************** - * - * Implementation - * - ****************************************************************************/ - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4068 4365 4616 6001) -// C4068/4616: ignore unknown pragmas -// C4365: Off by default noise -// C6001: False positives -#endif - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") -#pragma prefast(disable : 26495, "Union initialization confuses /analyze") -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" -#endif - -#include "DirectXCollision.inl" - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -} // namespace DirectX diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXCollision.inl b/targets/app/linux/Stubs/DirectXMath/DirectXCollision.inl deleted file mode 100644 index f2db42359..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXCollision.inl +++ /dev/null @@ -1,4921 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXCollision.inl -- C++ Collision Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -XMGLOBALCONST XMVECTORF32 g_BoxOffset[8] = { - {{{-1.0f, -1.0f, 1.0f, 0.0f}}}, {{{1.0f, -1.0f, 1.0f, 0.0f}}}, - {{{1.0f, 1.0f, 1.0f, 0.0f}}}, {{{-1.0f, 1.0f, 1.0f, 0.0f}}}, - {{{-1.0f, -1.0f, -1.0f, 0.0f}}}, {{{1.0f, -1.0f, -1.0f, 0.0f}}}, - {{{1.0f, 1.0f, -1.0f, 0.0f}}}, {{{-1.0f, 1.0f, -1.0f, 0.0f}}}, -}; - -XMGLOBALCONST XMVECTORF32 g_RayEpsilon = {{{1e-20f, 1e-20f, 1e-20f, 1e-20f}}}; -XMGLOBALCONST XMVECTORF32 g_RayNegEpsilon = { - {{-1e-20f, -1e-20f, -1e-20f, -1e-20f}}}; -XMGLOBALCONST XMVECTORF32 g_FltMin = { - {{-FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX}}}; -XMGLOBALCONST XMVECTORF32 g_FltMax = {{{FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}}}; - -namespace MathInternal { - -//----------------------------------------------------------------------------- -// Return true if any of the elements of a 3 vector are equal to 0xffffffff. -// Slightly more efficient than using XMVector3EqualInt. -//----------------------------------------------------------------------------- -inline bool XMVector3AnyTrue(_In_ FXMVECTOR V) noexcept { - // Duplicate the fourth element from the first element. - XMVECTOR C = - XMVectorSwizzle( - V); - - return XMComparisonAnyTrue(XMVector4EqualIntR(C, XMVectorTrueInt())); -} - -//----------------------------------------------------------------------------- -// Return true if all of the elements of a 3 vector are equal to 0xffffffff. -// Slightly more efficient than using XMVector3EqualInt. -//----------------------------------------------------------------------------- -inline bool XMVector3AllTrue(_In_ FXMVECTOR V) noexcept { - // Duplicate the fourth element from the first element. - XMVECTOR C = - XMVectorSwizzle( - V); - - return XMComparisonAllTrue(XMVector4EqualIntR(C, XMVectorTrueInt())); -} - -#if defined(_PREFAST_) || !defined(NDEBUG) - -XMGLOBALCONST XMVECTORF32 g_UnitVectorEpsilon = { - {{1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f}}}; -XMGLOBALCONST XMVECTORF32 g_UnitQuaternionEpsilon = { - {{1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f}}}; -XMGLOBALCONST XMVECTORF32 g_UnitPlaneEpsilon = { - {{1.0e-4f, 1.0e-4f, 1.0e-4f, 1.0e-4f}}}; - -//----------------------------------------------------------------------------- -// Return true if the vector is a unit vector (length == 1). -//----------------------------------------------------------------------------- -inline bool XMVector3IsUnit(_In_ FXMVECTOR V) noexcept { - XMVECTOR Difference = - XMVectorSubtract(XMVector3Length(V), XMVectorSplatOne()); - return XMVector4Less(XMVectorAbs(Difference), g_UnitVectorEpsilon); -} - -//----------------------------------------------------------------------------- -// Return true if the quaterion is a unit quaternion. -//----------------------------------------------------------------------------- -inline bool XMQuaternionIsUnit(_In_ FXMVECTOR Q) noexcept { - XMVECTOR Difference = - XMVectorSubtract(XMVector4Length(Q), XMVectorSplatOne()); - return XMVector4Less(XMVectorAbs(Difference), g_UnitQuaternionEpsilon); -} - -//----------------------------------------------------------------------------- -// Return true if the plane is a unit plane. -//----------------------------------------------------------------------------- -inline bool XMPlaneIsUnit(_In_ FXMVECTOR Plane) noexcept { - XMVECTOR Difference = - XMVectorSubtract(XMVector3Length(Plane), XMVectorSplatOne()); - return XMVector4Less(XMVectorAbs(Difference), g_UnitPlaneEpsilon); -} - -#endif // _PREFAST_ || !NDEBUG - -//----------------------------------------------------------------------------- -inline XMVECTOR XMPlaneTransform(_In_ FXMVECTOR Plane, _In_ FXMVECTOR Rotation, - _In_ FXMVECTOR Translation) noexcept { - XMVECTOR vNormal = XMVector3Rotate(Plane, Rotation); - XMVECTOR vD = XMVectorSubtract(XMVectorSplatW(Plane), - XMVector3Dot(vNormal, Translation)); - - return XMVectorInsert<0, 0, 0, 0, 1>(vNormal, vD); -} - -//----------------------------------------------------------------------------- -// Return the point on the line segement (S1, S2) nearest the point P. -//----------------------------------------------------------------------------- -inline XMVECTOR PointOnLineSegmentNearestPoint(_In_ FXMVECTOR S1, - _In_ FXMVECTOR S2, - _In_ FXMVECTOR P) noexcept { - XMVECTOR Dir = XMVectorSubtract(S2, S1); - XMVECTOR Projection = - XMVectorSubtract(XMVector3Dot(P, Dir), XMVector3Dot(S1, Dir)); - XMVECTOR LengthSq = XMVector3Dot(Dir, Dir); - - XMVECTOR t = XMVectorMultiply(Projection, XMVectorReciprocal(LengthSq)); - XMVECTOR Point = XMVectorMultiplyAdd(t, Dir, S1); - - // t < 0 - XMVECTOR SelectS1 = XMVectorLess(Projection, XMVectorZero()); - Point = XMVectorSelect(Point, S1, SelectS1); - - // t > 1 - XMVECTOR SelectS2 = XMVectorGreater(Projection, LengthSq); - Point = XMVectorSelect(Point, S2, SelectS2); - - return Point; -} - -//----------------------------------------------------------------------------- -// Test if the point (P) on the plane of the triangle is inside the triangle -// (V0, V1, V2). -//----------------------------------------------------------------------------- -inline XMVECTOR XM_CALLCONV -PointOnPlaneInsideTriangle(_In_ FXMVECTOR P, _In_ FXMVECTOR V0, - _In_ FXMVECTOR V1, _In_ GXMVECTOR V2) noexcept { - // Compute the triangle normal. - XMVECTOR N = - XMVector3Cross(XMVectorSubtract(V2, V0), XMVectorSubtract(V1, V0)); - - // Compute the cross products of the vector from the base of each edge to - // the point with each edge vector. - XMVECTOR C0 = - XMVector3Cross(XMVectorSubtract(P, V0), XMVectorSubtract(V1, V0)); - XMVECTOR C1 = - XMVector3Cross(XMVectorSubtract(P, V1), XMVectorSubtract(V2, V1)); - XMVECTOR C2 = - XMVector3Cross(XMVectorSubtract(P, V2), XMVectorSubtract(V0, V2)); - - // If the cross product points in the same direction as the normal the the - // point is inside the edge (it is zero if is on the edge). - XMVECTOR Zero = XMVectorZero(); - XMVECTOR Inside0 = XMVectorGreaterOrEqual(XMVector3Dot(C0, N), Zero); - XMVECTOR Inside1 = XMVectorGreaterOrEqual(XMVector3Dot(C1, N), Zero); - XMVECTOR Inside2 = XMVectorGreaterOrEqual(XMVector3Dot(C2, N), Zero); - - // If the point inside all of the edges it is inside. - return XMVectorAndInt(XMVectorAndInt(Inside0, Inside1), Inside2); -} - -//----------------------------------------------------------------------------- -inline bool SolveCubic(_In_ float e, _In_ float f, _In_ float g, _Out_ float* t, - _Out_ float* u, _Out_ float* v) noexcept { - float p, q, h, rc, d, theta, costh3, sinth3; - - p = f - e * e / 3.0f; - q = g - e * f / 3.0f + e * e * e * 2.0f / 27.0f; - h = q * q / 4.0f + p * p * p / 27.0f; - - if (h > 0) { - *t = *u = *v = 0.f; - return false; // only one real root - } - - if ((h == 0) && (q == 0)) // all the same root - { - *t = -e / 3; - *u = -e / 3; - *v = -e / 3; - - return true; - } - - d = sqrtf(q * q / 4.0f - h); - if (d < 0) - rc = -powf(-d, 1.0f / 3.0f); - else - rc = powf(d, 1.0f / 3.0f); - - theta = XMScalarACos(-q / (2.0f * d)); - costh3 = XMScalarCos(theta / 3.0f); - sinth3 = sqrtf(3.0f) * XMScalarSin(theta / 3.0f); - *t = 2.0f * rc * costh3 - e / 3.0f; - *u = -rc * (costh3 + sinth3) - e / 3.0f; - *v = -rc * (costh3 - sinth3) - e / 3.0f; - - return true; -} - -//----------------------------------------------------------------------------- -inline XMVECTOR CalculateEigenVector(_In_ float m11, _In_ float m12, - _In_ float m13, _In_ float m22, - _In_ float m23, _In_ float m33, - _In_ float e) noexcept { - float fTmp[3]; - fTmp[0] = m12 * m23 - m13 * (m22 - e); - fTmp[1] = m13 * m12 - m23 * (m11 - e); - fTmp[2] = (m11 - e) * (m22 - e) - m12 * m12; - - XMVECTOR vTmp = XMLoadFloat3(reinterpret_cast(fTmp)); - - if (XMVector3Equal(vTmp, XMVectorZero())) // planar or linear - { - float f1, f2, f3; - - // we only have one equation - find a valid one - if ((m11 - e != 0) || (m12 != 0) || (m13 != 0)) { - f1 = m11 - e; - f2 = m12; - f3 = m13; - } else if ((m12 != 0) || (m22 - e != 0) || (m23 != 0)) { - f1 = m12; - f2 = m22 - e; - f3 = m23; - } else if ((m13 != 0) || (m23 != 0) || (m33 - e != 0)) { - f1 = m13; - f2 = m23; - f3 = m33 - e; - } else { - // error, we'll just make something up - we have NO context - f1 = 1.0f; - f2 = 0.0f; - f3 = 0.0f; - } - - if (f1 == 0) - vTmp = XMVectorSetX(vTmp, 0.0f); - else - vTmp = XMVectorSetX(vTmp, 1.0f); - - if (f2 == 0) - vTmp = XMVectorSetY(vTmp, 0.0f); - else - vTmp = XMVectorSetY(vTmp, 1.0f); - - if (f3 == 0) { - vTmp = XMVectorSetZ(vTmp, 0.0f); - // recalculate y to make equation work - if (m12 != 0) vTmp = XMVectorSetY(vTmp, -f1 / f2); - } else { - vTmp = XMVectorSetZ(vTmp, (f2 - f1) / f3); - } - } - - if (XMVectorGetX(XMVector3LengthSq(vTmp)) > 1e-5f) { - return XMVector3Normalize(vTmp); - } else { - // Multiply by a value large enough to make the vector non-zero. - vTmp = XMVectorScale(vTmp, 1e5f); - return XMVector3Normalize(vTmp); - } -} - -//----------------------------------------------------------------------------- -inline bool CalculateEigenVectors(_In_ float m11, _In_ float m12, - _In_ float m13, _In_ float m22, - _In_ float m23, _In_ float m33, _In_ float e1, - _In_ float e2, _In_ float e3, - _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, - _Out_ XMVECTOR* pV3) noexcept { - *pV1 = DirectX::MathInternal::CalculateEigenVector(m11, m12, m13, m22, m23, - m33, e1); - *pV2 = DirectX::MathInternal::CalculateEigenVector(m11, m12, m13, m22, m23, - m33, e2); - *pV3 = DirectX::MathInternal::CalculateEigenVector(m11, m12, m13, m22, m23, - m33, e3); - - bool v1z = false; - bool v2z = false; - bool v3z = false; - - XMVECTOR Zero = XMVectorZero(); - - if (XMVector3Equal(*pV1, Zero)) v1z = true; - - if (XMVector3Equal(*pV2, Zero)) v2z = true; - - if (XMVector3Equal(*pV3, Zero)) v3z = true; - - bool e12 = (fabsf(XMVectorGetX(XMVector3Dot(*pV1, *pV2))) > - 0.1f); // check for non-orthogonal vectors - bool e13 = (fabsf(XMVectorGetX(XMVector3Dot(*pV1, *pV3))) > 0.1f); - bool e23 = (fabsf(XMVectorGetX(XMVector3Dot(*pV2, *pV3))) > 0.1f); - - if ((v1z && v2z && v3z) || (e12 && e13 && e23) || (e12 && v3z) || - (e13 && v2z) || (e23 && v1z)) // all eigenvectors are 0- any basis set - { - *pV1 = g_XMIdentityR0.v; - *pV2 = g_XMIdentityR1.v; - *pV3 = g_XMIdentityR2.v; - return true; - } - - if (v1z && v2z) { - XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV3); - if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f) { - vTmp = XMVector3Cross(g_XMIdentityR0, *pV3); - } - *pV1 = XMVector3Normalize(vTmp); - *pV2 = XMVector3Cross(*pV3, *pV1); - return true; - } - - if (v3z && v1z) { - XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV2); - if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f) { - vTmp = XMVector3Cross(g_XMIdentityR0, *pV2); - } - *pV3 = XMVector3Normalize(vTmp); - *pV1 = XMVector3Cross(*pV2, *pV3); - return true; - } - - if (v2z && v3z) { - XMVECTOR vTmp = XMVector3Cross(g_XMIdentityR1, *pV1); - if (XMVectorGetX(XMVector3LengthSq(vTmp)) < 1e-5f) { - vTmp = XMVector3Cross(g_XMIdentityR0, *pV1); - } - *pV2 = XMVector3Normalize(vTmp); - *pV3 = XMVector3Cross(*pV1, *pV2); - return true; - } - - if ((v1z) || e12) { - *pV1 = XMVector3Cross(*pV2, *pV3); - return true; - } - - if ((v2z) || e23) { - *pV2 = XMVector3Cross(*pV3, *pV1); - return true; - } - - if ((v3z) || e13) { - *pV3 = XMVector3Cross(*pV1, *pV2); - return true; - } - - return true; -} - -//----------------------------------------------------------------------------- -inline bool CalculateEigenVectorsFromCovarianceMatrix( - _In_ float Cxx, _In_ float Cyy, _In_ float Czz, _In_ float Cxy, - _In_ float Cxz, _In_ float Cyz, _Out_ XMVECTOR* pV1, _Out_ XMVECTOR* pV2, - _Out_ XMVECTOR* pV3) noexcept { - // Calculate the eigenvalues by solving a cubic equation. - float e = -(Cxx + Cyy + Czz); - float f = - Cxx * Cyy + Cyy * Czz + Czz * Cxx - Cxy * Cxy - Cxz * Cxz - Cyz * Cyz; - float g = Cxy * Cxy * Czz + Cxz * Cxz * Cyy + Cyz * Cyz * Cxx - - Cxy * Cyz * Cxz * 2.0f - Cxx * Cyy * Czz; - - float ev1, ev2, ev3; - if (!DirectX::MathInternal::SolveCubic(e, f, g, &ev1, &ev2, &ev3)) { - // set them to arbitrary orthonormal basis set - *pV1 = g_XMIdentityR0.v; - *pV2 = g_XMIdentityR1.v; - *pV3 = g_XMIdentityR2.v; - return false; - } - - return DirectX::MathInternal::CalculateEigenVectors( - Cxx, Cxy, Cxz, Cyy, Cyz, Czz, ev1, ev2, ev3, pV1, pV2, pV3); -} - -//----------------------------------------------------------------------------- -inline void XM_CALLCONV FastIntersectTrianglePlane(FXMVECTOR V0, FXMVECTOR V1, - FXMVECTOR V2, - GXMVECTOR Plane, - XMVECTOR& Outside, - XMVECTOR& Inside) noexcept { - // Plane0 - XMVECTOR Dist0 = XMVector4Dot(V0, Plane); - XMVECTOR Dist1 = XMVector4Dot(V1, Plane); - XMVECTOR Dist2 = XMVector4Dot(V2, Plane); - - XMVECTOR MinDist = XMVectorMin(Dist0, Dist1); - MinDist = XMVectorMin(MinDist, Dist2); - - XMVECTOR MaxDist = XMVectorMax(Dist0, Dist1); - MaxDist = XMVectorMax(MaxDist, Dist2); - - XMVECTOR Zero = XMVectorZero(); - - // Outside the plane? - Outside = XMVectorGreater(MinDist, Zero); - - // Fully inside the plane? - Inside = XMVectorLess(MaxDist, Zero); -} - -//----------------------------------------------------------------------------- -inline void FastIntersectSpherePlane(_In_ FXMVECTOR Center, - _In_ FXMVECTOR Radius, - _In_ FXMVECTOR Plane, - _Out_ XMVECTOR& Outside, - _Out_ XMVECTOR& Inside) noexcept { - XMVECTOR Dist = XMVector4Dot(Center, Plane); - - // Outside the plane? - Outside = XMVectorGreater(Dist, Radius); - - // Fully inside the plane? - Inside = XMVectorLess(Dist, XMVectorNegate(Radius)); -} - -//----------------------------------------------------------------------------- -inline void FastIntersectAxisAlignedBoxPlane(_In_ FXMVECTOR Center, - _In_ FXMVECTOR Extents, - _In_ FXMVECTOR Plane, - _Out_ XMVECTOR& Outside, - _Out_ XMVECTOR& Inside) noexcept { - // Compute the distance to the center of the box. - XMVECTOR Dist = XMVector4Dot(Center, Plane); - - // Project the axes of the box onto the normal of the plane. Half the - // length of the projection (sometime called the "radius") is equal to - // h(u) * abs(n dot b(u))) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w)) - // where h(i) are extents of the box, n is the plane normal, and b(i) are - // the axes of the box. In this case b(i) = [(1,0,0), (0,1,0), (0,0,1)]. - XMVECTOR Radius = XMVector3Dot(Extents, XMVectorAbs(Plane)); - - // Outside the plane? - Outside = XMVectorGreater(Dist, Radius); - - // Fully inside the plane? - Inside = XMVectorLess(Dist, XMVectorNegate(Radius)); -} - -//----------------------------------------------------------------------------- -inline void XM_CALLCONV FastIntersectOrientedBoxPlane( - _In_ FXMVECTOR Center, _In_ FXMVECTOR Extents, _In_ FXMVECTOR Axis0, - _In_ GXMVECTOR Axis1, _In_ HXMVECTOR Axis2, _In_ HXMVECTOR Plane, - _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept { - // Compute the distance to the center of the box. - XMVECTOR Dist = XMVector4Dot(Center, Plane); - - // Project the axes of the box onto the normal of the plane. Half the - // length of the projection (sometime called the "radius") is equal to - // h(u) * abs(n dot b(u))) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot b(w)) - // where h(i) are extents of the box, n is the plane normal, and b(i) are - // the axes of the box. - XMVECTOR Radius = XMVector3Dot(Plane, Axis0); - Radius = XMVectorInsert<0, 0, 1, 0, 0>(Radius, XMVector3Dot(Plane, Axis1)); - Radius = XMVectorInsert<0, 0, 0, 1, 0>(Radius, XMVector3Dot(Plane, Axis2)); - Radius = XMVector3Dot(Extents, XMVectorAbs(Radius)); - - // Outside the plane? - Outside = XMVectorGreater(Dist, Radius); - - // Fully inside the plane? - Inside = XMVectorLess(Dist, XMVectorNegate(Radius)); -} - -//----------------------------------------------------------------------------- -inline void XM_CALLCONV FastIntersectFrustumPlane( - _In_ FXMVECTOR Point0, _In_ FXMVECTOR Point1, _In_ FXMVECTOR Point2, - _In_ GXMVECTOR Point3, _In_ HXMVECTOR Point4, _In_ HXMVECTOR Point5, - _In_ CXMVECTOR Point6, _In_ CXMVECTOR Point7, _In_ CXMVECTOR Plane, - _Out_ XMVECTOR& Outside, _Out_ XMVECTOR& Inside) noexcept { - // Find the min/max projection of the frustum onto the plane normal. - XMVECTOR Min, Max, Dist; - - Min = Max = XMVector3Dot(Plane, Point0); - - Dist = XMVector3Dot(Plane, Point1); - Min = XMVectorMin(Min, Dist); - Max = XMVectorMax(Max, Dist); - - Dist = XMVector3Dot(Plane, Point2); - Min = XMVectorMin(Min, Dist); - Max = XMVectorMax(Max, Dist); - - Dist = XMVector3Dot(Plane, Point3); - Min = XMVectorMin(Min, Dist); - Max = XMVectorMax(Max, Dist); - - Dist = XMVector3Dot(Plane, Point4); - Min = XMVectorMin(Min, Dist); - Max = XMVectorMax(Max, Dist); - - Dist = XMVector3Dot(Plane, Point5); - Min = XMVectorMin(Min, Dist); - Max = XMVectorMax(Max, Dist); - - Dist = XMVector3Dot(Plane, Point6); - Min = XMVectorMin(Min, Dist); - Max = XMVectorMax(Max, Dist); - - Dist = XMVector3Dot(Plane, Point7); - Min = XMVectorMin(Min, Dist); - Max = XMVectorMax(Max, Dist); - - XMVECTOR PlaneDist = XMVectorNegate(XMVectorSplatW(Plane)); - - // Outside the plane? - Outside = XMVectorGreater(Min, PlaneDist); - - // Fully inside the plane? - Inside = XMVectorLess(Max, PlaneDist); -} - -} // namespace MathInternal - -/**************************************************************************** - * - * BoundingSphere - * - ****************************************************************************/ - -//----------------------------------------------------------------------------- -// Transform a sphere by an angle preserving transform. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void XM_CALLCONV -BoundingSphere::Transform(BoundingSphere& Out, FXMMATRIX M) const noexcept { - // Load the center of the sphere. - XMVECTOR vCenter = XMLoadFloat3(&Center); - - // Transform the center of the sphere. - XMVECTOR C = XMVector3Transform(vCenter, M); - - XMVECTOR dX = XMVector3Dot(M.r[0], M.r[0]); - XMVECTOR dY = XMVector3Dot(M.r[1], M.r[1]); - XMVECTOR dZ = XMVector3Dot(M.r[2], M.r[2]); - - XMVECTOR d = XMVectorMax(dX, XMVectorMax(dY, dZ)); - - // Store the center sphere. - XMStoreFloat3(&Out.Center, C); - - // Scale the radius of the pshere. - float Scale = sqrtf(XMVectorGetX(d)); - Out.Radius = Radius * Scale; -} - -_Use_decl_annotations_ inline void XM_CALLCONV -BoundingSphere::Transform(BoundingSphere& Out, float Scale, FXMVECTOR Rotation, - FXMVECTOR Translation) const noexcept { - // Load the center of the sphere. - XMVECTOR vCenter = XMLoadFloat3(&Center); - - // Transform the center of the sphere. - vCenter = XMVectorAdd( - XMVector3Rotate(XMVectorScale(vCenter, Scale), Rotation), Translation); - - // Store the center sphere. - XMStoreFloat3(&Out.Center, vCenter); - - // Scale the radius of the pshere. - Out.Radius = Radius * Scale; -} - -//----------------------------------------------------------------------------- -// Point in sphere test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingSphere::Contains(FXMVECTOR Point) const noexcept { - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - - XMVECTOR DistanceSquared = - XMVector3LengthSq(XMVectorSubtract(Point, vCenter)); - XMVECTOR RadiusSquared = XMVectorMultiply(vRadius, vRadius); - - return XMVector3LessOrEqual(DistanceSquared, RadiusSquared) ? CONTAINS - : DISJOINT; -} - -//----------------------------------------------------------------------------- -// Triangle in sphere test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingSphere::Contains(FXMVECTOR V0, FXMVECTOR V1, - FXMVECTOR V2) const noexcept { - if (!Intersects(V0, V1, V2)) return DISJOINT; - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - XMVECTOR RadiusSquared = XMVectorMultiply(vRadius, vRadius); - - XMVECTOR DistanceSquared = XMVector3LengthSq(XMVectorSubtract(V0, vCenter)); - XMVECTOR Inside = XMVectorLessOrEqual(DistanceSquared, RadiusSquared); - - DistanceSquared = XMVector3LengthSq(XMVectorSubtract(V1, vCenter)); - Inside = XMVectorAndInt( - Inside, XMVectorLessOrEqual(DistanceSquared, RadiusSquared)); - - DistanceSquared = XMVector3LengthSq(XMVectorSubtract(V2, vCenter)); - Inside = XMVectorAndInt( - Inside, XMVectorLessOrEqual(DistanceSquared, RadiusSquared)); - - return (XMVector3EqualInt(Inside, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Sphere in sphere test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingSphere::Contains( - const BoundingSphere& sh) const noexcept { - XMVECTOR Center1 = XMLoadFloat3(&Center); - float r1 = Radius; - - XMVECTOR Center2 = XMLoadFloat3(&sh.Center); - float r2 = sh.Radius; - - XMVECTOR V = XMVectorSubtract(Center2, Center1); - - XMVECTOR Dist = XMVector3Length(V); - - float d = XMVectorGetX(Dist); - - return (r1 + r2 >= d) ? ((r1 - r2 >= d) ? CONTAINS : INTERSECTS) : DISJOINT; -} - -//----------------------------------------------------------------------------- -// Axis-aligned box in sphere test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingSphere::Contains( - const BoundingBox& box) const noexcept { - if (!box.Intersects(*this)) return DISJOINT; - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius); - - XMVECTOR boxCenter = XMLoadFloat3(&box.Center); - XMVECTOR boxExtents = XMLoadFloat3(&box.Extents); - - XMVECTOR InsideAll = XMVectorTrueInt(); - - XMVECTOR offset = XMVectorSubtract(boxCenter, vCenter); - - for (size_t i = 0; i < BoundingBox::CORNER_COUNT; ++i) { - XMVECTOR C = XMVectorMultiplyAdd(boxExtents, g_BoxOffset[i], offset); - XMVECTOR d = XMVector3LengthSq(C); - InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(d, RadiusSq)); - } - - return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Oriented box in sphere test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingSphere::Contains( - const BoundingOrientedBox& box) const noexcept { - if (!box.Intersects(*this)) return DISJOINT; - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius); - - XMVECTOR boxCenter = XMLoadFloat3(&box.Center); - XMVECTOR boxExtents = XMLoadFloat3(&box.Extents); - XMVECTOR boxOrientation = XMLoadFloat4(&box.Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(boxOrientation)); - - XMVECTOR InsideAll = XMVectorTrueInt(); - - for (size_t i = 0; i < BoundingOrientedBox::CORNER_COUNT; ++i) { - XMVECTOR C = XMVectorAdd( - XMVector3Rotate(XMVectorMultiply(boxExtents, g_BoxOffset[i]), - boxOrientation), - boxCenter); - XMVECTOR d = XMVector3LengthSq(XMVectorSubtract(vCenter, C)); - InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(d, RadiusSq)); - } - - return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Frustum in sphere test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingSphere::Contains( - const BoundingFrustum& fr) const noexcept { - if (!fr.Intersects(*this)) return DISJOINT; - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius); - - XMVECTOR vOrigin = XMLoadFloat3(&fr.Origin); - XMVECTOR vOrientation = XMLoadFloat4(&fr.Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Build the corners of the frustum. - XMVECTOR vRightTop = XMVectorSet(fr.RightSlope, fr.TopSlope, 1.0f, 0.0f); - XMVECTOR vRightBottom = - XMVectorSet(fr.RightSlope, fr.BottomSlope, 1.0f, 0.0f); - XMVECTOR vLeftTop = XMVectorSet(fr.LeftSlope, fr.TopSlope, 1.0f, 0.0f); - XMVECTOR vLeftBottom = - XMVectorSet(fr.LeftSlope, fr.BottomSlope, 1.0f, 0.0f); - XMVECTOR vNear = XMVectorReplicatePtr(&fr.Near); - XMVECTOR vFar = XMVectorReplicatePtr(&fr.Far); - - XMVECTOR Corners[BoundingFrustum::CORNER_COUNT]; - Corners[0] = XMVectorMultiply(vRightTop, vNear); - Corners[1] = XMVectorMultiply(vRightBottom, vNear); - Corners[2] = XMVectorMultiply(vLeftTop, vNear); - Corners[3] = XMVectorMultiply(vLeftBottom, vNear); - Corners[4] = XMVectorMultiply(vRightTop, vFar); - Corners[5] = XMVectorMultiply(vRightBottom, vFar); - Corners[6] = XMVectorMultiply(vLeftTop, vFar); - Corners[7] = XMVectorMultiply(vLeftBottom, vFar); - - XMVECTOR InsideAll = XMVectorTrueInt(); - for (size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i) { - XMVECTOR C = - XMVectorAdd(XMVector3Rotate(Corners[i], vOrientation), vOrigin); - XMVECTOR d = XMVector3LengthSq(XMVectorSubtract(vCenter, C)); - InsideAll = XMVectorAndInt(InsideAll, XMVectorLessOrEqual(d, RadiusSq)); - } - - return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Sphere vs. sphere test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingSphere::Intersects( - const BoundingSphere& sh) const noexcept { - // Load A. - XMVECTOR vCenterA = XMLoadFloat3(&Center); - XMVECTOR vRadiusA = XMVectorReplicatePtr(&Radius); - - // Load B. - XMVECTOR vCenterB = XMLoadFloat3(&sh.Center); - XMVECTOR vRadiusB = XMVectorReplicatePtr(&sh.Radius); - - // Distance squared between centers. - XMVECTOR Delta = XMVectorSubtract(vCenterB, vCenterA); - XMVECTOR DistanceSquared = XMVector3LengthSq(Delta); - - // Sum of the radii squared. - XMVECTOR RadiusSquared = XMVectorAdd(vRadiusA, vRadiusB); - RadiusSquared = XMVectorMultiply(RadiusSquared, RadiusSquared); - - return XMVector3LessOrEqual(DistanceSquared, RadiusSquared); -} - -//----------------------------------------------------------------------------- -// Box vs. sphere test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingSphere::Intersects( - const BoundingBox& box) const noexcept { - return box.Intersects(*this); -} - -_Use_decl_annotations_ inline bool BoundingSphere::Intersects( - const BoundingOrientedBox& box) const noexcept { - return box.Intersects(*this); -} - -//----------------------------------------------------------------------------- -// Frustum vs. sphere test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingSphere::Intersects( - const BoundingFrustum& fr) const noexcept { - return fr.Intersects(*this); -} - -//----------------------------------------------------------------------------- -// Triangle vs sphere test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingSphere::Intersects( - FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept { - // Load the sphere. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - - // Compute the plane of the triangle (has to be normalized). - XMVECTOR N = XMVector3Normalize( - XMVector3Cross(XMVectorSubtract(V1, V0), XMVectorSubtract(V2, V0))); - - // Assert that the triangle is not degenerate. - assert(!XMVector3Equal(N, XMVectorZero())); - - // Find the nearest feature on the triangle to the sphere. - XMVECTOR Dist = XMVector3Dot(XMVectorSubtract(vCenter, V0), N); - - // If the center of the sphere is farther from the plane of the triangle - // than the radius of the sphere, then there cannot be an intersection. - XMVECTOR NoIntersection = XMVectorLess(Dist, XMVectorNegate(vRadius)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Dist, vRadius)); - - // Project the center of the sphere onto the plane of the triangle. - XMVECTOR Point = XMVectorNegativeMultiplySubtract(N, Dist, vCenter); - - // Is it inside all the edges? If so we intersect because the distance - // to the plane is less than the radius. - XMVECTOR Intersection = - DirectX::MathInternal::PointOnPlaneInsideTriangle(Point, V0, V1, V2); - - // Find the nearest point on each edge. - XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius); - - // Edge 0,1 - Point = - DirectX::MathInternal::PointOnLineSegmentNearestPoint(V0, V1, vCenter); - - // If the distance to the center of the sphere to the point is less than - // the radius of the sphere then it must intersect. - Intersection = XMVectorOrInt( - Intersection, - XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(vCenter, Point)), - RadiusSq)); - - // Edge 1,2 - Point = - DirectX::MathInternal::PointOnLineSegmentNearestPoint(V1, V2, vCenter); - - // If the distance to the center of the sphere to the point is less than - // the radius of the sphere then it must intersect. - Intersection = XMVectorOrInt( - Intersection, - XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(vCenter, Point)), - RadiusSq)); - - // Edge 2,0 - Point = - DirectX::MathInternal::PointOnLineSegmentNearestPoint(V2, V0, vCenter); - - // If the distance to the center of the sphere to the point is less than - // the radius of the sphere then it must intersect. - Intersection = XMVectorOrInt( - Intersection, - XMVectorLessOrEqual(XMVector3LengthSq(XMVectorSubtract(vCenter, Point)), - RadiusSq)); - - return XMVector4EqualInt(XMVectorAndCInt(Intersection, NoIntersection), - XMVectorTrueInt()); -} - -//----------------------------------------------------------------------------- -// Sphere-plane intersection -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline PlaneIntersectionType XM_CALLCONV -BoundingSphere::Intersects(FXMVECTOR Plane) const noexcept { - assert(DirectX::MathInternal::XMPlaneIsUnit(Plane)); - - // Load the sphere. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - - // Set w of the center to one so we can dot4 with a plane. - vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); - - XMVECTOR Outside, Inside; - DirectX::MathInternal::FastIntersectSpherePlane(vCenter, vRadius, Plane, - Outside, Inside); - - // If the sphere is outside any plane it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return FRONT; - - // If the sphere is inside all planes it is inside. - if (XMVector4EqualInt(Inside, XMVectorTrueInt())) return BACK; - - // The sphere is not inside all planes or outside a plane it intersects. - return INTERSECTING; -} - -//----------------------------------------------------------------------------- -// Compute the intersection of a ray (Origin, Direction) with a sphere. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingSphere::Intersects( - FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept { - assert(DirectX::MathInternal::XMVector3IsUnit(Direction)); - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - - // l is the vector from the ray origin to the center of the sphere. - XMVECTOR l = XMVectorSubtract(vCenter, Origin); - - // s is the projection of the l onto the ray direction. - XMVECTOR s = XMVector3Dot(l, Direction); - - XMVECTOR l2 = XMVector3Dot(l, l); - - XMVECTOR r2 = XMVectorMultiply(vRadius, vRadius); - - // m2 is squared distance from the center of the sphere to the projection. - XMVECTOR m2 = XMVectorNegativeMultiplySubtract(s, s, l2); - - XMVECTOR NoIntersection; - - // If the ray origin is outside the sphere and the center of the sphere is - // behind the ray origin there is no intersection. - NoIntersection = XMVectorAndInt(XMVectorLess(s, XMVectorZero()), - XMVectorGreater(l2, r2)); - - // If the squared distance from the center of the sphere to the projection - // is greater than the radius squared the ray will miss the sphere. - NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(m2, r2)); - - // The ray hits the sphere, compute the nearest intersection point. - XMVECTOR q = XMVectorSqrt(XMVectorSubtract(r2, m2)); - XMVECTOR t1 = XMVectorSubtract(s, q); - XMVECTOR t2 = XMVectorAdd(s, q); - - XMVECTOR OriginInside = XMVectorLessOrEqual(l2, r2); - XMVECTOR t = XMVectorSelect(t1, t2, OriginInside); - - if (XMVector4NotEqualInt(NoIntersection, XMVectorTrueInt())) { - // Store the x-component to *pDist. - XMStoreFloat(&Dist, t); - return true; - } - - Dist = 0.f; - return false; -} - -//----------------------------------------------------------------------------- -// Test a sphere vs 6 planes (typically forming a frustum). -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingSphere::ContainedBy(FXMVECTOR Plane0, FXMVECTOR Plane1, - FXMVECTOR Plane2, GXMVECTOR Plane3, - HXMVECTOR Plane4, HXMVECTOR Plane5) const noexcept { - // Load the sphere. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&Radius); - - // Set w of the center to one so we can dot4 with a plane. - vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); - - XMVECTOR Outside, Inside; - - // Test against each plane. - DirectX::MathInternal::FastIntersectSpherePlane(vCenter, vRadius, Plane0, - Outside, Inside); - - XMVECTOR AnyOutside = Outside; - XMVECTOR AllInside = Inside; - - DirectX::MathInternal::FastIntersectSpherePlane(vCenter, vRadius, Plane1, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectSpherePlane(vCenter, vRadius, Plane2, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectSpherePlane(vCenter, vRadius, Plane3, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectSpherePlane(vCenter, vRadius, Plane4, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectSpherePlane(vCenter, vRadius, Plane5, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - // If the sphere is outside any plane it is outside. - if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt())) return DISJOINT; - - // If the sphere is inside all planes it is inside. - if (XMVector4EqualInt(AllInside, XMVectorTrueInt())) return CONTAINS; - - // The sphere is not inside all planes or outside a plane, it may intersect. - return INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Creates a bounding sphere that contains two other bounding spheres -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingSphere::CreateMerged( - BoundingSphere& Out, const BoundingSphere& S1, - const BoundingSphere& S2) noexcept { - XMVECTOR Center1 = XMLoadFloat3(&S1.Center); - float r1 = S1.Radius; - - XMVECTOR Center2 = XMLoadFloat3(&S2.Center); - float r2 = S2.Radius; - - XMVECTOR V = XMVectorSubtract(Center2, Center1); - - XMVECTOR Dist = XMVector3Length(V); - - float d = XMVectorGetX(Dist); - - if (r1 + r2 >= d) { - if (r1 - r2 >= d) { - Out = S1; - return; - } else if (r2 - r1 >= d) { - Out = S2; - return; - } - } - - XMVECTOR N = XMVectorDivide(V, Dist); - - float t1 = XMMin(-r1, d - r2); - float t2 = XMMax(r1, d + r2); - float t_5 = (t2 - t1) * 0.5f; - - XMVECTOR NCenter = - XMVectorAdd(Center1, XMVectorMultiply(N, XMVectorReplicate(t_5 + t1))); - - XMStoreFloat3(&Out.Center, NCenter); - Out.Radius = t_5; -} - -//----------------------------------------------------------------------------- -// Create sphere enscribing bounding box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingSphere::CreateFromBoundingBox( - BoundingSphere& Out, const BoundingBox& box) noexcept { - Out.Center = box.Center; - XMVECTOR vExtents = XMLoadFloat3(&box.Extents); - Out.Radius = XMVectorGetX(XMVector3Length(vExtents)); -} - -_Use_decl_annotations_ inline void BoundingSphere::CreateFromBoundingBox( - BoundingSphere& Out, const BoundingOrientedBox& box) noexcept { - // Bounding box orientation is irrelevant because a sphere is rotationally - // invariant - Out.Center = box.Center; - XMVECTOR vExtents = XMLoadFloat3(&box.Extents); - Out.Radius = XMVectorGetX(XMVector3Length(vExtents)); -} - -//----------------------------------------------------------------------------- -// Find the approximate smallest enclosing bounding sphere for a set of -// points. Exact computation of the smallest enclosing bounding sphere is -// possible but is slower and requires a more complex algorithm. -// The algorithm is based on Jack Ritter, "An Efficient Bounding Sphere", -// Graphics Gems. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingSphere::CreateFromPoints( - BoundingSphere& Out, size_t Count, const XMFLOAT3* pPoints, - size_t Stride) noexcept { - assert(Count > 0); - assert(pPoints); - - // Find the points with minimum and maximum x, y, and z - XMVECTOR MinX, MaxX, MinY, MaxY, MinZ, MaxZ; - - MinX = MaxX = MinY = MaxY = MinZ = MaxZ = XMLoadFloat3(pPoints); - - for (size_t i = 1; i < Count; ++i) { - XMVECTOR Point = XMLoadFloat3(reinterpret_cast( - reinterpret_cast(pPoints) + i * Stride)); - - float px = XMVectorGetX(Point); - float py = XMVectorGetY(Point); - float pz = XMVectorGetZ(Point); - - if (px < XMVectorGetX(MinX)) MinX = Point; - - if (px > XMVectorGetX(MaxX)) MaxX = Point; - - if (py < XMVectorGetY(MinY)) MinY = Point; - - if (py > XMVectorGetY(MaxY)) MaxY = Point; - - if (pz < XMVectorGetZ(MinZ)) MinZ = Point; - - if (pz > XMVectorGetZ(MaxZ)) MaxZ = Point; - } - - // Use the min/max pair that are farthest apart to form the initial sphere. - XMVECTOR DeltaX = XMVectorSubtract(MaxX, MinX); - XMVECTOR DistX = XMVector3Length(DeltaX); - - XMVECTOR DeltaY = XMVectorSubtract(MaxY, MinY); - XMVECTOR DistY = XMVector3Length(DeltaY); - - XMVECTOR DeltaZ = XMVectorSubtract(MaxZ, MinZ); - XMVECTOR DistZ = XMVector3Length(DeltaZ); - - XMVECTOR vCenter; - XMVECTOR vRadius; - - if (XMVector3Greater(DistX, DistY)) { - if (XMVector3Greater(DistX, DistZ)) { - // Use min/max x. - vCenter = XMVectorLerp(MaxX, MinX, 0.5f); - vRadius = XMVectorScale(DistX, 0.5f); - } else { - // Use min/max z. - vCenter = XMVectorLerp(MaxZ, MinZ, 0.5f); - vRadius = XMVectorScale(DistZ, 0.5f); - } - } else // Y >= X - { - if (XMVector3Greater(DistY, DistZ)) { - // Use min/max y. - vCenter = XMVectorLerp(MaxY, MinY, 0.5f); - vRadius = XMVectorScale(DistY, 0.5f); - } else { - // Use min/max z. - vCenter = XMVectorLerp(MaxZ, MinZ, 0.5f); - vRadius = XMVectorScale(DistZ, 0.5f); - } - } - - // Add any points not inside the sphere. - for (size_t i = 0; i < Count; ++i) { - XMVECTOR Point = XMLoadFloat3(reinterpret_cast( - reinterpret_cast(pPoints) + i * Stride)); - - XMVECTOR Delta = XMVectorSubtract(Point, vCenter); - - XMVECTOR Dist = XMVector3Length(Delta); - - if (XMVector3Greater(Dist, vRadius)) { - // Adjust sphere to include the new point. - vRadius = XMVectorScale(XMVectorAdd(vRadius, Dist), 0.5f); - vCenter = XMVectorAdd( - vCenter, XMVectorMultiply( - XMVectorSubtract(XMVectorReplicate(1.0f), - XMVectorDivide(vRadius, Dist)), - Delta)); - } - } - - XMStoreFloat3(&Out.Center, vCenter); - XMStoreFloat(&Out.Radius, vRadius); -} - -//----------------------------------------------------------------------------- -// Create sphere containing frustum -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingSphere::CreateFromFrustum( - BoundingSphere& Out, const BoundingFrustum& fr) noexcept { - XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT]; - fr.GetCorners(Corners); - CreateFromPoints(Out, BoundingFrustum::CORNER_COUNT, Corners, - sizeof(XMFLOAT3)); -} - -/**************************************************************************** - * - * BoundingBox - * - ****************************************************************************/ - -//----------------------------------------------------------------------------- -// Transform an axis aligned box by an angle preserving transform. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void XM_CALLCONV -BoundingBox::Transform(BoundingBox& Out, FXMMATRIX M) const noexcept { - // Load center and extents. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - // Compute and transform the corners and find new min/max bounds. - XMVECTOR Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[0], vCenter); - Corner = XMVector3Transform(Corner, M); - - XMVECTOR Min, Max; - Min = Max = Corner; - - for (size_t i = 1; i < CORNER_COUNT; ++i) { - Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[i], vCenter); - Corner = XMVector3Transform(Corner, M); - - Min = XMVectorMin(Min, Corner); - Max = XMVectorMax(Max, Corner); - } - - // Store center and extents. - XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f)); - XMStoreFloat3(&Out.Extents, - XMVectorScale(XMVectorSubtract(Max, Min), 0.5f)); -} - -_Use_decl_annotations_ inline void XM_CALLCONV -BoundingBox::Transform(BoundingBox& Out, float Scale, FXMVECTOR Rotation, - FXMVECTOR Translation) const noexcept { - assert(DirectX::MathInternal::XMQuaternionIsUnit(Rotation)); - - // Load center and extents. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - XMVECTOR VectorScale = XMVectorReplicate(Scale); - - // Compute and transform the corners and find new min/max bounds. - XMVECTOR Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[0], vCenter); - Corner = XMVectorAdd( - XMVector3Rotate(XMVectorMultiply(Corner, VectorScale), Rotation), - Translation); - - XMVECTOR Min, Max; - Min = Max = Corner; - - for (size_t i = 1; i < CORNER_COUNT; ++i) { - Corner = XMVectorMultiplyAdd(vExtents, g_BoxOffset[i], vCenter); - Corner = XMVectorAdd( - XMVector3Rotate(XMVectorMultiply(Corner, VectorScale), Rotation), - Translation); - - Min = XMVectorMin(Min, Corner); - Max = XMVectorMax(Max, Corner); - } - - // Store center and extents. - XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f)); - XMStoreFloat3(&Out.Extents, - XMVectorScale(XMVectorSubtract(Max, Min), 0.5f)); -} - -//----------------------------------------------------------------------------- -// Get the corner points of the box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingBox::GetCorners( - XMFLOAT3* Corners) const noexcept { - assert(Corners != nullptr); - - // Load the box - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - for (size_t i = 0; i < CORNER_COUNT; ++i) { - XMVECTOR C = XMVectorMultiplyAdd(vExtents, g_BoxOffset[i], vCenter); - XMStoreFloat3(&Corners[i], C); - } -} - -//----------------------------------------------------------------------------- -// Point in axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingBox::Contains(FXMVECTOR Point) const noexcept { - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - return XMVector3InBounds(XMVectorSubtract(Point, vCenter), vExtents) - ? CONTAINS - : DISJOINT; -} - -//----------------------------------------------------------------------------- -// Triangle in axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingBox::Contains(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept { - if (!Intersects(V0, V1, V2)) return DISJOINT; - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - XMVECTOR d = XMVectorAbs(XMVectorSubtract(V0, vCenter)); - XMVECTOR Inside = XMVectorLessOrEqual(d, vExtents); - - d = XMVectorAbs(XMVectorSubtract(V1, vCenter)); - Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents)); - - d = XMVectorAbs(XMVectorSubtract(V2, vCenter)); - Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents)); - - return (XMVector3EqualInt(Inside, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Sphere in axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingBox::Contains( - const BoundingSphere& sh) const noexcept { - XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center); - XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius); - - XMVECTOR BoxCenter = XMLoadFloat3(&Center); - XMVECTOR BoxExtents = XMLoadFloat3(&Extents); - - XMVECTOR BoxMin = XMVectorSubtract(BoxCenter, BoxExtents); - XMVECTOR BoxMax = XMVectorAdd(BoxCenter, BoxExtents); - - // Find the distance to the nearest point on the box. - // for each i in (x, y, z) - // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2 - // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) - // ^ 2 - - XMVECTOR d = XMVectorZero(); - - // Compute d for each dimension. - XMVECTOR LessThanMin = XMVectorLess(SphereCenter, BoxMin); - XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxMax); - - XMVECTOR MinDelta = XMVectorSubtract(SphereCenter, BoxMin); - XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxMax); - - // Choose value for each dimension based on the comparison. - d = XMVectorSelect(d, MinDelta, LessThanMin); - d = XMVectorSelect(d, MaxDelta, GreaterThanMax); - - // Use a dot-product to square them and sum them together. - XMVECTOR d2 = XMVector3Dot(d, d); - - if (XMVector3Greater(d2, XMVectorMultiply(SphereRadius, SphereRadius))) - return DISJOINT; - - XMVECTOR InsideAll = - XMVectorLessOrEqual(XMVectorAdd(BoxMin, SphereRadius), SphereCenter); - InsideAll = XMVectorAndInt( - InsideAll, XMVectorLessOrEqual(SphereCenter, - XMVectorSubtract(BoxMax, SphereRadius))); - InsideAll = XMVectorAndInt( - InsideAll, - XMVectorGreater(XMVectorSubtract(BoxMax, BoxMin), SphereRadius)); - - return (XMVector3EqualInt(InsideAll, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Axis-aligned box in axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingBox::Contains( - const BoundingBox& box) const noexcept { - XMVECTOR CenterA = XMLoadFloat3(&Center); - XMVECTOR ExtentsA = XMLoadFloat3(&Extents); - - XMVECTOR CenterB = XMLoadFloat3(&box.Center); - XMVECTOR ExtentsB = XMLoadFloat3(&box.Extents); - - XMVECTOR MinA = XMVectorSubtract(CenterA, ExtentsA); - XMVECTOR MaxA = XMVectorAdd(CenterA, ExtentsA); - - XMVECTOR MinB = XMVectorSubtract(CenterB, ExtentsB); - XMVECTOR MaxB = XMVectorAdd(CenterB, ExtentsB); - - // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) - // then return false - XMVECTOR Disjoint = - XMVectorOrInt(XMVectorGreater(MinA, MaxB), XMVectorGreater(MinB, MaxA)); - - if (DirectX::MathInternal::XMVector3AnyTrue(Disjoint)) return DISJOINT; - - // for each i in (x, y, z) if a_min(i) <= b_min(i) and b_max(i) <= a_max(i) - // then A contains B - XMVECTOR Inside = XMVectorAndInt(XMVectorLessOrEqual(MinA, MinB), - XMVectorLessOrEqual(MaxB, MaxA)); - - return DirectX::MathInternal::XMVector3AllTrue(Inside) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Oriented box in axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingBox::Contains( - const BoundingOrientedBox& box) const noexcept { - if (!box.Intersects(*this)) return DISJOINT; - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - // Subtract off the AABB center to remove a subtract below - XMVECTOR oCenter = XMVectorSubtract(XMLoadFloat3(&box.Center), vCenter); - - XMVECTOR oExtents = XMLoadFloat3(&box.Extents); - XMVECTOR oOrientation = XMLoadFloat4(&box.Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(oOrientation)); - - XMVECTOR Inside = XMVectorTrueInt(); - - for (size_t i = 0; i < BoundingOrientedBox::CORNER_COUNT; ++i) { - XMVECTOR C = XMVectorAdd( - XMVector3Rotate(XMVectorMultiply(oExtents, g_BoxOffset[i]), - oOrientation), - oCenter); - XMVECTOR d = XMVectorAbs(C); - Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents)); - } - - return (XMVector3EqualInt(Inside, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Frustum in axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingBox::Contains( - const BoundingFrustum& fr) const noexcept { - if (!fr.Intersects(*this)) return DISJOINT; - - XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT]; - fr.GetCorners(Corners); - - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - XMVECTOR Inside = XMVectorTrueInt(); - - for (size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i) { - XMVECTOR Point = XMLoadFloat3(&Corners[i]); - XMVECTOR d = XMVectorAbs(XMVectorSubtract(Point, vCenter)); - Inside = XMVectorAndInt(Inside, XMVectorLessOrEqual(d, vExtents)); - } - - return (XMVector3EqualInt(Inside, XMVectorTrueInt())) ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Sphere vs axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingBox::Intersects( - const BoundingSphere& sh) const noexcept { - XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center); - XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius); - - XMVECTOR BoxCenter = XMLoadFloat3(&Center); - XMVECTOR BoxExtents = XMLoadFloat3(&Extents); - - XMVECTOR BoxMin = XMVectorSubtract(BoxCenter, BoxExtents); - XMVECTOR BoxMax = XMVectorAdd(BoxCenter, BoxExtents); - - // Find the distance to the nearest point on the box. - // for each i in (x, y, z) - // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2 - // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) - // ^ 2 - - XMVECTOR d = XMVectorZero(); - - // Compute d for each dimension. - XMVECTOR LessThanMin = XMVectorLess(SphereCenter, BoxMin); - XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxMax); - - XMVECTOR MinDelta = XMVectorSubtract(SphereCenter, BoxMin); - XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxMax); - - // Choose value for each dimension based on the comparison. - d = XMVectorSelect(d, MinDelta, LessThanMin); - d = XMVectorSelect(d, MaxDelta, GreaterThanMax); - - // Use a dot-product to square them and sum them together. - XMVECTOR d2 = XMVector3Dot(d, d); - - return XMVector3LessOrEqual(d2, - XMVectorMultiply(SphereRadius, SphereRadius)); -} - -//----------------------------------------------------------------------------- -// Axis-aligned box vs. axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingBox::Intersects( - const BoundingBox& box) const noexcept { - XMVECTOR CenterA = XMLoadFloat3(&Center); - XMVECTOR ExtentsA = XMLoadFloat3(&Extents); - - XMVECTOR CenterB = XMLoadFloat3(&box.Center); - XMVECTOR ExtentsB = XMLoadFloat3(&box.Extents); - - XMVECTOR MinA = XMVectorSubtract(CenterA, ExtentsA); - XMVECTOR MaxA = XMVectorAdd(CenterA, ExtentsA); - - XMVECTOR MinB = XMVectorSubtract(CenterB, ExtentsB); - XMVECTOR MaxB = XMVectorAdd(CenterB, ExtentsB); - - // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) - // then return false - XMVECTOR Disjoint = - XMVectorOrInt(XMVectorGreater(MinA, MaxB), XMVectorGreater(MinB, MaxA)); - - return !DirectX::MathInternal::XMVector3AnyTrue(Disjoint); -} - -//----------------------------------------------------------------------------- -// Oriented box vs. axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingBox::Intersects( - const BoundingOrientedBox& box) const noexcept { - return box.Intersects(*this); -} - -//----------------------------------------------------------------------------- -// Frustum vs. axis-aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingBox::Intersects( - const BoundingFrustum& fr) const noexcept { - return fr.Intersects(*this); -} - -//----------------------------------------------------------------------------- -// Triangle vs. axis aligned box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingBox::Intersects( - FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept { - XMVECTOR Zero = XMVectorZero(); - - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - XMVECTOR BoxMin = XMVectorSubtract(vCenter, vExtents); - XMVECTOR BoxMax = XMVectorAdd(vCenter, vExtents); - - // Test the axes of the box (in effect test the AAB against the minimal AAB - // around the triangle). - XMVECTOR TriMin = XMVectorMin(XMVectorMin(V0, V1), V2); - XMVECTOR TriMax = XMVectorMax(XMVectorMax(V0, V1), V2); - - // for each i in (x, y, z) if a_min(i) > b_max(i) or b_min(i) > a_max(i) - // then disjoint - XMVECTOR Disjoint = XMVectorOrInt(XMVectorGreater(TriMin, BoxMax), - XMVectorGreater(BoxMin, TriMax)); - if (DirectX::MathInternal::XMVector3AnyTrue(Disjoint)) return false; - - // Test the plane of the triangle. - XMVECTOR Normal = - XMVector3Cross(XMVectorSubtract(V1, V0), XMVectorSubtract(V2, V0)); - XMVECTOR Dist = XMVector3Dot(Normal, V0); - - // Assert that the triangle is not degenerate. - assert(!XMVector3Equal(Normal, Zero)); - - // for each i in (x, y, z) if n(i) >= 0 then v_min(i)=b_min(i), - // v_max(i)=b_max(i) else v_min(i)=b_max(i), v_max(i)=b_min(i) - XMVECTOR NormalSelect = XMVectorGreater(Normal, Zero); - XMVECTOR V_Min = XMVectorSelect(BoxMax, BoxMin, NormalSelect); - XMVECTOR V_Max = XMVectorSelect(BoxMin, BoxMax, NormalSelect); - - // if n dot v_min + d > 0 || n dot v_max + d < 0 then disjoint - XMVECTOR MinDist = XMVector3Dot(V_Min, Normal); - XMVECTOR MaxDist = XMVector3Dot(V_Max, Normal); - - XMVECTOR NoIntersection = XMVectorGreater(MinDist, Dist); - NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(MaxDist, Dist)); - - // Move the box center to zero to simplify the following tests. - XMVECTOR TV0 = XMVectorSubtract(V0, vCenter); - XMVECTOR TV1 = XMVectorSubtract(V1, vCenter); - XMVECTOR TV2 = XMVectorSubtract(V2, vCenter); - - // Test the edge/edge axes (3*3). - XMVECTOR e0 = XMVectorSubtract(TV1, TV0); - XMVECTOR e1 = XMVectorSubtract(TV2, TV1); - XMVECTOR e2 = XMVectorSubtract(TV0, TV2); - - // Make w zero. - e0 = XMVectorInsert<0, 0, 0, 0, 1>(e0, Zero); - e1 = XMVectorInsert<0, 0, 0, 0, 1>(e1, Zero); - e2 = XMVectorInsert<0, 0, 0, 0, 1>(e2, Zero); - - XMVECTOR Axis; - XMVECTOR p0, p1, p2; - XMVECTOR Min, Max; - XMVECTOR Radius; - - // Axis == (1,0,0) x e0 = (0, -e0.z, e0.y) - Axis = XMVectorPermute(e0, XMVectorNegate(e0)); - p0 = XMVector3Dot(TV0, Axis); - // p1 = XMVector3Dot( V1, Axis ); // p1 = p0; - p2 = XMVector3Dot(TV2, Axis); - Min = XMVectorMin(p0, p2); - Max = XMVectorMax(p0, p2); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (1,0,0) x e1 = (0, -e1.z, e1.y) - Axis = XMVectorPermute(e1, XMVectorNegate(e1)); - p0 = XMVector3Dot(TV0, Axis); - p1 = XMVector3Dot(TV1, Axis); - // p2 = XMVector3Dot( V2, Axis ); // p2 = p1; - Min = XMVectorMin(p0, p1); - Max = XMVectorMax(p0, p1); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (1,0,0) x e2 = (0, -e2.z, e2.y) - Axis = XMVectorPermute(e2, XMVectorNegate(e2)); - p0 = XMVector3Dot(TV0, Axis); - p1 = XMVector3Dot(TV1, Axis); - // p2 = XMVector3Dot( V2, Axis ); // p2 = p0; - Min = XMVectorMin(p0, p1); - Max = XMVectorMax(p0, p1); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (0,1,0) x e0 = (e0.z, 0, -e0.x) - Axis = XMVectorPermute(e0, XMVectorNegate(e0)); - p0 = XMVector3Dot(TV0, Axis); - // p1 = XMVector3Dot( V1, Axis ); // p1 = p0; - p2 = XMVector3Dot(TV2, Axis); - Min = XMVectorMin(p0, p2); - Max = XMVectorMax(p0, p2); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (0,1,0) x e1 = (e1.z, 0, -e1.x) - Axis = XMVectorPermute(e1, XMVectorNegate(e1)); - p0 = XMVector3Dot(TV0, Axis); - p1 = XMVector3Dot(TV1, Axis); - // p2 = XMVector3Dot( V2, Axis ); // p2 = p1; - Min = XMVectorMin(p0, p1); - Max = XMVectorMax(p0, p1); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (0,0,1) x e2 = (e2.z, 0, -e2.x) - Axis = XMVectorPermute(e2, XMVectorNegate(e2)); - p0 = XMVector3Dot(TV0, Axis); - p1 = XMVector3Dot(TV1, Axis); - // p2 = XMVector3Dot( V2, Axis ); // p2 = p0; - Min = XMVectorMin(p0, p1); - Max = XMVectorMax(p0, p1); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (0,0,1) x e0 = (-e0.y, e0.x, 0) - Axis = XMVectorPermute(e0, XMVectorNegate(e0)); - p0 = XMVector3Dot(TV0, Axis); - // p1 = XMVector3Dot( V1, Axis ); // p1 = p0; - p2 = XMVector3Dot(TV2, Axis); - Min = XMVectorMin(p0, p2); - Max = XMVectorMax(p0, p2); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (0,0,1) x e1 = (-e1.y, e1.x, 0) - Axis = XMVectorPermute(e1, XMVectorNegate(e1)); - p0 = XMVector3Dot(TV0, Axis); - p1 = XMVector3Dot(TV1, Axis); - // p2 = XMVector3Dot( V2, Axis ); // p2 = p1; - Min = XMVectorMin(p0, p1); - Max = XMVectorMax(p0, p1); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - // Axis == (0,0,1) x e2 = (-e2.y, e2.x, 0) - Axis = XMVectorPermute(e2, XMVectorNegate(e2)); - p0 = XMVector3Dot(TV0, Axis); - p1 = XMVector3Dot(TV1, Axis); - // p2 = XMVector3Dot( V2, Axis ); // p2 = p0; - Min = XMVectorMin(p0, p1); - Max = XMVectorMax(p0, p1); - Radius = XMVector3Dot(vExtents, XMVectorAbs(Axis)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(Min, Radius)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorLess(Max, XMVectorNegate(Radius))); - - return XMVector4NotEqualInt(NoIntersection, XMVectorTrueInt()); -} - -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline PlaneIntersectionType XM_CALLCONV -BoundingBox::Intersects(FXMVECTOR Plane) const noexcept { - assert(DirectX::MathInternal::XMPlaneIsUnit(Plane)); - - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - // Set w of the center to one so we can dot4 with a plane. - vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); - - XMVECTOR Outside, Inside; - DirectX::MathInternal::FastIntersectAxisAlignedBoxPlane( - vCenter, vExtents, Plane, Outside, Inside); - - // If the box is outside any plane it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return FRONT; - - // If the box is inside all planes it is inside. - if (XMVector4EqualInt(Inside, XMVectorTrueInt())) return BACK; - - // The box is not inside all planes or outside a plane it intersects. - return INTERSECTING; -} - -//----------------------------------------------------------------------------- -// Compute the intersection of a ray (Origin, Direction) with an axis aligned -// box using the slabs method. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingBox::Intersects( - FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept { - assert(DirectX::MathInternal::XMVector3IsUnit(Direction)); - - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - // Adjust ray origin to be relative to center of the box. - XMVECTOR TOrigin = XMVectorSubtract(vCenter, Origin); - - // Compute the dot product againt each axis of the box. - // Since the axii are (1,0,0), (0,1,0), (0,0,1) no computation is necessary. - XMVECTOR AxisDotOrigin = TOrigin; - XMVECTOR AxisDotDirection = Direction; - - // if (fabs(AxisDotDirection) <= Epsilon) the ray is nearly parallel to the - // slab. - XMVECTOR IsParallel = - XMVectorLessOrEqual(XMVectorAbs(AxisDotDirection), g_RayEpsilon); - - // Test against all three axii simultaneously. - XMVECTOR InverseAxisDotDirection = XMVectorReciprocal(AxisDotDirection); - XMVECTOR t1 = XMVectorMultiply(XMVectorSubtract(AxisDotOrigin, vExtents), - InverseAxisDotDirection); - XMVECTOR t2 = XMVectorMultiply(XMVectorAdd(AxisDotOrigin, vExtents), - InverseAxisDotDirection); - - // Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't - // use the results from any directions parallel to the slab. - XMVECTOR t_min = XMVectorSelect(XMVectorMin(t1, t2), g_FltMin, IsParallel); - XMVECTOR t_max = XMVectorSelect(XMVectorMax(t1, t2), g_FltMax, IsParallel); - - // t_min.x = maximum( t_min.x, t_min.y, t_min.z ); - // t_max.x = minimum( t_max.x, t_max.y, t_max.z ); - t_min = XMVectorMax(t_min, XMVectorSplatY(t_min)); // x = max(x,y) - t_min = XMVectorMax(t_min, XMVectorSplatZ(t_min)); // x = max(max(x,y),z) - t_max = XMVectorMin(t_max, XMVectorSplatY(t_max)); // x = min(x,y) - t_max = XMVectorMin(t_max, XMVectorSplatZ(t_max)); // x = min(min(x,y),z) - - // if ( t_min > t_max ) return false; - XMVECTOR NoIntersection = - XMVectorGreater(XMVectorSplatX(t_min), XMVectorSplatX(t_max)); - - // if ( t_max < 0.0f ) return false; - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorLess(XMVectorSplatX(t_max), XMVectorZero())); - - // if (IsParallel && (-Extents > AxisDotOrigin || Extents < AxisDotOrigin)) - // return false; - XMVECTOR ParallelOverlap = XMVectorInBounds(AxisDotOrigin, vExtents); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorAndCInt(IsParallel, ParallelOverlap)); - - if (!DirectX::MathInternal::XMVector3AnyTrue(NoIntersection)) { - // Store the x-component to *pDist - XMStoreFloat(&Dist, t_min); - return true; - } - - Dist = 0.f; - return false; -} - -//----------------------------------------------------------------------------- -// Test an axis alinged box vs 6 planes (typically forming a frustum). -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingBox::ContainedBy(FXMVECTOR Plane0, FXMVECTOR Plane1, FXMVECTOR Plane2, - GXMVECTOR Plane3, HXMVECTOR Plane4, - HXMVECTOR Plane5) const noexcept { - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - - // Set w of the center to one so we can dot4 with a plane. - vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); - - XMVECTOR Outside, Inside; - - // Test against each plane. - DirectX::MathInternal::FastIntersectAxisAlignedBoxPlane( - vCenter, vExtents, Plane0, Outside, Inside); - - XMVECTOR AnyOutside = Outside; - XMVECTOR AllInside = Inside; - - DirectX::MathInternal::FastIntersectAxisAlignedBoxPlane( - vCenter, vExtents, Plane1, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectAxisAlignedBoxPlane( - vCenter, vExtents, Plane2, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectAxisAlignedBoxPlane( - vCenter, vExtents, Plane3, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectAxisAlignedBoxPlane( - vCenter, vExtents, Plane4, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectAxisAlignedBoxPlane( - vCenter, vExtents, Plane5, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - // If the box is outside any plane it is outside. - if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt())) return DISJOINT; - - // If the box is inside all planes it is inside. - if (XMVector4EqualInt(AllInside, XMVectorTrueInt())) return CONTAINS; - - // The box is not inside all planes or outside a plane, it may intersect. - return INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Create axis-aligned box that contains two other bounding boxes -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingBox::CreateMerged( - BoundingBox& Out, const BoundingBox& b1, const BoundingBox& b2) noexcept { - XMVECTOR b1Center = XMLoadFloat3(&b1.Center); - XMVECTOR b1Extents = XMLoadFloat3(&b1.Extents); - - XMVECTOR b2Center = XMLoadFloat3(&b2.Center); - XMVECTOR b2Extents = XMLoadFloat3(&b2.Extents); - - XMVECTOR Min = XMVectorSubtract(b1Center, b1Extents); - Min = XMVectorMin(Min, XMVectorSubtract(b2Center, b2Extents)); - - XMVECTOR Max = XMVectorAdd(b1Center, b1Extents); - Max = XMVectorMax(Max, XMVectorAdd(b2Center, b2Extents)); - - assert(XMVector3LessOrEqual(Min, Max)); - - XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f)); - XMStoreFloat3(&Out.Extents, - XMVectorScale(XMVectorSubtract(Max, Min), 0.5f)); -} - -//----------------------------------------------------------------------------- -// Create axis-aligned box that contains a bounding sphere -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingBox::CreateFromSphere( - BoundingBox& Out, const BoundingSphere& sh) noexcept { - XMVECTOR spCenter = XMLoadFloat3(&sh.Center); - XMVECTOR shRadius = XMVectorReplicatePtr(&sh.Radius); - - XMVECTOR Min = XMVectorSubtract(spCenter, shRadius); - XMVECTOR Max = XMVectorAdd(spCenter, shRadius); - - assert(XMVector3LessOrEqual(Min, Max)); - - XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f)); - XMStoreFloat3(&Out.Extents, - XMVectorScale(XMVectorSubtract(Max, Min), 0.5f)); -} - -//----------------------------------------------------------------------------- -// Create axis-aligned box from min/max points -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void XM_CALLCONV BoundingBox::CreateFromPoints( - BoundingBox& Out, FXMVECTOR pt1, FXMVECTOR pt2) noexcept { - XMVECTOR Min = XMVectorMin(pt1, pt2); - XMVECTOR Max = XMVectorMax(pt1, pt2); - - // Store center and extents. - XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(Min, Max), 0.5f)); - XMStoreFloat3(&Out.Extents, - XMVectorScale(XMVectorSubtract(Max, Min), 0.5f)); -} - -//----------------------------------------------------------------------------- -// Find the minimum axis aligned bounding box containing a set of points. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingBox::CreateFromPoints( - BoundingBox& Out, size_t Count, const XMFLOAT3* pPoints, - size_t Stride) noexcept { - assert(Count > 0); - assert(pPoints); - - // Find the minimum and maximum x, y, and z - XMVECTOR vMin, vMax; - - vMin = vMax = XMLoadFloat3(pPoints); - - for (size_t i = 1; i < Count; ++i) { - XMVECTOR Point = XMLoadFloat3(reinterpret_cast( - reinterpret_cast(pPoints) + i * Stride)); - - vMin = XMVectorMin(vMin, Point); - vMax = XMVectorMax(vMax, Point); - } - - // Store center and extents. - XMStoreFloat3(&Out.Center, XMVectorScale(XMVectorAdd(vMin, vMax), 0.5f)); - XMStoreFloat3(&Out.Extents, - XMVectorScale(XMVectorSubtract(vMax, vMin), 0.5f)); -} - -/**************************************************************************** - * - * BoundingOrientedBox - * - ****************************************************************************/ - -//----------------------------------------------------------------------------- -// Transform an oriented box by an angle preserving transform. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void XM_CALLCONV BoundingOrientedBox::Transform( - BoundingOrientedBox& Out, FXMMATRIX M) const noexcept { - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Composite the box rotation and the transform rotation. - XMMATRIX nM; - nM.r[0] = XMVector3Normalize(M.r[0]); - nM.r[1] = XMVector3Normalize(M.r[1]); - nM.r[2] = XMVector3Normalize(M.r[2]); - nM.r[3] = g_XMIdentityR3; - XMVECTOR Rotation = XMQuaternionRotationMatrix(nM); - vOrientation = XMQuaternionMultiply(vOrientation, Rotation); - - // Transform the center. - vCenter = XMVector3Transform(vCenter, M); - - // Scale the box extents. - XMVECTOR dX = XMVector3Length(M.r[0]); - XMVECTOR dY = XMVector3Length(M.r[1]); - XMVECTOR dZ = XMVector3Length(M.r[2]); - - XMVECTOR VectorScale = XMVectorSelect(dY, dX, g_XMSelect1000); - VectorScale = XMVectorSelect(dZ, VectorScale, g_XMSelect1100); - vExtents = XMVectorMultiply(vExtents, VectorScale); - - // Store the box. - XMStoreFloat3(&Out.Center, vCenter); - XMStoreFloat3(&Out.Extents, vExtents); - XMStoreFloat4(&Out.Orientation, vOrientation); -} - -_Use_decl_annotations_ inline void XM_CALLCONV BoundingOrientedBox::Transform( - BoundingOrientedBox& Out, float Scale, FXMVECTOR Rotation, - FXMVECTOR Translation) const noexcept { - assert(DirectX::MathInternal::XMQuaternionIsUnit(Rotation)); - - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Composite the box rotation and the transform rotation. - vOrientation = XMQuaternionMultiply(vOrientation, Rotation); - - // Transform the center. - XMVECTOR VectorScale = XMVectorReplicate(Scale); - vCenter = XMVectorAdd( - XMVector3Rotate(XMVectorMultiply(vCenter, VectorScale), Rotation), - Translation); - - // Scale the box extents. - vExtents = XMVectorMultiply(vExtents, VectorScale); - - // Store the box. - XMStoreFloat3(&Out.Center, vCenter); - XMStoreFloat3(&Out.Extents, vExtents); - XMStoreFloat4(&Out.Orientation, vOrientation); -} - -//----------------------------------------------------------------------------- -// Get the corner points of the box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingOrientedBox::GetCorners( - XMFLOAT3* Corners) const noexcept { - assert(Corners != nullptr); - - // Load the box - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - for (size_t i = 0; i < CORNER_COUNT; ++i) { - XMVECTOR C = XMVectorAdd( - XMVector3Rotate(XMVectorMultiply(vExtents, g_BoxOffset[i]), - vOrientation), - vCenter); - XMStoreFloat3(&Corners[i], C); - } -} - -//----------------------------------------------------------------------------- -// Point in oriented box test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingOrientedBox::Contains(FXMVECTOR Point) const noexcept { - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Transform the point to be local to the box. - XMVECTOR TPoint = - XMVector3InverseRotate(XMVectorSubtract(Point, vCenter), vOrientation); - - return XMVector3InBounds(TPoint, vExtents) ? CONTAINS : DISJOINT; -} - -//----------------------------------------------------------------------------- -// Triangle in oriented bounding box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingOrientedBox::Contains(FXMVECTOR V0, FXMVECTOR V1, - FXMVECTOR V2) const noexcept { - // Load the box center & orientation. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Transform the triangle vertices into the space of the box. - XMVECTOR TV0 = - XMVector3InverseRotate(XMVectorSubtract(V0, vCenter), vOrientation); - XMVECTOR TV1 = - XMVector3InverseRotate(XMVectorSubtract(V1, vCenter), vOrientation); - XMVECTOR TV2 = - XMVector3InverseRotate(XMVectorSubtract(V2, vCenter), vOrientation); - - BoundingBox box; - box.Center = XMFLOAT3(0.0f, 0.0f, 0.0f); - box.Extents = Extents; - - // Use the triangle vs axis aligned box intersection routine. - return box.Contains(TV0, TV1, TV2); -} - -//----------------------------------------------------------------------------- -// Sphere in oriented bounding box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingOrientedBox::Contains( - const BoundingSphere& sh) const noexcept { - XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center); - XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius); - - XMVECTOR BoxCenter = XMLoadFloat3(&Center); - XMVECTOR BoxExtents = XMLoadFloat3(&Extents); - XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(BoxOrientation)); - - // Transform the center of the sphere to be local to the box. - // BoxMin = -BoxExtents - // BoxMax = +BoxExtents - SphereCenter = XMVector3InverseRotate( - XMVectorSubtract(SphereCenter, BoxCenter), BoxOrientation); - - // Find the distance to the nearest point on the box. - // for each i in (x, y, z) - // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2 - // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) - // ^ 2 - - XMVECTOR d = XMVectorZero(); - - // Compute d for each dimension. - XMVECTOR LessThanMin = - XMVectorLess(SphereCenter, XMVectorNegate(BoxExtents)); - XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxExtents); - - XMVECTOR MinDelta = XMVectorAdd(SphereCenter, BoxExtents); - XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxExtents); - - // Choose value for each dimension based on the comparison. - d = XMVectorSelect(d, MinDelta, LessThanMin); - d = XMVectorSelect(d, MaxDelta, GreaterThanMax); - - // Use a dot-product to square them and sum them together. - XMVECTOR d2 = XMVector3Dot(d, d); - XMVECTOR SphereRadiusSq = XMVectorMultiply(SphereRadius, SphereRadius); - - if (XMVector4Greater(d2, SphereRadiusSq)) return DISJOINT; - - // See if we are completely inside the box - XMVECTOR SMin = XMVectorSubtract(SphereCenter, SphereRadius); - XMVECTOR SMax = XMVectorAdd(SphereCenter, SphereRadius); - - return (XMVector3InBounds(SMin, BoxExtents) && - XMVector3InBounds(SMax, BoxExtents)) - ? CONTAINS - : INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Axis aligned box vs. oriented box. Constructs an oriented box and uses -// the oriented box vs. oriented box test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingOrientedBox::Contains( - const BoundingBox& box) const noexcept { - // Make the axis aligned box oriented and do an OBB vs OBB test. - BoundingOrientedBox obox(box.Center, box.Extents, - XMFLOAT4(0.f, 0.f, 0.f, 1.f)); - return Contains(obox); -} - -//----------------------------------------------------------------------------- -// Oriented bounding box in oriented bounding box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingOrientedBox::Contains( - const BoundingOrientedBox& box) const noexcept { - if (!Intersects(box)) return DISJOINT; - - // Load the boxes - XMVECTOR aCenter = XMLoadFloat3(&Center); - XMVECTOR aExtents = XMLoadFloat3(&Extents); - XMVECTOR aOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(aOrientation)); - - XMVECTOR bCenter = XMLoadFloat3(&box.Center); - XMVECTOR bExtents = XMLoadFloat3(&box.Extents); - XMVECTOR bOrientation = XMLoadFloat4(&box.Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(bOrientation)); - - XMVECTOR offset = XMVectorSubtract(bCenter, aCenter); - - for (size_t i = 0; i < CORNER_COUNT; ++i) { - // Cb = rotate( bExtents * corneroffset[i], bOrientation ) + bcenter - // Ca = invrotate( Cb - aCenter, aOrientation ) - - XMVECTOR C = XMVectorAdd( - XMVector3Rotate(XMVectorMultiply(bExtents, g_BoxOffset[i]), - bOrientation), - offset); - C = XMVector3InverseRotate(C, aOrientation); - - if (!XMVector3InBounds(C, aExtents)) return INTERSECTS; - } - - return CONTAINS; -} - -//----------------------------------------------------------------------------- -// Frustum in oriented bounding box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingOrientedBox::Contains( - const BoundingFrustum& fr) const noexcept { - if (!fr.Intersects(*this)) return DISJOINT; - - XMFLOAT3 Corners[BoundingFrustum::CORNER_COUNT]; - fr.GetCorners(Corners); - - // Load the box - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - for (size_t i = 0; i < BoundingFrustum::CORNER_COUNT; ++i) { - XMVECTOR C = XMVector3InverseRotate( - XMVectorSubtract(XMLoadFloat3(&Corners[i]), vCenter), vOrientation); - - if (!XMVector3InBounds(C, vExtents)) return INTERSECTS; - } - - return CONTAINS; -} - -//----------------------------------------------------------------------------- -// Sphere vs. oriented box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingOrientedBox::Intersects( - const BoundingSphere& sh) const noexcept { - XMVECTOR SphereCenter = XMLoadFloat3(&sh.Center); - XMVECTOR SphereRadius = XMVectorReplicatePtr(&sh.Radius); - - XMVECTOR BoxCenter = XMLoadFloat3(&Center); - XMVECTOR BoxExtents = XMLoadFloat3(&Extents); - XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(BoxOrientation)); - - // Transform the center of the sphere to be local to the box. - // BoxMin = -BoxExtents - // BoxMax = +BoxExtents - SphereCenter = XMVector3InverseRotate( - XMVectorSubtract(SphereCenter, BoxCenter), BoxOrientation); - - // Find the distance to the nearest point on the box. - // for each i in (x, y, z) - // if (SphereCenter(i) < BoxMin(i)) d2 += (SphereCenter(i) - BoxMin(i)) ^ 2 - // else if (SphereCenter(i) > BoxMax(i)) d2 += (SphereCenter(i) - BoxMax(i)) - // ^ 2 - - XMVECTOR d = XMVectorZero(); - - // Compute d for each dimension. - XMVECTOR LessThanMin = - XMVectorLess(SphereCenter, XMVectorNegate(BoxExtents)); - XMVECTOR GreaterThanMax = XMVectorGreater(SphereCenter, BoxExtents); - - XMVECTOR MinDelta = XMVectorAdd(SphereCenter, BoxExtents); - XMVECTOR MaxDelta = XMVectorSubtract(SphereCenter, BoxExtents); - - // Choose value for each dimension based on the comparison. - d = XMVectorSelect(d, MinDelta, LessThanMin); - d = XMVectorSelect(d, MaxDelta, GreaterThanMax); - - // Use a dot-product to square them and sum them together. - XMVECTOR d2 = XMVector3Dot(d, d); - - return XMVector4LessOrEqual(d2, - XMVectorMultiply(SphereRadius, SphereRadius)) - ? true - : false; -} - -//----------------------------------------------------------------------------- -// Axis aligned box vs. oriented box. Constructs an oriented box and uses -// the oriented box vs. oriented box test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingOrientedBox::Intersects( - const BoundingBox& box) const noexcept { - // Make the axis aligned box oriented and do an OBB vs OBB test. - BoundingOrientedBox obox(box.Center, box.Extents, - XMFLOAT4(0.f, 0.f, 0.f, 1.f)); - return Intersects(obox); -} - -//----------------------------------------------------------------------------- -// Fast oriented box / oriented box intersection test using the separating axis -// theorem. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingOrientedBox::Intersects( - const BoundingOrientedBox& box) const noexcept { - // Build the 3x3 rotation matrix that defines the orientation of B relative - // to A. - XMVECTOR A_quat = XMLoadFloat4(&Orientation); - XMVECTOR B_quat = XMLoadFloat4(&box.Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(A_quat)); - assert(DirectX::MathInternal::XMQuaternionIsUnit(B_quat)); - - XMVECTOR Q = XMQuaternionMultiply(A_quat, XMQuaternionConjugate(B_quat)); - XMMATRIX R = XMMatrixRotationQuaternion(Q); - - // Compute the translation of B relative to A. - XMVECTOR A_cent = XMLoadFloat3(&Center); - XMVECTOR B_cent = XMLoadFloat3(&box.Center); - XMVECTOR t = - XMVector3InverseRotate(XMVectorSubtract(B_cent, A_cent), A_quat); - - // - // h(A) = extents of A. - // h(B) = extents of B. - // - // a(u) = axes of A = (1,0,0), (0,1,0), (0,0,1) - // b(u) = axes of B relative to A = (r00,r10,r20), (r01,r11,r21), - // (r02,r12,r22) - // - // For each possible separating axis l: - // d(A) = sum (for i = u,v,w) h(A)(i) * abs( a(i) dot l ) - // d(B) = sum (for i = u,v,w) h(B)(i) * abs( b(i) dot l ) - // if abs( t dot l ) > d(A) + d(B) then disjoint - // - - // Load extents of A and B. - XMVECTOR h_A = XMLoadFloat3(&Extents); - XMVECTOR h_B = XMLoadFloat3(&box.Extents); - - // Rows. Note R[0,1,2]X.w = 0. - XMVECTOR R0X = R.r[0]; - XMVECTOR R1X = R.r[1]; - XMVECTOR R2X = R.r[2]; - - R = XMMatrixTranspose(R); - - // Columns. Note RX[0,1,2].w = 0. - XMVECTOR RX0 = R.r[0]; - XMVECTOR RX1 = R.r[1]; - XMVECTOR RX2 = R.r[2]; - - // Absolute value of rows. - XMVECTOR AR0X = XMVectorAbs(R0X); - XMVECTOR AR1X = XMVectorAbs(R1X); - XMVECTOR AR2X = XMVectorAbs(R2X); - - // Absolute value of columns. - XMVECTOR ARX0 = XMVectorAbs(RX0); - XMVECTOR ARX1 = XMVectorAbs(RX1); - XMVECTOR ARX2 = XMVectorAbs(RX2); - - // Test each of the 15 possible seperating axii. - XMVECTOR d, d_A, d_B; - - // l = a(u) = (1, 0, 0) - // t dot l = t.x - // d(A) = h(A).x - // d(B) = h(B) dot abs(r00, r01, r02) - d = XMVectorSplatX(t); - d_A = XMVectorSplatX(h_A); - d_B = XMVector3Dot(h_B, AR0X); - XMVECTOR NoIntersection = - XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B)); - - // l = a(v) = (0, 1, 0) - // t dot l = t.y - // d(A) = h(A).y - // d(B) = h(B) dot abs(r10, r11, r12) - d = XMVectorSplatY(t); - d_A = XMVectorSplatY(h_A); - d_B = XMVector3Dot(h_B, AR1X); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(w) = (0, 0, 1) - // t dot l = t.z - // d(A) = h(A).z - // d(B) = h(B) dot abs(r20, r21, r22) - d = XMVectorSplatZ(t); - d_A = XMVectorSplatZ(h_A); - d_B = XMVector3Dot(h_B, AR2X); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = b(u) = (r00, r10, r20) - // d(A) = h(A) dot abs(r00, r10, r20) - // d(B) = h(B).x - d = XMVector3Dot(t, RX0); - d_A = XMVector3Dot(h_A, ARX0); - d_B = XMVectorSplatX(h_B); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = b(v) = (r01, r11, r21) - // d(A) = h(A) dot abs(r01, r11, r21) - // d(B) = h(B).y - d = XMVector3Dot(t, RX1); - d_A = XMVector3Dot(h_A, ARX1); - d_B = XMVectorSplatY(h_B); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = b(w) = (r02, r12, r22) - // d(A) = h(A) dot abs(r02, r12, r22) - // d(B) = h(B).z - d = XMVector3Dot(t, RX2); - d_A = XMVector3Dot(h_A, ARX2); - d_B = XMVectorSplatZ(h_B); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(u) x b(u) = (0, -r20, r10) - // d(A) = h(A) dot abs(0, r20, r10) - // d(B) = h(B) dot abs(0, r02, r01) - d = XMVector3Dot( - t, XMVectorPermute(RX0, XMVectorNegate(RX0))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX0)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR0X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(u) x b(v) = (0, -r21, r11) - // d(A) = h(A) dot abs(0, r21, r11) - // d(B) = h(B) dot abs(r02, 0, r00) - d = XMVector3Dot( - t, XMVectorPermute(RX1, XMVectorNegate(RX1))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX1)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR0X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(u) x b(w) = (0, -r22, r12) - // d(A) = h(A) dot abs(0, r22, r12) - // d(B) = h(B) dot abs(r01, r00, 0) - d = XMVector3Dot( - t, XMVectorPermute(RX2, XMVectorNegate(RX2))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX2)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR0X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(v) x b(u) = (r20, 0, -r00) - // d(A) = h(A) dot abs(r20, 0, r00) - // d(B) = h(B) dot abs(0, r12, r11) - d = XMVector3Dot( - t, XMVectorPermute(RX0, XMVectorNegate(RX0))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX0)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR1X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(v) x b(v) = (r21, 0, -r01) - // d(A) = h(A) dot abs(r21, 0, r01) - // d(B) = h(B) dot abs(r12, 0, r10) - d = XMVector3Dot( - t, XMVectorPermute(RX1, XMVectorNegate(RX1))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX1)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR1X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(v) x b(w) = (r22, 0, -r02) - // d(A) = h(A) dot abs(r22, 0, r02) - // d(B) = h(B) dot abs(r11, r10, 0) - d = XMVector3Dot( - t, XMVectorPermute(RX2, XMVectorNegate(RX2))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX2)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR1X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(w) x b(u) = (-r10, r00, 0) - // d(A) = h(A) dot abs(r10, r00, 0) - // d(B) = h(B) dot abs(0, r22, r21) - d = XMVector3Dot( - t, XMVectorPermute(RX0, XMVectorNegate(RX0))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX0)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR2X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(w) x b(v) = (-r11, r01, 0) - // d(A) = h(A) dot abs(r11, r01, 0) - // d(B) = h(B) dot abs(r22, 0, r20) - d = XMVector3Dot( - t, XMVectorPermute(RX1, XMVectorNegate(RX1))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX1)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR2X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // l = a(w) x b(w) = (-r12, r02, 0) - // d(A) = h(A) dot abs(r12, r02, 0) - // d(B) = h(B) dot abs(r21, r20, 0) - d = XMVector3Dot( - t, XMVectorPermute(RX2, XMVectorNegate(RX2))); - d_A = XMVector3Dot( - h_A, - XMVectorSwizzle( - ARX2)); - d_B = XMVector3Dot( - h_B, - XMVectorSwizzle( - AR2X)); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorGreater(XMVectorAbs(d), XMVectorAdd(d_A, d_B))); - - // No seperating axis found, boxes must intersect. - return XMVector4NotEqualInt(NoIntersection, XMVectorTrueInt()) ? true - : false; -} - -//----------------------------------------------------------------------------- -// Frustum vs. oriented box test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingOrientedBox::Intersects( - const BoundingFrustum& fr) const noexcept { - return fr.Intersects(*this); -} - -//----------------------------------------------------------------------------- -// Triangle vs. oriented box test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingOrientedBox::Intersects( - FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept { - // Load the box center & orientation. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Transform the triangle vertices into the space of the box. - XMVECTOR TV0 = - XMVector3InverseRotate(XMVectorSubtract(V0, vCenter), vOrientation); - XMVECTOR TV1 = - XMVector3InverseRotate(XMVectorSubtract(V1, vCenter), vOrientation); - XMVECTOR TV2 = - XMVector3InverseRotate(XMVectorSubtract(V2, vCenter), vOrientation); - - BoundingBox box; - box.Center = XMFLOAT3(0.0f, 0.0f, 0.0f); - box.Extents = Extents; - - // Use the triangle vs axis aligned box intersection routine. - return box.Intersects(TV0, TV1, TV2); -} - -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline PlaneIntersectionType XM_CALLCONV -BoundingOrientedBox::Intersects(FXMVECTOR Plane) const noexcept { - assert(DirectX::MathInternal::XMPlaneIsUnit(Plane)); - - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(BoxOrientation)); - - // Set w of the center to one so we can dot4 with a plane. - vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); - - // Build the 3x3 rotation matrix that defines the box axes. - XMMATRIX R = XMMatrixRotationQuaternion(BoxOrientation); - - XMVECTOR Outside, Inside; - DirectX::MathInternal::FastIntersectOrientedBoxPlane( - vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane, Outside, Inside); - - // If the box is outside any plane it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return FRONT; - - // If the box is inside all planes it is inside. - if (XMVector4EqualInt(Inside, XMVectorTrueInt())) return BACK; - - // The box is not inside all planes or outside a plane it intersects. - return INTERSECTING; -} - -//----------------------------------------------------------------------------- -// Compute the intersection of a ray (Origin, Direction) with an oriented box -// using the slabs method. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingOrientedBox::Intersects( - FXMVECTOR Origin, FXMVECTOR Direction, float& Dist) const noexcept { - assert(DirectX::MathInternal::XMVector3IsUnit(Direction)); - - static const XMVECTORU32 SelectY = { - {{XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0}}}; - static const XMVECTORU32 SelectZ = { - {{XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0}}}; - - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Get the boxes normalized side directions. - XMMATRIX R = XMMatrixRotationQuaternion(vOrientation); - - // Adjust ray origin to be relative to center of the box. - XMVECTOR TOrigin = XMVectorSubtract(vCenter, Origin); - - // Compute the dot product againt each axis of the box. - XMVECTOR AxisDotOrigin = XMVector3Dot(R.r[0], TOrigin); - AxisDotOrigin = - XMVectorSelect(AxisDotOrigin, XMVector3Dot(R.r[1], TOrigin), SelectY); - AxisDotOrigin = - XMVectorSelect(AxisDotOrigin, XMVector3Dot(R.r[2], TOrigin), SelectZ); - - XMVECTOR AxisDotDirection = XMVector3Dot(R.r[0], Direction); - AxisDotDirection = XMVectorSelect(AxisDotDirection, - XMVector3Dot(R.r[1], Direction), SelectY); - AxisDotDirection = XMVectorSelect(AxisDotDirection, - XMVector3Dot(R.r[2], Direction), SelectZ); - - // if (fabs(AxisDotDirection) <= Epsilon) the ray is nearly parallel to the - // slab. - XMVECTOR IsParallel = - XMVectorLessOrEqual(XMVectorAbs(AxisDotDirection), g_RayEpsilon); - - // Test against all three axes simultaneously. - XMVECTOR InverseAxisDotDirection = XMVectorReciprocal(AxisDotDirection); - XMVECTOR t1 = XMVectorMultiply(XMVectorSubtract(AxisDotOrigin, vExtents), - InverseAxisDotDirection); - XMVECTOR t2 = XMVectorMultiply(XMVectorAdd(AxisDotOrigin, vExtents), - InverseAxisDotDirection); - - // Compute the max of min(t1,t2) and the min of max(t1,t2) ensuring we don't - // use the results from any directions parallel to the slab. - XMVECTOR t_min = XMVectorSelect(XMVectorMin(t1, t2), g_FltMin, IsParallel); - XMVECTOR t_max = XMVectorSelect(XMVectorMax(t1, t2), g_FltMax, IsParallel); - - // t_min.x = maximum( t_min.x, t_min.y, t_min.z ); - // t_max.x = minimum( t_max.x, t_max.y, t_max.z ); - t_min = XMVectorMax(t_min, XMVectorSplatY(t_min)); // x = max(x,y) - t_min = - XMVectorMax(t_min, XMVectorSplatZ(t_min)); // x = max(std::max(x,y),z) - t_max = XMVectorMin(t_max, XMVectorSplatY(t_max)); // x = min(x,y) - t_max = - XMVectorMin(t_max, XMVectorSplatZ(t_max)); // x = min(std::min(x,y),z) - - // if ( t_min > t_max ) return false; - XMVECTOR NoIntersection = - XMVectorGreater(XMVectorSplatX(t_min), XMVectorSplatX(t_max)); - - // if ( t_max < 0.0f ) return false; - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorLess(XMVectorSplatX(t_max), XMVectorZero())); - - // if (IsParallel && (-Extents > AxisDotOrigin || Extents < AxisDotOrigin)) - // return false; - XMVECTOR ParallelOverlap = XMVectorInBounds(AxisDotOrigin, vExtents); - NoIntersection = XMVectorOrInt( - NoIntersection, XMVectorAndCInt(IsParallel, ParallelOverlap)); - - if (!DirectX::MathInternal::XMVector3AnyTrue(NoIntersection)) { - // Store the x-component to *pDist - XMStoreFloat(&Dist, t_min); - return true; - } - - Dist = 0.f; - return false; -} - -//----------------------------------------------------------------------------- -// Test an oriented box vs 6 planes (typically forming a frustum). -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingOrientedBox::ContainedBy(FXMVECTOR Plane0, FXMVECTOR Plane1, - FXMVECTOR Plane2, GXMVECTOR Plane3, - HXMVECTOR Plane4, - HXMVECTOR Plane5) const noexcept { - // Load the box. - XMVECTOR vCenter = XMLoadFloat3(&Center); - XMVECTOR vExtents = XMLoadFloat3(&Extents); - XMVECTOR BoxOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(BoxOrientation)); - - // Set w of the center to one so we can dot4 with a plane. - vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); - - // Build the 3x3 rotation matrix that defines the box axes. - XMMATRIX R = XMMatrixRotationQuaternion(BoxOrientation); - - XMVECTOR Outside, Inside; - - // Test against each plane. - DirectX::MathInternal::FastIntersectOrientedBoxPlane( - vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane0, Outside, Inside); - - XMVECTOR AnyOutside = Outside; - XMVECTOR AllInside = Inside; - - DirectX::MathInternal::FastIntersectOrientedBoxPlane( - vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane1, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectOrientedBoxPlane( - vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane2, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectOrientedBoxPlane( - vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane3, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectOrientedBoxPlane( - vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane4, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectOrientedBoxPlane( - vCenter, vExtents, R.r[0], R.r[1], R.r[2], Plane5, Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - // If the box is outside any plane it is outside. - if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt())) return DISJOINT; - - // If the box is inside all planes it is inside. - if (XMVector4EqualInt(AllInside, XMVectorTrueInt())) return CONTAINS; - - // The box is not inside all planes or outside a plane, it may intersect. - return INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Create oriented bounding box from axis-aligned bounding box -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingOrientedBox::CreateFromBoundingBox( - BoundingOrientedBox& Out, const BoundingBox& box) noexcept { - Out.Center = box.Center; - Out.Extents = box.Extents; - Out.Orientation = XMFLOAT4(0.f, 0.f, 0.f, 1.f); -} - -//----------------------------------------------------------------------------- -// Find the approximate minimum oriented bounding box containing a set of -// points. Exact computation of minimum oriented bounding box is possible but -// is slower and requires a more complex algorithm. -// The algorithm works by computing the inertia tensor of the points and then -// using the eigenvectors of the intertia tensor as the axes of the box. -// Computing the intertia tensor of the convex hull of the points will usually -// result in better bounding box but the computation is more complex. -// Exact computation of the minimum oriented bounding box is possible but the -// best know algorithm is O(N^3) and is significanly more complex to implement. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingOrientedBox::CreateFromPoints( - BoundingOrientedBox& Out, size_t Count, const XMFLOAT3* pPoints, - size_t Stride) noexcept { - assert(Count > 0); - assert(pPoints != nullptr); - - XMVECTOR CenterOfMass = XMVectorZero(); - - // Compute the center of mass and inertia tensor of the points. - for (size_t i = 0; i < Count; ++i) { - XMVECTOR Point = XMLoadFloat3(reinterpret_cast( - reinterpret_cast(pPoints) + i * Stride)); - - CenterOfMass = XMVectorAdd(CenterOfMass, Point); - } - - CenterOfMass = XMVectorMultiply( - CenterOfMass, XMVectorReciprocal(XMVectorReplicate(float(Count)))); - - // Compute the inertia tensor of the points around the center of mass. - // Using the center of mass is not strictly necessary, but will hopefully - // improve the stability of finding the eigenvectors. - XMVECTOR XX_YY_ZZ = XMVectorZero(); - XMVECTOR XY_XZ_YZ = XMVectorZero(); - - for (size_t i = 0; i < Count; ++i) { - XMVECTOR Point = XMVectorSubtract( - XMLoadFloat3(reinterpret_cast( - reinterpret_cast(pPoints) + i * Stride)), - CenterOfMass); - - XX_YY_ZZ = XMVectorAdd(XX_YY_ZZ, XMVectorMultiply(Point, Point)); - - XMVECTOR XXY = XMVectorSwizzle(Point); - XMVECTOR YZZ = XMVectorSwizzle(Point); - - XY_XZ_YZ = XMVectorAdd(XY_XZ_YZ, XMVectorMultiply(XXY, YZZ)); - } - - XMVECTOR v1, v2, v3; - - // Compute the eigenvectors of the inertia tensor. - DirectX::MathInternal::CalculateEigenVectorsFromCovarianceMatrix( - XMVectorGetX(XX_YY_ZZ), XMVectorGetY(XX_YY_ZZ), XMVectorGetZ(XX_YY_ZZ), - XMVectorGetX(XY_XZ_YZ), XMVectorGetY(XY_XZ_YZ), XMVectorGetZ(XY_XZ_YZ), - &v1, &v2, &v3); - - // Put them in a matrix. - XMMATRIX R; - - R.r[0] = XMVectorSetW(v1, 0.f); - R.r[1] = XMVectorSetW(v2, 0.f); - R.r[2] = XMVectorSetW(v3, 0.f); - R.r[3] = g_XMIdentityR3.v; - - // Multiply by -1 to convert the matrix into a right handed coordinate - // system (Det ~= 1) in case the eigenvectors form a left handed - // coordinate system (Det ~= -1) because XMQuaternionRotationMatrix only - // works on right handed matrices. - XMVECTOR Det = XMMatrixDeterminant(R); - - if (XMVector4Less(Det, XMVectorZero())) { - R.r[0] = XMVectorMultiply(R.r[0], g_XMNegativeOne.v); - R.r[1] = XMVectorMultiply(R.r[1], g_XMNegativeOne.v); - R.r[2] = XMVectorMultiply(R.r[2], g_XMNegativeOne.v); - } - - // Get the rotation quaternion from the matrix. - XMVECTOR vOrientation = XMQuaternionRotationMatrix(R); - - // Make sure it is normal (in case the vectors are slightly non-orthogonal). - vOrientation = XMQuaternionNormalize(vOrientation); - - // Rebuild the rotation matrix from the quaternion. - R = XMMatrixRotationQuaternion(vOrientation); - - // Build the rotation into the rotated space. - XMMATRIX InverseR = XMMatrixTranspose(R); - - // Find the minimum OBB using the eigenvectors as the axes. - XMVECTOR vMin, vMax; - - vMin = vMax = XMVector3TransformNormal(XMLoadFloat3(pPoints), InverseR); - - for (size_t i = 1; i < Count; ++i) { - XMVECTOR Point = XMVector3TransformNormal( - XMLoadFloat3(reinterpret_cast( - reinterpret_cast(pPoints) + i * Stride)), - InverseR); - - vMin = XMVectorMin(vMin, Point); - vMax = XMVectorMax(vMax, Point); - } - - // Rotate the center into world space. - XMVECTOR vCenter = XMVectorScale(XMVectorAdd(vMin, vMax), 0.5f); - vCenter = XMVector3TransformNormal(vCenter, R); - - // Store center, extents, and orientation. - XMStoreFloat3(&Out.Center, vCenter); - XMStoreFloat3(&Out.Extents, - XMVectorScale(XMVectorSubtract(vMax, vMin), 0.5f)); - XMStoreFloat4(&Out.Orientation, vOrientation); -} - -/**************************************************************************** - * - * BoundingFrustum - * - ****************************************************************************/ - -_Use_decl_annotations_ inline BoundingFrustum::BoundingFrustum( - CXMMATRIX Projection, bool rhcoords) noexcept { - CreateFromMatrix(*this, Projection, rhcoords); -} - -//----------------------------------------------------------------------------- -// Transform a frustum by an angle preserving transform. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void XM_CALLCONV -BoundingFrustum::Transform(BoundingFrustum& Out, FXMMATRIX M) const noexcept { - // Load the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Composite the frustum rotation and the transform rotation - XMMATRIX nM; - nM.r[0] = XMVector3Normalize(M.r[0]); - nM.r[1] = XMVector3Normalize(M.r[1]); - nM.r[2] = XMVector3Normalize(M.r[2]); - nM.r[3] = g_XMIdentityR3; - XMVECTOR Rotation = XMQuaternionRotationMatrix(nM); - vOrientation = XMQuaternionMultiply(vOrientation, Rotation); - - // Transform the center. - vOrigin = XMVector3Transform(vOrigin, M); - - // Store the frustum. - XMStoreFloat3(&Out.Origin, vOrigin); - XMStoreFloat4(&Out.Orientation, vOrientation); - - // Scale the near and far distances (the slopes remain the same). - XMVECTOR dX = XMVector3Dot(M.r[0], M.r[0]); - XMVECTOR dY = XMVector3Dot(M.r[1], M.r[1]); - XMVECTOR dZ = XMVector3Dot(M.r[2], M.r[2]); - - XMVECTOR d = XMVectorMax(dX, XMVectorMax(dY, dZ)); - float Scale = sqrtf(XMVectorGetX(d)); - - Out.Near = Near * Scale; - Out.Far = Far * Scale; - - // Copy the slopes. - Out.RightSlope = RightSlope; - Out.LeftSlope = LeftSlope; - Out.TopSlope = TopSlope; - Out.BottomSlope = BottomSlope; -} - -_Use_decl_annotations_ inline void XM_CALLCONV BoundingFrustum::Transform( - BoundingFrustum& Out, float Scale, FXMVECTOR Rotation, - FXMVECTOR Translation) const noexcept { - assert(DirectX::MathInternal::XMQuaternionIsUnit(Rotation)); - - // Load the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Composite the frustum rotation and the transform rotation. - vOrientation = XMQuaternionMultiply(vOrientation, Rotation); - - // Transform the origin. - vOrigin = XMVectorAdd( - XMVector3Rotate(XMVectorScale(vOrigin, Scale), Rotation), Translation); - - // Store the frustum. - XMStoreFloat3(&Out.Origin, vOrigin); - XMStoreFloat4(&Out.Orientation, vOrientation); - - // Scale the near and far distances (the slopes remain the same). - Out.Near = Near * Scale; - Out.Far = Far * Scale; - - // Copy the slopes. - Out.RightSlope = RightSlope; - Out.LeftSlope = LeftSlope; - Out.TopSlope = TopSlope; - Out.BottomSlope = BottomSlope; -} - -//----------------------------------------------------------------------------- -// Get the corner points of the frustum -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingFrustum::GetCorners( - XMFLOAT3* Corners) const noexcept { - assert(Corners != nullptr); - - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Build the corners of the frustum. - XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vNear = XMVectorReplicatePtr(&Near); - XMVECTOR vFar = XMVectorReplicatePtr(&Far); - - // Returns 8 corners position of bounding frustum. - // Near Far - // 0----1 4----5 - // | | | | - // | | | | - // 3----2 7----6 - - XMVECTOR vCorners[CORNER_COUNT]; - vCorners[0] = XMVectorMultiply(vLeftTop, vNear); - vCorners[1] = XMVectorMultiply(vRightTop, vNear); - vCorners[2] = XMVectorMultiply(vRightBottom, vNear); - vCorners[3] = XMVectorMultiply(vLeftBottom, vNear); - vCorners[4] = XMVectorMultiply(vLeftTop, vFar); - vCorners[5] = XMVectorMultiply(vRightTop, vFar); - vCorners[6] = XMVectorMultiply(vRightBottom, vFar); - vCorners[7] = XMVectorMultiply(vLeftBottom, vFar); - - for (size_t i = 0; i < CORNER_COUNT; ++i) { - XMVECTOR C = - XMVectorAdd(XMVector3Rotate(vCorners[i], vOrientation), vOrigin); - XMStoreFloat3(&Corners[i], C); - } -} - -//----------------------------------------------------------------------------- -// Point in frustum test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingFrustum::Contains(FXMVECTOR Point) const noexcept { - // Build frustum planes. - XMVECTOR Planes[6]; - Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - - // Load origin and orientation. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Transform point into local space of frustum. - XMVECTOR TPoint = - XMVector3InverseRotate(XMVectorSubtract(Point, vOrigin), vOrientation); - - // Set w to one. - TPoint = XMVectorInsert<0, 0, 0, 0, 1>(TPoint, XMVectorSplatOne()); - - XMVECTOR Zero = XMVectorZero(); - XMVECTOR Outside = Zero; - - // Test point against each plane of the frustum. - for (size_t i = 0; i < 6; ++i) { - XMVECTOR Dot = XMVector4Dot(TPoint, Planes[i]); - Outside = XMVectorOrInt(Outside, XMVectorGreater(Dot, Zero)); - } - - return XMVector4NotEqualInt(Outside, XMVectorTrueInt()) ? CONTAINS - : DISJOINT; -} - -//----------------------------------------------------------------------------- -// Triangle vs frustum test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingFrustum::Contains(FXMVECTOR V0, FXMVECTOR V1, - FXMVECTOR V2) const noexcept { - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Create 6 planes (do it inline to encourage use of registers) - XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - NearPlane = DirectX::MathInternal::XMPlaneTransform(NearPlane, vOrientation, - vOrigin); - NearPlane = XMPlaneNormalize(NearPlane); - - XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - FarPlane = DirectX::MathInternal::XMPlaneTransform(FarPlane, vOrientation, - vOrigin); - FarPlane = XMPlaneNormalize(FarPlane); - - XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - RightPlane = DirectX::MathInternal::XMPlaneTransform(RightPlane, - vOrientation, vOrigin); - RightPlane = XMPlaneNormalize(RightPlane); - - XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - LeftPlane = DirectX::MathInternal::XMPlaneTransform(LeftPlane, vOrientation, - vOrigin); - LeftPlane = XMPlaneNormalize(LeftPlane); - - XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - TopPlane = DirectX::MathInternal::XMPlaneTransform(TopPlane, vOrientation, - vOrigin); - TopPlane = XMPlaneNormalize(TopPlane); - - XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - BottomPlane = DirectX::MathInternal::XMPlaneTransform( - BottomPlane, vOrientation, vOrigin); - BottomPlane = XMPlaneNormalize(BottomPlane); - - return TriangleTests::ContainedBy(V0, V1, V2, NearPlane, FarPlane, - RightPlane, LeftPlane, TopPlane, - BottomPlane); -} - -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingFrustum::Contains( - const BoundingSphere& sh) const noexcept { - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Create 6 planes (do it inline to encourage use of registers) - XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - NearPlane = DirectX::MathInternal::XMPlaneTransform(NearPlane, vOrientation, - vOrigin); - NearPlane = XMPlaneNormalize(NearPlane); - - XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - FarPlane = DirectX::MathInternal::XMPlaneTransform(FarPlane, vOrientation, - vOrigin); - FarPlane = XMPlaneNormalize(FarPlane); - - XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - RightPlane = DirectX::MathInternal::XMPlaneTransform(RightPlane, - vOrientation, vOrigin); - RightPlane = XMPlaneNormalize(RightPlane); - - XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - LeftPlane = DirectX::MathInternal::XMPlaneTransform(LeftPlane, vOrientation, - vOrigin); - LeftPlane = XMPlaneNormalize(LeftPlane); - - XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - TopPlane = DirectX::MathInternal::XMPlaneTransform(TopPlane, vOrientation, - vOrigin); - TopPlane = XMPlaneNormalize(TopPlane); - - XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - BottomPlane = DirectX::MathInternal::XMPlaneTransform( - BottomPlane, vOrientation, vOrigin); - BottomPlane = XMPlaneNormalize(BottomPlane); - - return sh.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, - BottomPlane); -} - -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingFrustum::Contains( - const BoundingBox& box) const noexcept { - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Create 6 planes (do it inline to encourage use of registers) - XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - NearPlane = DirectX::MathInternal::XMPlaneTransform(NearPlane, vOrientation, - vOrigin); - NearPlane = XMPlaneNormalize(NearPlane); - - XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - FarPlane = DirectX::MathInternal::XMPlaneTransform(FarPlane, vOrientation, - vOrigin); - FarPlane = XMPlaneNormalize(FarPlane); - - XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - RightPlane = DirectX::MathInternal::XMPlaneTransform(RightPlane, - vOrientation, vOrigin); - RightPlane = XMPlaneNormalize(RightPlane); - - XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - LeftPlane = DirectX::MathInternal::XMPlaneTransform(LeftPlane, vOrientation, - vOrigin); - LeftPlane = XMPlaneNormalize(LeftPlane); - - XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - TopPlane = DirectX::MathInternal::XMPlaneTransform(TopPlane, vOrientation, - vOrigin); - TopPlane = XMPlaneNormalize(TopPlane); - - XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - BottomPlane = DirectX::MathInternal::XMPlaneTransform( - BottomPlane, vOrientation, vOrigin); - BottomPlane = XMPlaneNormalize(BottomPlane); - - return box.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, - BottomPlane); -} - -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingFrustum::Contains( - const BoundingOrientedBox& box) const noexcept { - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Create 6 planes (do it inline to encourage use of registers) - XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - NearPlane = DirectX::MathInternal::XMPlaneTransform(NearPlane, vOrientation, - vOrigin); - NearPlane = XMPlaneNormalize(NearPlane); - - XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - FarPlane = DirectX::MathInternal::XMPlaneTransform(FarPlane, vOrientation, - vOrigin); - FarPlane = XMPlaneNormalize(FarPlane); - - XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - RightPlane = DirectX::MathInternal::XMPlaneTransform(RightPlane, - vOrientation, vOrigin); - RightPlane = XMPlaneNormalize(RightPlane); - - XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - LeftPlane = DirectX::MathInternal::XMPlaneTransform(LeftPlane, vOrientation, - vOrigin); - LeftPlane = XMPlaneNormalize(LeftPlane); - - XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - TopPlane = DirectX::MathInternal::XMPlaneTransform(TopPlane, vOrientation, - vOrigin); - TopPlane = XMPlaneNormalize(TopPlane); - - XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - BottomPlane = DirectX::MathInternal::XMPlaneTransform( - BottomPlane, vOrientation, vOrigin); - BottomPlane = XMPlaneNormalize(BottomPlane); - - return box.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, - BottomPlane); -} - -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType BoundingFrustum::Contains( - const BoundingFrustum& fr) const noexcept { - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - // Create 6 planes (do it inline to encourage use of registers) - XMVECTOR NearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - NearPlane = DirectX::MathInternal::XMPlaneTransform(NearPlane, vOrientation, - vOrigin); - NearPlane = XMPlaneNormalize(NearPlane); - - XMVECTOR FarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - FarPlane = DirectX::MathInternal::XMPlaneTransform(FarPlane, vOrientation, - vOrigin); - FarPlane = XMPlaneNormalize(FarPlane); - - XMVECTOR RightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - RightPlane = DirectX::MathInternal::XMPlaneTransform(RightPlane, - vOrientation, vOrigin); - RightPlane = XMPlaneNormalize(RightPlane); - - XMVECTOR LeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - LeftPlane = DirectX::MathInternal::XMPlaneTransform(LeftPlane, vOrientation, - vOrigin); - LeftPlane = XMPlaneNormalize(LeftPlane); - - XMVECTOR TopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - TopPlane = DirectX::MathInternal::XMPlaneTransform(TopPlane, vOrientation, - vOrigin); - TopPlane = XMPlaneNormalize(TopPlane); - - XMVECTOR BottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - BottomPlane = DirectX::MathInternal::XMPlaneTransform( - BottomPlane, vOrientation, vOrigin); - BottomPlane = XMPlaneNormalize(BottomPlane); - - return fr.ContainedBy(NearPlane, FarPlane, RightPlane, LeftPlane, TopPlane, - BottomPlane); -} - -//----------------------------------------------------------------------------- -// Exact sphere vs frustum test. The algorithm first checks the sphere against -// the planes of the frustum, then if the plane checks were indeterminate finds -// the nearest feature (plane, line, point) on the frustum to the center of the -// sphere and compares the distance to the nearest feature to the radius of the -// sphere -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingFrustum::Intersects( - const BoundingSphere& sh) const noexcept { - XMVECTOR Zero = XMVectorZero(); - - // Build the frustum planes. - XMVECTOR Planes[6]; - Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - - // Normalize the planes so we can compare to the sphere radius. - Planes[2] = XMVector3Normalize(Planes[2]); - Planes[3] = XMVector3Normalize(Planes[3]); - Planes[4] = XMVector3Normalize(Planes[4]); - Planes[5] = XMVector3Normalize(Planes[5]); - - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Load the sphere. - XMVECTOR vCenter = XMLoadFloat3(&sh.Center); - XMVECTOR vRadius = XMVectorReplicatePtr(&sh.Radius); - - // Transform the center of the sphere into the local space of frustum. - vCenter = XMVector3InverseRotate(XMVectorSubtract(vCenter, vOrigin), - vOrientation); - - // Set w of the center to one so we can dot4 with the plane. - vCenter = XMVectorInsert<0, 0, 0, 0, 1>(vCenter, XMVectorSplatOne()); - - // Check against each plane of the frustum. - XMVECTOR Outside = XMVectorFalseInt(); - XMVECTOR InsideAll = XMVectorTrueInt(); - XMVECTOR CenterInsideAll = XMVectorTrueInt(); - - XMVECTOR Dist[6]; - - for (size_t i = 0; i < 6; ++i) { - Dist[i] = XMVector4Dot(vCenter, Planes[i]); - - // Outside the plane? - Outside = XMVectorOrInt(Outside, XMVectorGreater(Dist[i], vRadius)); - - // Fully inside the plane? - InsideAll = XMVectorAndInt( - InsideAll, XMVectorLessOrEqual(Dist[i], XMVectorNegate(vRadius))); - - // Check if the center is inside the plane. - CenterInsideAll = - XMVectorAndInt(CenterInsideAll, XMVectorLessOrEqual(Dist[i], Zero)); - } - - // If the sphere is outside any of the planes it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // If the sphere is inside all planes it is fully inside. - if (XMVector4EqualInt(InsideAll, XMVectorTrueInt())) return true; - - // If the center of the sphere is inside all planes and the sphere - // intersects one or more planes then it must intersect. - if (XMVector4EqualInt(CenterInsideAll, XMVectorTrueInt())) return true; - - // The sphere may be outside the frustum or intersecting the frustum. - // Find the nearest feature (face, edge, or corner) on the frustum - // to the sphere. - - // The faces adjacent to each face are: - static const size_t adjacent_faces[6][4] = {{2, 3, 4, 5}, // 0 - {2, 3, 4, 5}, // 1 - {0, 1, 4, 5}, // 2 - {0, 1, 4, 5}, // 3 - {0, 1, 2, 3}, // 4 - {0, 1, 2, 3}}; // 5 - - XMVECTOR Intersects = XMVectorFalseInt(); - - // Check to see if the nearest feature is one of the planes. - for (size_t i = 0; i < 6; ++i) { - // Find the nearest point on the plane to the center of the sphere. - XMVECTOR Point = - XMVectorNegativeMultiplySubtract(Planes[i], Dist[i], vCenter); - - // Set w of the point to one. - Point = XMVectorInsert<0, 0, 0, 0, 1>(Point, XMVectorSplatOne()); - - // If the point is inside the face (inside the adjacent planes) then - // this plane is the nearest feature. - XMVECTOR InsideFace = XMVectorTrueInt(); - - for (size_t j = 0; j < 4; j++) { - size_t plane_index = adjacent_faces[i][j]; - - InsideFace = XMVectorAndInt( - InsideFace, - XMVectorLessOrEqual(XMVector4Dot(Point, Planes[plane_index]), - Zero)); - } - - // Since we have already checked distance from the plane we know that - // the sphere must intersect if this plane is the nearest feature. - Intersects = XMVectorOrInt( - Intersects, - XMVectorAndInt(XMVectorGreater(Dist[i], Zero), InsideFace)); - } - - if (XMVector4EqualInt(Intersects, XMVectorTrueInt())) return true; - - // Build the corners of the frustum. - XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vNear = XMVectorReplicatePtr(&Near); - XMVECTOR vFar = XMVectorReplicatePtr(&Far); - - XMVECTOR Corners[CORNER_COUNT]; - Corners[0] = XMVectorMultiply(vRightTop, vNear); - Corners[1] = XMVectorMultiply(vRightBottom, vNear); - Corners[2] = XMVectorMultiply(vLeftTop, vNear); - Corners[3] = XMVectorMultiply(vLeftBottom, vNear); - Corners[4] = XMVectorMultiply(vRightTop, vFar); - Corners[5] = XMVectorMultiply(vRightBottom, vFar); - Corners[6] = XMVectorMultiply(vLeftTop, vFar); - Corners[7] = XMVectorMultiply(vLeftBottom, vFar); - - // The Edges are: - static const size_t edges[12][2] = { - {0, 1}, {2, 3}, {0, 2}, {1, 3}, // Near plane - {4, 5}, {6, 7}, {4, 6}, {5, 7}, // Far plane - {0, 4}, {1, 5}, {2, 6}, {3, 7}, - }; // Near to far - - XMVECTOR RadiusSq = XMVectorMultiply(vRadius, vRadius); - - // Check to see if the nearest feature is one of the edges (or corners). - for (size_t i = 0; i < 12; ++i) { - size_t ei0 = edges[i][0]; - size_t ei1 = edges[i][1]; - - // Find the nearest point on the edge to the center of the sphere. - // The corners of the frustum are included as the endpoints of the - // edges. - XMVECTOR Point = DirectX::MathInternal::PointOnLineSegmentNearestPoint( - Corners[ei0], Corners[ei1], vCenter); - - XMVECTOR Delta = XMVectorSubtract(vCenter, Point); - - XMVECTOR DistSq = XMVector3Dot(Delta, Delta); - - // If the distance to the center of the sphere to the point is less than - // the radius of the sphere then it must intersect. - Intersects = - XMVectorOrInt(Intersects, XMVectorLessOrEqual(DistSq, RadiusSq)); - } - - if (XMVector4EqualInt(Intersects, XMVectorTrueInt())) return true; - - // The sphere must be outside the frustum. - return false; -} - -//----------------------------------------------------------------------------- -// Exact axis aligned box vs frustum test. Constructs an oriented box and uses -// the oriented box vs frustum test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingFrustum::Intersects( - const BoundingBox& box) const noexcept { - // Make the axis aligned box oriented and do an OBB vs frustum test. - BoundingOrientedBox obox(box.Center, box.Extents, - XMFLOAT4(0.f, 0.f, 0.f, 1.f)); - return Intersects(obox); -} - -//----------------------------------------------------------------------------- -// Exact oriented box vs frustum test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingFrustum::Intersects( - const BoundingOrientedBox& box) const noexcept { - static const XMVECTORU32 SelectY = { - {{XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0}}}; - static const XMVECTORU32 SelectZ = { - {{XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0}}}; - - XMVECTOR Zero = XMVectorZero(); - - // Build the frustum planes. - XMVECTOR Planes[6]; - Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR FrustumOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(FrustumOrientation)); - - // Load the box. - XMVECTOR Center = XMLoadFloat3(&box.Center); - XMVECTOR Extents = XMLoadFloat3(&box.Extents); - XMVECTOR BoxOrientation = XMLoadFloat4(&box.Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(BoxOrientation)); - - // Transform the oriented box into the space of the frustum in order to - // minimize the number of transforms we have to do. - Center = XMVector3InverseRotate(XMVectorSubtract(Center, vOrigin), - FrustumOrientation); - BoxOrientation = XMQuaternionMultiply( - BoxOrientation, XMQuaternionConjugate(FrustumOrientation)); - - // Set w of the center to one so we can dot4 with the plane. - Center = XMVectorInsert<0, 0, 0, 0, 1>(Center, XMVectorSplatOne()); - - // Build the 3x3 rotation matrix that defines the box axes. - XMMATRIX R = XMMatrixRotationQuaternion(BoxOrientation); - - // Check against each plane of the frustum. - XMVECTOR Outside = XMVectorFalseInt(); - XMVECTOR InsideAll = XMVectorTrueInt(); - XMVECTOR CenterInsideAll = XMVectorTrueInt(); - - for (size_t i = 0; i < 6; ++i) { - // Compute the distance to the center of the box. - XMVECTOR Dist = XMVector4Dot(Center, Planes[i]); - - // Project the axes of the box onto the normal of the plane. Half the - // length of the projection (sometime called the "radius") is equal to - // h(u) * abs(n dot b(u))) + h(v) * abs(n dot b(v)) + h(w) * abs(n dot - // b(w)) where h(i) are extents of the box, n is the plane normal, and - // b(i) are the axes of the box. - XMVECTOR Radius = XMVector3Dot(Planes[i], R.r[0]); - Radius = - XMVectorSelect(Radius, XMVector3Dot(Planes[i], R.r[1]), SelectY); - Radius = - XMVectorSelect(Radius, XMVector3Dot(Planes[i], R.r[2]), SelectZ); - Radius = XMVector3Dot(Extents, XMVectorAbs(Radius)); - - // Outside the plane? - Outside = XMVectorOrInt(Outside, XMVectorGreater(Dist, Radius)); - - // Fully inside the plane? - InsideAll = XMVectorAndInt( - InsideAll, XMVectorLessOrEqual(Dist, XMVectorNegate(Radius))); - - // Check if the center is inside the plane. - CenterInsideAll = - XMVectorAndInt(CenterInsideAll, XMVectorLessOrEqual(Dist, Zero)); - } - - // If the box is outside any of the planes it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // If the box is inside all planes it is fully inside. - if (XMVector4EqualInt(InsideAll, XMVectorTrueInt())) return true; - - // If the center of the box is inside all planes and the box intersects - // one or more planes then it must intersect. - if (XMVector4EqualInt(CenterInsideAll, XMVectorTrueInt())) return true; - - // Build the corners of the frustum. - XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vNear = XMVectorReplicatePtr(&Near); - XMVECTOR vFar = XMVectorReplicatePtr(&Far); - - XMVECTOR Corners[CORNER_COUNT]; - Corners[0] = XMVectorMultiply(vRightTop, vNear); - Corners[1] = XMVectorMultiply(vRightBottom, vNear); - Corners[2] = XMVectorMultiply(vLeftTop, vNear); - Corners[3] = XMVectorMultiply(vLeftBottom, vNear); - Corners[4] = XMVectorMultiply(vRightTop, vFar); - Corners[5] = XMVectorMultiply(vRightBottom, vFar); - Corners[6] = XMVectorMultiply(vLeftTop, vFar); - Corners[7] = XMVectorMultiply(vLeftBottom, vFar); - - // Test against box axes (3) - { - // Find the min/max values of the projection of the frustum onto each - // axis. - XMVECTOR FrustumMin, FrustumMax; - - FrustumMin = XMVector3Dot(Corners[0], R.r[0]); - FrustumMin = XMVectorSelect(FrustumMin, - XMVector3Dot(Corners[0], R.r[1]), SelectY); - FrustumMin = XMVectorSelect(FrustumMin, - XMVector3Dot(Corners[0], R.r[2]), SelectZ); - FrustumMax = FrustumMin; - - for (size_t i = 1; i < BoundingOrientedBox::CORNER_COUNT; ++i) { - XMVECTOR Temp = XMVector3Dot(Corners[i], R.r[0]); - Temp = - XMVectorSelect(Temp, XMVector3Dot(Corners[i], R.r[1]), SelectY); - Temp = - XMVectorSelect(Temp, XMVector3Dot(Corners[i], R.r[2]), SelectZ); - - FrustumMin = XMVectorMin(FrustumMin, Temp); - FrustumMax = XMVectorMax(FrustumMax, Temp); - } - - // Project the center of the box onto the axes. - XMVECTOR BoxDist = XMVector3Dot(Center, R.r[0]); - BoxDist = - XMVectorSelect(BoxDist, XMVector3Dot(Center, R.r[1]), SelectY); - BoxDist = - XMVectorSelect(BoxDist, XMVector3Dot(Center, R.r[2]), SelectZ); - - // The projection of the box onto the axis is just its Center and - // Extents. if (min > box_max || max < box_min) reject; - XMVECTOR Result = XMVectorOrInt( - XMVectorGreater(FrustumMin, XMVectorAdd(BoxDist, Extents)), - XMVectorLess(FrustumMax, XMVectorSubtract(BoxDist, Extents))); - - if (DirectX::MathInternal::XMVector3AnyTrue(Result)) return false; - } - - // Test against edge/edge axes (3*6). - XMVECTOR FrustumEdgeAxis[6]; - - FrustumEdgeAxis[0] = vRightTop; - FrustumEdgeAxis[1] = vRightBottom; - FrustumEdgeAxis[2] = vLeftTop; - FrustumEdgeAxis[3] = vLeftBottom; - FrustumEdgeAxis[4] = XMVectorSubtract(vRightTop, vLeftTop); - FrustumEdgeAxis[5] = XMVectorSubtract(vLeftBottom, vLeftTop); - - for (size_t i = 0; i < 3; ++i) { - for (size_t j = 0; j < 6; j++) { - // Compute the axis we are going to test. - XMVECTOR Axis = XMVector3Cross(R.r[i], FrustumEdgeAxis[j]); - - // Find the min/max values of the projection of the frustum onto the - // axis. - XMVECTOR FrustumMin, FrustumMax; - - FrustumMin = FrustumMax = XMVector3Dot(Axis, Corners[0]); - - for (size_t k = 1; k < CORNER_COUNT; k++) { - XMVECTOR Temp = XMVector3Dot(Axis, Corners[k]); - FrustumMin = XMVectorMin(FrustumMin, Temp); - FrustumMax = XMVectorMax(FrustumMax, Temp); - } - - // Project the center of the box onto the axis. - XMVECTOR Dist = XMVector3Dot(Center, Axis); - - // Project the axes of the box onto the axis to find the "radius" of - // the box. - XMVECTOR Radius = XMVector3Dot(Axis, R.r[0]); - Radius = - XMVectorSelect(Radius, XMVector3Dot(Axis, R.r[1]), SelectY); - Radius = - XMVectorSelect(Radius, XMVector3Dot(Axis, R.r[2]), SelectZ); - Radius = XMVector3Dot(Extents, XMVectorAbs(Radius)); - - // if (center > max + radius || center < min - radius) reject; - Outside = XMVectorOrInt( - Outside, - XMVectorGreater(Dist, XMVectorAdd(FrustumMax, Radius))); - Outside = XMVectorOrInt( - Outside, - XMVectorLess(Dist, XMVectorSubtract(FrustumMin, Radius))); - } - } - - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // If we did not find a separating plane then the box must intersect the - // frustum. - return true; -} - -//----------------------------------------------------------------------------- -// Exact frustum vs frustum test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool BoundingFrustum::Intersects( - const BoundingFrustum& fr) const noexcept { - // Load origin and orientation of frustum B. - XMVECTOR OriginB = XMLoadFloat3(&Origin); - XMVECTOR OrientationB = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(OrientationB)); - - // Build the planes of frustum B. - XMVECTOR AxisB[6]; - AxisB[0] = XMVectorSet(0.0f, 0.0f, -1.0f, 0.0f); - AxisB[1] = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f); - AxisB[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - AxisB[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - AxisB[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - AxisB[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - - XMVECTOR PlaneDistB[6]; - PlaneDistB[0] = XMVectorNegate(XMVectorReplicatePtr(&Near)); - PlaneDistB[1] = XMVectorReplicatePtr(&Far); - PlaneDistB[2] = XMVectorZero(); - PlaneDistB[3] = XMVectorZero(); - PlaneDistB[4] = XMVectorZero(); - PlaneDistB[5] = XMVectorZero(); - - // Load origin and orientation of frustum A. - XMVECTOR OriginA = XMLoadFloat3(&fr.Origin); - XMVECTOR OrientationA = XMLoadFloat4(&fr.Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(OrientationA)); - - // Transform frustum A into the space of the frustum B in order to - // minimize the number of transforms we have to do. - OriginA = XMVector3InverseRotate(XMVectorSubtract(OriginA, OriginB), - OrientationB); - OrientationA = - XMQuaternionMultiply(OrientationA, XMQuaternionConjugate(OrientationB)); - - // Build the corners of frustum A (in the local space of B). - XMVECTOR RightTopA = XMVectorSet(fr.RightSlope, fr.TopSlope, 1.0f, 0.0f); - XMVECTOR RightBottomA = - XMVectorSet(fr.RightSlope, fr.BottomSlope, 1.0f, 0.0f); - XMVECTOR LeftTopA = XMVectorSet(fr.LeftSlope, fr.TopSlope, 1.0f, 0.0f); - XMVECTOR LeftBottomA = - XMVectorSet(fr.LeftSlope, fr.BottomSlope, 1.0f, 0.0f); - XMVECTOR NearA = XMVectorReplicatePtr(&fr.Near); - XMVECTOR FarA = XMVectorReplicatePtr(&fr.Far); - - RightTopA = XMVector3Rotate(RightTopA, OrientationA); - RightBottomA = XMVector3Rotate(RightBottomA, OrientationA); - LeftTopA = XMVector3Rotate(LeftTopA, OrientationA); - LeftBottomA = XMVector3Rotate(LeftBottomA, OrientationA); - - XMVECTOR CornersA[CORNER_COUNT]; - CornersA[0] = XMVectorMultiplyAdd(RightTopA, NearA, OriginA); - CornersA[1] = XMVectorMultiplyAdd(RightBottomA, NearA, OriginA); - CornersA[2] = XMVectorMultiplyAdd(LeftTopA, NearA, OriginA); - CornersA[3] = XMVectorMultiplyAdd(LeftBottomA, NearA, OriginA); - CornersA[4] = XMVectorMultiplyAdd(RightTopA, FarA, OriginA); - CornersA[5] = XMVectorMultiplyAdd(RightBottomA, FarA, OriginA); - CornersA[6] = XMVectorMultiplyAdd(LeftTopA, FarA, OriginA); - CornersA[7] = XMVectorMultiplyAdd(LeftBottomA, FarA, OriginA); - - // Check frustum A against each plane of frustum B. - XMVECTOR Outside = XMVectorFalseInt(); - XMVECTOR InsideAll = XMVectorTrueInt(); - - for (size_t i = 0; i < 6; ++i) { - // Find the min/max projection of the frustum onto the plane normal. - XMVECTOR Min, Max; - - Min = Max = XMVector3Dot(AxisB[i], CornersA[0]); - - for (size_t j = 1; j < CORNER_COUNT; j++) { - XMVECTOR Temp = XMVector3Dot(AxisB[i], CornersA[j]); - Min = XMVectorMin(Min, Temp); - Max = XMVectorMax(Max, Temp); - } - - // Outside the plane? - Outside = XMVectorOrInt(Outside, XMVectorGreater(Min, PlaneDistB[i])); - - // Fully inside the plane? - InsideAll = - XMVectorAndInt(InsideAll, XMVectorLessOrEqual(Max, PlaneDistB[i])); - } - - // If the frustum A is outside any of the planes of frustum B it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // If frustum A is inside all planes of frustum B it is fully inside. - if (XMVector4EqualInt(InsideAll, XMVectorTrueInt())) return true; - - // Build the corners of frustum B. - XMVECTOR RightTopB = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR RightBottomB = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR LeftTopB = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR LeftBottomB = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR NearB = XMVectorReplicatePtr(&Near); - XMVECTOR FarB = XMVectorReplicatePtr(&Far); - - XMVECTOR CornersB[BoundingFrustum::CORNER_COUNT]; - CornersB[0] = XMVectorMultiply(RightTopB, NearB); - CornersB[1] = XMVectorMultiply(RightBottomB, NearB); - CornersB[2] = XMVectorMultiply(LeftTopB, NearB); - CornersB[3] = XMVectorMultiply(LeftBottomB, NearB); - CornersB[4] = XMVectorMultiply(RightTopB, FarB); - CornersB[5] = XMVectorMultiply(RightBottomB, FarB); - CornersB[6] = XMVectorMultiply(LeftTopB, FarB); - CornersB[7] = XMVectorMultiply(LeftBottomB, FarB); - - // Build the planes of frustum A (in the local space of B). - XMVECTOR AxisA[6]; - XMVECTOR PlaneDistA[6]; - - AxisA[0] = XMVectorSet(0.0f, 0.0f, -1.0f, 0.0f); - AxisA[1] = XMVectorSet(0.0f, 0.0f, 1.0f, 0.0f); - AxisA[2] = XMVectorSet(1.0f, 0.0f, -fr.RightSlope, 0.0f); - AxisA[3] = XMVectorSet(-1.0f, 0.0f, fr.LeftSlope, 0.0f); - AxisA[4] = XMVectorSet(0.0f, 1.0f, -fr.TopSlope, 0.0f); - AxisA[5] = XMVectorSet(0.0f, -1.0f, fr.BottomSlope, 0.0f); - - AxisA[0] = XMVector3Rotate(AxisA[0], OrientationA); - AxisA[1] = XMVectorNegate(AxisA[0]); - AxisA[2] = XMVector3Rotate(AxisA[2], OrientationA); - AxisA[3] = XMVector3Rotate(AxisA[3], OrientationA); - AxisA[4] = XMVector3Rotate(AxisA[4], OrientationA); - AxisA[5] = XMVector3Rotate(AxisA[5], OrientationA); - - PlaneDistA[0] = - XMVector3Dot(AxisA[0], CornersA[0]); // Re-use corner on near plane. - PlaneDistA[1] = - XMVector3Dot(AxisA[1], CornersA[4]); // Re-use corner on far plane. - PlaneDistA[2] = XMVector3Dot(AxisA[2], OriginA); - PlaneDistA[3] = XMVector3Dot(AxisA[3], OriginA); - PlaneDistA[4] = XMVector3Dot(AxisA[4], OriginA); - PlaneDistA[5] = XMVector3Dot(AxisA[5], OriginA); - - // Check each axis of frustum A for a seperating plane (5). - for (size_t i = 0; i < 6; ++i) { - // Find the minimum projection of the frustum onto the plane normal. - XMVECTOR Min; - - Min = XMVector3Dot(AxisA[i], CornersB[0]); - - for (size_t j = 1; j < CORNER_COUNT; j++) { - XMVECTOR Temp = XMVector3Dot(AxisA[i], CornersB[j]); - Min = XMVectorMin(Min, Temp); - } - - // Outside the plane? - Outside = XMVectorOrInt(Outside, XMVectorGreater(Min, PlaneDistA[i])); - } - - // If the frustum B is outside any of the planes of frustum A it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // Check edge/edge axes (6 * 6). - XMVECTOR FrustumEdgeAxisA[6]; - FrustumEdgeAxisA[0] = RightTopA; - FrustumEdgeAxisA[1] = RightBottomA; - FrustumEdgeAxisA[2] = LeftTopA; - FrustumEdgeAxisA[3] = LeftBottomA; - FrustumEdgeAxisA[4] = XMVectorSubtract(RightTopA, LeftTopA); - FrustumEdgeAxisA[5] = XMVectorSubtract(LeftBottomA, LeftTopA); - - XMVECTOR FrustumEdgeAxisB[6]; - FrustumEdgeAxisB[0] = RightTopB; - FrustumEdgeAxisB[1] = RightBottomB; - FrustumEdgeAxisB[2] = LeftTopB; - FrustumEdgeAxisB[3] = LeftBottomB; - FrustumEdgeAxisB[4] = XMVectorSubtract(RightTopB, LeftTopB); - FrustumEdgeAxisB[5] = XMVectorSubtract(LeftBottomB, LeftTopB); - - for (size_t i = 0; i < 6; ++i) { - for (size_t j = 0; j < 6; j++) { - // Compute the axis we are going to test. - XMVECTOR Axis = - XMVector3Cross(FrustumEdgeAxisA[i], FrustumEdgeAxisB[j]); - - // Find the min/max values of the projection of both frustums onto - // the axis. - XMVECTOR MinA, MaxA; - XMVECTOR MinB, MaxB; - - MinA = MaxA = XMVector3Dot(Axis, CornersA[0]); - MinB = MaxB = XMVector3Dot(Axis, CornersB[0]); - - for (size_t k = 1; k < CORNER_COUNT; k++) { - XMVECTOR TempA = XMVector3Dot(Axis, CornersA[k]); - MinA = XMVectorMin(MinA, TempA); - MaxA = XMVectorMax(MaxA, TempA); - - XMVECTOR TempB = XMVector3Dot(Axis, CornersB[k]); - MinB = XMVectorMin(MinB, TempB); - MaxB = XMVectorMax(MaxB, TempB); - } - - // if (MinA > MaxB || MinB > MaxA) reject - Outside = XMVectorOrInt(Outside, XMVectorGreater(MinA, MaxB)); - Outside = XMVectorOrInt(Outside, XMVectorGreater(MinB, MaxA)); - } - } - - // If there is a seperating plane, then the frustums do not intersect. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // If we did not find a separating plane then the frustums intersect. - return true; -} - -//----------------------------------------------------------------------------- -// Triangle vs frustum test. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingFrustum::Intersects( - FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2) const noexcept { - // Build the frustum planes (NOTE: D is negated from the usual). - XMVECTOR Planes[6]; - Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, -Near); - Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, Far); - Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Transform triangle into the local space of frustum. - XMVECTOR TV0 = - XMVector3InverseRotate(XMVectorSubtract(V0, vOrigin), vOrientation); - XMVECTOR TV1 = - XMVector3InverseRotate(XMVectorSubtract(V1, vOrigin), vOrientation); - XMVECTOR TV2 = - XMVector3InverseRotate(XMVectorSubtract(V2, vOrigin), vOrientation); - - // Test each vertex of the triangle against the frustum planes. - XMVECTOR Outside = XMVectorFalseInt(); - XMVECTOR InsideAll = XMVectorTrueInt(); - - for (size_t i = 0; i < 6; ++i) { - XMVECTOR Dist0 = XMVector3Dot(TV0, Planes[i]); - XMVECTOR Dist1 = XMVector3Dot(TV1, Planes[i]); - XMVECTOR Dist2 = XMVector3Dot(TV2, Planes[i]); - - XMVECTOR MinDist = XMVectorMin(Dist0, Dist1); - MinDist = XMVectorMin(MinDist, Dist2); - XMVECTOR MaxDist = XMVectorMax(Dist0, Dist1); - MaxDist = XMVectorMax(MaxDist, Dist2); - - XMVECTOR PlaneDist = XMVectorSplatW(Planes[i]); - - // Outside the plane? - Outside = XMVectorOrInt(Outside, XMVectorGreater(MinDist, PlaneDist)); - - // Fully inside the plane? - InsideAll = - XMVectorAndInt(InsideAll, XMVectorLessOrEqual(MaxDist, PlaneDist)); - } - - // If the triangle is outside any of the planes it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // If the triangle is inside all planes it is fully inside. - if (XMVector4EqualInt(InsideAll, XMVectorTrueInt())) return true; - - // Build the corners of the frustum. - XMVECTOR vRightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vRightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vLeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR vLeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vNear = XMVectorReplicatePtr(&Near); - XMVECTOR vFar = XMVectorReplicatePtr(&Far); - - XMVECTOR Corners[CORNER_COUNT]; - Corners[0] = XMVectorMultiply(vRightTop, vNear); - Corners[1] = XMVectorMultiply(vRightBottom, vNear); - Corners[2] = XMVectorMultiply(vLeftTop, vNear); - Corners[3] = XMVectorMultiply(vLeftBottom, vNear); - Corners[4] = XMVectorMultiply(vRightTop, vFar); - Corners[5] = XMVectorMultiply(vRightBottom, vFar); - Corners[6] = XMVectorMultiply(vLeftTop, vFar); - Corners[7] = XMVectorMultiply(vLeftBottom, vFar); - - // Test the plane of the triangle. - XMVECTOR Normal = - XMVector3Cross(XMVectorSubtract(V1, V0), XMVectorSubtract(V2, V0)); - XMVECTOR Dist = XMVector3Dot(Normal, V0); - - XMVECTOR MinDist, MaxDist; - MinDist = MaxDist = XMVector3Dot(Corners[0], Normal); - for (size_t i = 1; i < CORNER_COUNT; ++i) { - XMVECTOR Temp = XMVector3Dot(Corners[i], Normal); - MinDist = XMVectorMin(MinDist, Temp); - MaxDist = XMVectorMax(MaxDist, Temp); - } - - Outside = XMVectorOrInt(XMVectorGreater(MinDist, Dist), - XMVectorLess(MaxDist, Dist)); - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // Check the edge/edge axes (3*6). - XMVECTOR TriangleEdgeAxis[3]; - TriangleEdgeAxis[0] = XMVectorSubtract(V1, V0); - TriangleEdgeAxis[1] = XMVectorSubtract(V2, V1); - TriangleEdgeAxis[2] = XMVectorSubtract(V0, V2); - - XMVECTOR FrustumEdgeAxis[6]; - FrustumEdgeAxis[0] = vRightTop; - FrustumEdgeAxis[1] = vRightBottom; - FrustumEdgeAxis[2] = vLeftTop; - FrustumEdgeAxis[3] = vLeftBottom; - FrustumEdgeAxis[4] = XMVectorSubtract(vRightTop, vLeftTop); - FrustumEdgeAxis[5] = XMVectorSubtract(vLeftBottom, vLeftTop); - - for (size_t i = 0; i < 3; ++i) { - for (size_t j = 0; j < 6; j++) { - // Compute the axis we are going to test. - XMVECTOR Axis = - XMVector3Cross(TriangleEdgeAxis[i], FrustumEdgeAxis[j]); - - // Find the min/max of the projection of the triangle onto the axis. - XMVECTOR MinA, MaxA; - - XMVECTOR Dist0 = XMVector3Dot(V0, Axis); - XMVECTOR Dist1 = XMVector3Dot(V1, Axis); - XMVECTOR Dist2 = XMVector3Dot(V2, Axis); - - MinA = XMVectorMin(Dist0, Dist1); - MinA = XMVectorMin(MinA, Dist2); - MaxA = XMVectorMax(Dist0, Dist1); - MaxA = XMVectorMax(MaxA, Dist2); - - // Find the min/max of the projection of the frustum onto the axis. - XMVECTOR MinB, MaxB; - - MinB = MaxB = XMVector3Dot(Axis, Corners[0]); - - for (size_t k = 1; k < CORNER_COUNT; k++) { - XMVECTOR Temp = XMVector3Dot(Axis, Corners[k]); - MinB = XMVectorMin(MinB, Temp); - MaxB = XMVectorMax(MaxB, Temp); - } - - // if (MinA > MaxB || MinB > MaxA) reject; - Outside = XMVectorOrInt(Outside, XMVectorGreater(MinA, MaxB)); - Outside = XMVectorOrInt(Outside, XMVectorGreater(MinB, MaxA)); - } - } - - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return false; - - // If we did not find a separating plane then the triangle must intersect - // the frustum. - return true; -} - -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline PlaneIntersectionType XM_CALLCONV -BoundingFrustum::Intersects(FXMVECTOR Plane) const noexcept { - assert(DirectX::MathInternal::XMPlaneIsUnit(Plane)); - - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Set w of the origin to one so we can dot4 with a plane. - vOrigin = XMVectorInsert<0, 0, 0, 0, 1>(vOrigin, XMVectorSplatOne()); - - // Build the corners of the frustum (in world space). - XMVECTOR RightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR RightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR LeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR LeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vNear = XMVectorReplicatePtr(&Near); - XMVECTOR vFar = XMVectorReplicatePtr(&Far); - - RightTop = XMVector3Rotate(RightTop, vOrientation); - RightBottom = XMVector3Rotate(RightBottom, vOrientation); - LeftTop = XMVector3Rotate(LeftTop, vOrientation); - LeftBottom = XMVector3Rotate(LeftBottom, vOrientation); - - XMVECTOR Corners0 = XMVectorMultiplyAdd(RightTop, vNear, vOrigin); - XMVECTOR Corners1 = XMVectorMultiplyAdd(RightBottom, vNear, vOrigin); - XMVECTOR Corners2 = XMVectorMultiplyAdd(LeftTop, vNear, vOrigin); - XMVECTOR Corners3 = XMVectorMultiplyAdd(LeftBottom, vNear, vOrigin); - XMVECTOR Corners4 = XMVectorMultiplyAdd(RightTop, vFar, vOrigin); - XMVECTOR Corners5 = XMVectorMultiplyAdd(RightBottom, vFar, vOrigin); - XMVECTOR Corners6 = XMVectorMultiplyAdd(LeftTop, vFar, vOrigin); - XMVECTOR Corners7 = XMVectorMultiplyAdd(LeftBottom, vFar, vOrigin); - - XMVECTOR Outside, Inside; - DirectX::MathInternal::FastIntersectFrustumPlane( - Corners0, Corners1, Corners2, Corners3, Corners4, Corners5, Corners6, - Corners7, Plane, Outside, Inside); - - // If the frustum is outside any plane it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return FRONT; - - // If the frustum is inside all planes it is inside. - if (XMVector4EqualInt(Inside, XMVectorTrueInt())) return BACK; - - // The frustum is not inside all planes or outside a plane it intersects. - return INTERSECTING; -} - -//----------------------------------------------------------------------------- -// Ray vs. frustum test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV BoundingFrustum::Intersects( - FXMVECTOR rayOrigin, FXMVECTOR Direction, float& Dist) const noexcept { - // If ray starts inside the frustum, return a distance of 0 for the hit - if (Contains(rayOrigin) == CONTAINS) { - Dist = 0.0f; - return true; - } - - // Build the frustum planes. - XMVECTOR Planes[6]; - Planes[0] = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - Planes[1] = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - Planes[2] = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - Planes[3] = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - Planes[4] = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - Planes[5] = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - - // Load origin and orientation of the frustum. - XMVECTOR frOrigin = XMLoadFloat3(&Origin); - XMVECTOR frOrientation = XMLoadFloat4(&Orientation); - - // This algorithm based on "Fast Ray-Convex Polyhedron Intersectin," in - // James Arvo, ed., Graphics Gems II pp. 247-250 - float tnear = -FLT_MAX; - float tfar = FLT_MAX; - - for (size_t i = 0; i < 6; ++i) { - XMVECTOR Plane = DirectX::MathInternal::XMPlaneTransform( - Planes[i], frOrientation, frOrigin); - Plane = XMPlaneNormalize(Plane); - - XMVECTOR AxisDotOrigin = XMPlaneDotCoord(Plane, rayOrigin); - XMVECTOR AxisDotDirection = XMVector3Dot(Plane, Direction); - - if (XMVector3LessOrEqual(XMVectorAbs(AxisDotDirection), g_RayEpsilon)) { - // Ray is parallel to plane - check if ray origin is inside plane's - if (XMVector3Greater(AxisDotOrigin, g_XMZero)) { - // Ray origin is outside half-space. - Dist = 0.f; - return false; - } - } else { - // Ray not parallel - get distance to plane. - float vd = XMVectorGetX(AxisDotDirection); - float vn = XMVectorGetX(AxisDotOrigin); - float t = -vn / vd; - if (vd < 0.0f) { - // Front face - T is a near point. - if (t > tfar) { - Dist = 0.f; - return false; - } - if (t > tnear) { - // Hit near face. - tnear = t; - } - } else { - // back face - T is far point. - if (t < tnear) { - Dist = 0.f; - return false; - } - if (t < tfar) { - // Hit far face. - tfar = t; - } - } - } - } - - // Survived all tests. - // Note: if ray originates on polyhedron, may want to change 0.0f to some - // epsilon to avoid intersecting the originating face. - float distance = (tnear >= 0.0f) ? tnear : tfar; - if (distance >= 0.0f) { - Dist = distance; - return true; - } - - Dist = 0.f; - return false; -} - -//----------------------------------------------------------------------------- -// Test a frustum vs 6 planes (typically forming another frustum). -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -BoundingFrustum::ContainedBy(FXMVECTOR Plane0, FXMVECTOR Plane1, - FXMVECTOR Plane2, GXMVECTOR Plane3, - HXMVECTOR Plane4, - HXMVECTOR Plane5) const noexcept { - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - assert(DirectX::MathInternal::XMQuaternionIsUnit(vOrientation)); - - // Set w of the origin to one so we can dot4 with a plane. - vOrigin = XMVectorInsert<0, 0, 0, 0, 1>(vOrigin, XMVectorSplatOne()); - - // Build the corners of the frustum (in world space). - XMVECTOR RightTop = XMVectorSet(RightSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR RightBottom = XMVectorSet(RightSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR LeftTop = XMVectorSet(LeftSlope, TopSlope, 1.0f, 0.0f); - XMVECTOR LeftBottom = XMVectorSet(LeftSlope, BottomSlope, 1.0f, 0.0f); - XMVECTOR vNear = XMVectorReplicatePtr(&Near); - XMVECTOR vFar = XMVectorReplicatePtr(&Far); - - RightTop = XMVector3Rotate(RightTop, vOrientation); - RightBottom = XMVector3Rotate(RightBottom, vOrientation); - LeftTop = XMVector3Rotate(LeftTop, vOrientation); - LeftBottom = XMVector3Rotate(LeftBottom, vOrientation); - - XMVECTOR Corners0 = XMVectorMultiplyAdd(RightTop, vNear, vOrigin); - XMVECTOR Corners1 = XMVectorMultiplyAdd(RightBottom, vNear, vOrigin); - XMVECTOR Corners2 = XMVectorMultiplyAdd(LeftTop, vNear, vOrigin); - XMVECTOR Corners3 = XMVectorMultiplyAdd(LeftBottom, vNear, vOrigin); - XMVECTOR Corners4 = XMVectorMultiplyAdd(RightTop, vFar, vOrigin); - XMVECTOR Corners5 = XMVectorMultiplyAdd(RightBottom, vFar, vOrigin); - XMVECTOR Corners6 = XMVectorMultiplyAdd(LeftTop, vFar, vOrigin); - XMVECTOR Corners7 = XMVectorMultiplyAdd(LeftBottom, vFar, vOrigin); - - XMVECTOR Outside, Inside; - - // Test against each plane. - DirectX::MathInternal::FastIntersectFrustumPlane( - Corners0, Corners1, Corners2, Corners3, Corners4, Corners5, Corners6, - Corners7, Plane0, Outside, Inside); - - XMVECTOR AnyOutside = Outside; - XMVECTOR AllInside = Inside; - - DirectX::MathInternal::FastIntersectFrustumPlane( - Corners0, Corners1, Corners2, Corners3, Corners4, Corners5, Corners6, - Corners7, Plane1, Outside, Inside); - - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectFrustumPlane( - Corners0, Corners1, Corners2, Corners3, Corners4, Corners5, Corners6, - Corners7, Plane2, Outside, Inside); - - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectFrustumPlane( - Corners0, Corners1, Corners2, Corners3, Corners4, Corners5, Corners6, - Corners7, Plane3, Outside, Inside); - - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectFrustumPlane( - Corners0, Corners1, Corners2, Corners3, Corners4, Corners5, Corners6, - Corners7, Plane4, Outside, Inside); - - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectFrustumPlane( - Corners0, Corners1, Corners2, Corners3, Corners4, Corners5, Corners6, - Corners7, Plane5, Outside, Inside); - - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - // If the frustum is outside any plane it is outside. - if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt())) return DISJOINT; - - // If the frustum is inside all planes it is inside. - if (XMVector4EqualInt(AllInside, XMVectorTrueInt())) return CONTAINS; - - // The frustum is not inside all planes or outside a plane, it may - // intersect. - return INTERSECTS; -} - -//----------------------------------------------------------------------------- -// Build the 6 frustum planes from a frustum. -// -// The intended use for these routines is for fast culling to a view frustum. -// When the volume being tested against a view frustum is small relative to the -// view frustum it is usually either inside all six planes of the frustum -// (CONTAINS) or outside one of the planes of the frustum (DISJOINT). If neither -// of these cases is true then it may or may not be intersecting the frustum -// (INTERSECTS) -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void BoundingFrustum::GetPlanes( - XMVECTOR* NearPlane, XMVECTOR* FarPlane, XMVECTOR* RightPlane, - XMVECTOR* LeftPlane, XMVECTOR* TopPlane, - XMVECTOR* BottomPlane) const noexcept { - // Load origin and orientation of the frustum. - XMVECTOR vOrigin = XMLoadFloat3(&Origin); - XMVECTOR vOrientation = XMLoadFloat4(&Orientation); - - if (NearPlane) { - XMVECTOR vNearPlane = XMVectorSet(0.0f, 0.0f, -1.0f, Near); - vNearPlane = DirectX::MathInternal::XMPlaneTransform( - vNearPlane, vOrientation, vOrigin); - *NearPlane = XMPlaneNormalize(vNearPlane); - } - - if (FarPlane) { - XMVECTOR vFarPlane = XMVectorSet(0.0f, 0.0f, 1.0f, -Far); - vFarPlane = DirectX::MathInternal::XMPlaneTransform( - vFarPlane, vOrientation, vOrigin); - *FarPlane = XMPlaneNormalize(vFarPlane); - } - - if (RightPlane) { - XMVECTOR vRightPlane = XMVectorSet(1.0f, 0.0f, -RightSlope, 0.0f); - vRightPlane = DirectX::MathInternal::XMPlaneTransform( - vRightPlane, vOrientation, vOrigin); - *RightPlane = XMPlaneNormalize(vRightPlane); - } - - if (LeftPlane) { - XMVECTOR vLeftPlane = XMVectorSet(-1.0f, 0.0f, LeftSlope, 0.0f); - vLeftPlane = DirectX::MathInternal::XMPlaneTransform( - vLeftPlane, vOrientation, vOrigin); - *LeftPlane = XMPlaneNormalize(vLeftPlane); - } - - if (TopPlane) { - XMVECTOR vTopPlane = XMVectorSet(0.0f, 1.0f, -TopSlope, 0.0f); - vTopPlane = DirectX::MathInternal::XMPlaneTransform( - vTopPlane, vOrientation, vOrigin); - *TopPlane = XMPlaneNormalize(vTopPlane); - } - - if (BottomPlane) { - XMVECTOR vBottomPlane = XMVectorSet(0.0f, -1.0f, BottomSlope, 0.0f); - vBottomPlane = DirectX::MathInternal::XMPlaneTransform( - vBottomPlane, vOrientation, vOrigin); - *BottomPlane = XMPlaneNormalize(vBottomPlane); - } -} - -//----------------------------------------------------------------------------- -// Build a frustum from a persepective projection matrix. The matrix may only -// contain a projection; any rotation, translation or scale will cause the -// constructed frustum to be incorrect. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline void XM_CALLCONV -BoundingFrustum::CreateFromMatrix(BoundingFrustum& Out, FXMMATRIX Projection, - bool rhcoords) noexcept { - // Corners of the projection frustum in NDC space. - static XMVECTORF32 NDCPoints[6] = { - {{{1.0f, 0.0f, 1.0f, 1.0f}}}, // right (at far plane) - {{{-1.0f, 0.0f, 1.0f, 1.0f}}}, // left - {{{0.0f, 1.0f, 1.0f, 1.0f}}}, // top - {{{0.0f, -1.0f, 1.0f, 1.0f}}}, // bottom - - {{{0.0f, 0.0f, 0.0f, 1.0f}}}, // near - {{{0.0f, 0.0f, 1.0f, 1.0f}}} // far - }; - - XMVECTOR Determinant; - XMMATRIX matInverse = XMMatrixInverse(&Determinant, Projection); - - // Compute the frustum corners in world space. - XMVECTOR Points[6]; - - for (size_t i = 0; i < 6; ++i) { - // Transform point. - Points[i] = XMVector4Transform(NDCPoints[i], matInverse); - } - - Out.Origin = XMFLOAT3(0.0f, 0.0f, 0.0f); - Out.Orientation = XMFLOAT4(0.0f, 0.0f, 0.0f, 1.0f); - - // Compute the slopes. - Points[0] = XMVectorMultiply(Points[0], - XMVectorReciprocal(XMVectorSplatZ(Points[0]))); - Points[1] = XMVectorMultiply(Points[1], - XMVectorReciprocal(XMVectorSplatZ(Points[1]))); - Points[2] = XMVectorMultiply(Points[2], - XMVectorReciprocal(XMVectorSplatZ(Points[2]))); - Points[3] = XMVectorMultiply(Points[3], - XMVectorReciprocal(XMVectorSplatZ(Points[3]))); - - Out.RightSlope = XMVectorGetX(Points[0]); - Out.LeftSlope = XMVectorGetX(Points[1]); - Out.TopSlope = XMVectorGetY(Points[2]); - Out.BottomSlope = XMVectorGetY(Points[3]); - - // Compute near and far. - Points[4] = XMVectorMultiply(Points[4], - XMVectorReciprocal(XMVectorSplatW(Points[4]))); - Points[5] = XMVectorMultiply(Points[5], - XMVectorReciprocal(XMVectorSplatW(Points[5]))); - - if (rhcoords) { - Out.Near = XMVectorGetZ(Points[5]); - Out.Far = XMVectorGetZ(Points[4]); - } else { - Out.Near = XMVectorGetZ(Points[4]); - Out.Far = XMVectorGetZ(Points[5]); - } -} - -/**************************************************************************** - * - * TriangleTests - * - ****************************************************************************/ - -namespace TriangleTests { - -//----------------------------------------------------------------------------- -// Compute the intersection of a ray (Origin, Direction) with a triangle -// (V0, V1, V2). Return true if there is an intersection and also set *pDist -// to the distance along the ray to the intersection. -// -// The algorithm is based on Moller, Tomas and Trumbore, "Fast, Minimum Storage -// Ray-Triangle Intersection", Journal of Graphics Tools, vol. 2, no. 1, -// pp 21-28, 1997. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV -Intersects(FXMVECTOR Origin, FXMVECTOR Direction, FXMVECTOR V0, GXMVECTOR V1, - HXMVECTOR V2, float& Dist) noexcept { - assert(DirectX::MathInternal::XMVector3IsUnit(Direction)); - - XMVECTOR Zero = XMVectorZero(); - - XMVECTOR e1 = XMVectorSubtract(V1, V0); - XMVECTOR e2 = XMVectorSubtract(V2, V0); - - // p = Direction ^ e2; - XMVECTOR p = XMVector3Cross(Direction, e2); - - // det = e1 * p; - XMVECTOR det = XMVector3Dot(e1, p); - - XMVECTOR u, v, t; - - if (XMVector3GreaterOrEqual(det, g_RayEpsilon)) { - // Determinate is positive (front side of the triangle). - XMVECTOR s = XMVectorSubtract(Origin, V0); - - // u = s * p; - u = XMVector3Dot(s, p); - - XMVECTOR NoIntersection = XMVectorLess(u, Zero); - NoIntersection = XMVectorOrInt(NoIntersection, XMVectorGreater(u, det)); - - // q = s ^ e1; - XMVECTOR q = XMVector3Cross(s, e1); - - // v = Direction * q; - v = XMVector3Dot(Direction, q); - - NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(v, Zero)); - NoIntersection = XMVectorOrInt(NoIntersection, - XMVectorGreater(XMVectorAdd(u, v), det)); - - // t = e2 * q; - t = XMVector3Dot(e2, q); - - NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(t, Zero)); - - if (XMVector4EqualInt(NoIntersection, XMVectorTrueInt())) { - Dist = 0.f; - return false; - } - } else if (XMVector3LessOrEqual(det, g_RayNegEpsilon)) { - // Determinate is negative (back side of the triangle). - XMVECTOR s = XMVectorSubtract(Origin, V0); - - // u = s * p; - u = XMVector3Dot(s, p); - - XMVECTOR NoIntersection = XMVectorGreater(u, Zero); - NoIntersection = XMVectorOrInt(NoIntersection, XMVectorLess(u, det)); - - // q = s ^ e1; - XMVECTOR q = XMVector3Cross(s, e1); - - // v = Direction * q; - v = XMVector3Dot(Direction, q); - - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(v, Zero)); - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorLess(XMVectorAdd(u, v), det)); - - // t = e2 * q; - t = XMVector3Dot(e2, q); - - NoIntersection = - XMVectorOrInt(NoIntersection, XMVectorGreater(t, Zero)); - - if (XMVector4EqualInt(NoIntersection, XMVectorTrueInt())) { - Dist = 0.f; - return false; - } - } else { - // Parallel ray. - Dist = 0.f; - return false; - } - - t = XMVectorDivide(t, det); - - // (u / det) and (v / dev) are the barycentric cooridinates of the - // intersection. - - // Store the x-component to *pDist - XMStoreFloat(&Dist, t); - - return true; -} - -//----------------------------------------------------------------------------- -// Test if two triangles intersect. -// -// The final test of algorithm is based on Shen, Heng, and Tang, "A Fast -// Triangle-Triangle Overlap Test Using Signed Distances", Journal of Graphics -// Tools, vol. 8, no. 1, pp 17-23, 2003 and Guigue and Devillers, "Fast and -// Robust Triangle-Triangle Overlap Test Using Orientation Predicates", Journal -// of Graphics Tools, vol. 8, no. 1, pp 25-32, 2003. -// -// The final test could be considered an edge-edge separating plane test with -// the 9 possible cases narrowed down to the only two pairs of edges that can -// actaully result in a seperation. -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline bool XM_CALLCONV -Intersects(FXMVECTOR A0, FXMVECTOR A1, FXMVECTOR A2, GXMVECTOR B0, HXMVECTOR B1, - HXMVECTOR B2) noexcept { - static const XMVECTORU32 SelectY = { - {{XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0}}}; - static const XMVECTORU32 SelectZ = { - {{XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0}}}; - static const XMVECTORU32 Select0111 = { - {{XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_1}}}; - static const XMVECTORU32 Select1011 = { - {{XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1}}}; - static const XMVECTORU32 Select1101 = { - {{XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1}}}; - - XMVECTOR Zero = XMVectorZero(); - - // Compute the normal of triangle A. - XMVECTOR N1 = - XMVector3Cross(XMVectorSubtract(A1, A0), XMVectorSubtract(A2, A0)); - - // Assert that the triangle is not degenerate. - assert(!XMVector3Equal(N1, Zero)); - - // Test points of B against the plane of A. - XMVECTOR BDist = XMVector3Dot(N1, XMVectorSubtract(B0, A0)); - BDist = XMVectorSelect(BDist, XMVector3Dot(N1, XMVectorSubtract(B1, A0)), - SelectY); - BDist = XMVectorSelect(BDist, XMVector3Dot(N1, XMVectorSubtract(B2, A0)), - SelectZ); - - // Ensure robustness with co-planar triangles by zeroing small distances. - uint32_t BDistIsZeroCR; - XMVECTOR BDistIsZero = - XMVectorGreaterR(&BDistIsZeroCR, g_RayEpsilon, XMVectorAbs(BDist)); - BDist = XMVectorSelect(BDist, Zero, BDistIsZero); - - uint32_t BDistIsLessCR; - XMVECTOR BDistIsLess = XMVectorGreaterR(&BDistIsLessCR, Zero, BDist); - - uint32_t BDistIsGreaterCR; - XMVECTOR BDistIsGreater = XMVectorGreaterR(&BDistIsGreaterCR, BDist, Zero); - - // If all the points are on the same side we don't intersect. - if (XMComparisonAllTrue(BDistIsLessCR) || - XMComparisonAllTrue(BDistIsGreaterCR)) - return false; - - // Compute the normal of triangle B. - XMVECTOR N2 = - XMVector3Cross(XMVectorSubtract(B1, B0), XMVectorSubtract(B2, B0)); - - // Assert that the triangle is not degenerate. - assert(!XMVector3Equal(N2, Zero)); - - // Test points of A against the plane of B. - XMVECTOR ADist = XMVector3Dot(N2, XMVectorSubtract(A0, B0)); - ADist = XMVectorSelect(ADist, XMVector3Dot(N2, XMVectorSubtract(A1, B0)), - SelectY); - ADist = XMVectorSelect(ADist, XMVector3Dot(N2, XMVectorSubtract(A2, B0)), - SelectZ); - - // Ensure robustness with co-planar triangles by zeroing small distances. - uint32_t ADistIsZeroCR; - XMVECTOR ADistIsZero = - XMVectorGreaterR(&ADistIsZeroCR, g_RayEpsilon, XMVectorAbs(ADist)); - ADist = XMVectorSelect(ADist, Zero, ADistIsZero); - - uint32_t ADistIsLessCR; - XMVECTOR ADistIsLess = XMVectorGreaterR(&ADistIsLessCR, Zero, ADist); - - uint32_t ADistIsGreaterCR; - XMVECTOR ADistIsGreater = XMVectorGreaterR(&ADistIsGreaterCR, ADist, Zero); - - // If all the points are on the same side we don't intersect. - if (XMComparisonAllTrue(ADistIsLessCR) || - XMComparisonAllTrue(ADistIsGreaterCR)) - return false; - - // Special case for co-planar triangles. - if (XMComparisonAllTrue(ADistIsZeroCR) || - XMComparisonAllTrue(BDistIsZeroCR)) { - XMVECTOR Axis, Dist, MinDist; - - // Compute an axis perpindicular to the edge (points out). - Axis = XMVector3Cross(N1, XMVectorSubtract(A1, A0)); - Dist = XMVector3Dot(Axis, A0); - - // Test points of B against the axis. - MinDist = XMVector3Dot(B0, Axis); - MinDist = XMVectorMin(MinDist, XMVector3Dot(B1, Axis)); - MinDist = XMVectorMin(MinDist, XMVector3Dot(B2, Axis)); - if (XMVector4GreaterOrEqual(MinDist, Dist)) return false; - - // Edge (A1, A2) - Axis = XMVector3Cross(N1, XMVectorSubtract(A2, A1)); - Dist = XMVector3Dot(Axis, A1); - - MinDist = XMVector3Dot(B0, Axis); - MinDist = XMVectorMin(MinDist, XMVector3Dot(B1, Axis)); - MinDist = XMVectorMin(MinDist, XMVector3Dot(B2, Axis)); - if (XMVector4GreaterOrEqual(MinDist, Dist)) return false; - - // Edge (A2, A0) - Axis = XMVector3Cross(N1, XMVectorSubtract(A0, A2)); - Dist = XMVector3Dot(Axis, A2); - - MinDist = XMVector3Dot(B0, Axis); - MinDist = XMVectorMin(MinDist, XMVector3Dot(B1, Axis)); - MinDist = XMVectorMin(MinDist, XMVector3Dot(B2, Axis)); - if (XMVector4GreaterOrEqual(MinDist, Dist)) return false; - - // Edge (B0, B1) - Axis = XMVector3Cross(N2, XMVectorSubtract(B1, B0)); - Dist = XMVector3Dot(Axis, B0); - - MinDist = XMVector3Dot(A0, Axis); - MinDist = XMVectorMin(MinDist, XMVector3Dot(A1, Axis)); - MinDist = XMVectorMin(MinDist, XMVector3Dot(A2, Axis)); - if (XMVector4GreaterOrEqual(MinDist, Dist)) return false; - - // Edge (B1, B2) - Axis = XMVector3Cross(N2, XMVectorSubtract(B2, B1)); - Dist = XMVector3Dot(Axis, B1); - - MinDist = XMVector3Dot(A0, Axis); - MinDist = XMVectorMin(MinDist, XMVector3Dot(A1, Axis)); - MinDist = XMVectorMin(MinDist, XMVector3Dot(A2, Axis)); - if (XMVector4GreaterOrEqual(MinDist, Dist)) return false; - - // Edge (B2,B0) - Axis = XMVector3Cross(N2, XMVectorSubtract(B0, B2)); - Dist = XMVector3Dot(Axis, B2); - - MinDist = XMVector3Dot(A0, Axis); - MinDist = XMVectorMin(MinDist, XMVector3Dot(A1, Axis)); - MinDist = XMVectorMin(MinDist, XMVector3Dot(A2, Axis)); - if (XMVector4GreaterOrEqual(MinDist, Dist)) return false; - - return true; - } - - // - // Find the single vertex of A and B (ie the vertex on the opposite side - // of the plane from the other two) and reorder the edges so we can compute - // the signed edge/edge distances. - // - // if ( (V0 >= 0 && V1 < 0 && V2 < 0) || - // (V0 > 0 && V1 <= 0 && V2 <= 0) || - // (V0 <= 0 && V1 > 0 && V2 > 0) || - // (V0 < 0 && V1 >= 0 && V2 >= 0) ) then V0 is singular; - // - // If our singular vertex is not on the positive side of the plane we - // reverse the triangle winding so that the overlap comparisons will compare - // the correct edges with the correct signs. - // - XMVECTOR ADistIsLessEqual = XMVectorOrInt(ADistIsLess, ADistIsZero); - XMVECTOR ADistIsGreaterEqual = XMVectorOrInt(ADistIsGreater, ADistIsZero); - - XMVECTOR AA0, AA1, AA2; - bool bPositiveA; - - if (DirectX::MathInternal::XMVector3AllTrue( - XMVectorSelect(ADistIsGreaterEqual, ADistIsLess, Select0111)) || - DirectX::MathInternal::XMVector3AllTrue( - XMVectorSelect(ADistIsGreater, ADistIsLessEqual, Select0111))) { - // A0 is singular, crossing from positive to negative. - AA0 = A0; - AA1 = A1; - AA2 = A2; - bPositiveA = true; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsLessEqual, ADistIsGreater, Select0111)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsLess, ADistIsGreaterEqual, Select0111))) { - // A0 is singular, crossing from negative to positive. - AA0 = A0; - AA1 = A2; - AA2 = A1; - bPositiveA = false; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsGreaterEqual, ADistIsLess, Select1011)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsGreater, ADistIsLessEqual, Select1011))) { - // A1 is singular, crossing from positive to negative. - AA0 = A1; - AA1 = A2; - AA2 = A0; - bPositiveA = true; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsLessEqual, ADistIsGreater, Select1011)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsLess, ADistIsGreaterEqual, Select1011))) { - // A1 is singular, crossing from negative to positive. - AA0 = A1; - AA1 = A0; - AA2 = A2; - bPositiveA = false; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsGreaterEqual, ADistIsLess, Select1101)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsGreater, ADistIsLessEqual, Select1101))) { - // A2 is singular, crossing from positive to negative. - AA0 = A2; - AA1 = A0; - AA2 = A1; - bPositiveA = true; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsLessEqual, ADistIsGreater, Select1101)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - ADistIsLess, ADistIsGreaterEqual, Select1101))) { - // A2 is singular, crossing from negative to positive. - AA0 = A2; - AA1 = A1; - AA2 = A0; - bPositiveA = false; - } else { - assert(false); - return false; - } - - XMVECTOR BDistIsLessEqual = XMVectorOrInt(BDistIsLess, BDistIsZero); - XMVECTOR BDistIsGreaterEqual = XMVectorOrInt(BDistIsGreater, BDistIsZero); - - XMVECTOR BB0, BB1, BB2; - bool bPositiveB; - - if (DirectX::MathInternal::XMVector3AllTrue( - XMVectorSelect(BDistIsGreaterEqual, BDistIsLess, Select0111)) || - DirectX::MathInternal::XMVector3AllTrue( - XMVectorSelect(BDistIsGreater, BDistIsLessEqual, Select0111))) { - // B0 is singular, crossing from positive to negative. - BB0 = B0; - BB1 = B1; - BB2 = B2; - bPositiveB = true; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsLessEqual, BDistIsGreater, Select0111)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsLess, BDistIsGreaterEqual, Select0111))) { - // B0 is singular, crossing from negative to positive. - BB0 = B0; - BB1 = B2; - BB2 = B1; - bPositiveB = false; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsGreaterEqual, BDistIsLess, Select1011)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsGreater, BDistIsLessEqual, Select1011))) { - // B1 is singular, crossing from positive to negative. - BB0 = B1; - BB1 = B2; - BB2 = B0; - bPositiveB = true; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsLessEqual, BDistIsGreater, Select1011)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsLess, BDistIsGreaterEqual, Select1011))) { - // B1 is singular, crossing from negative to positive. - BB0 = B1; - BB1 = B0; - BB2 = B2; - bPositiveB = false; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsGreaterEqual, BDistIsLess, Select1101)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsGreater, BDistIsLessEqual, Select1101))) { - // B2 is singular, crossing from positive to negative. - BB0 = B2; - BB1 = B0; - BB2 = B1; - bPositiveB = true; - } else if (DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsLessEqual, BDistIsGreater, Select1101)) || - DirectX::MathInternal::XMVector3AllTrue(XMVectorSelect( - BDistIsLess, BDistIsGreaterEqual, Select1101))) { - // B2 is singular, crossing from negative to positive. - BB0 = B2; - BB1 = B1; - BB2 = B0; - bPositiveB = false; - } else { - assert(false); - return false; - } - - XMVECTOR Delta0, Delta1; - - // Reverse the direction of the test depending on whether the singular - // vertices are the same sign or different signs. - if (bPositiveA ^ bPositiveB) { - Delta0 = XMVectorSubtract(BB0, AA0); - Delta1 = XMVectorSubtract(AA0, BB0); - } else { - Delta0 = XMVectorSubtract(AA0, BB0); - Delta1 = XMVectorSubtract(BB0, AA0); - } - - // Check if the triangles overlap on the line of intersection between the - // planes of the two triangles by finding the signed line distances. - XMVECTOR Dist0 = XMVector3Dot( - Delta0, - XMVector3Cross(XMVectorSubtract(BB2, BB0), XMVectorSubtract(AA2, AA0))); - if (XMVector4Greater(Dist0, Zero)) return false; - - XMVECTOR Dist1 = XMVector3Dot( - Delta1, - XMVector3Cross(XMVectorSubtract(BB1, BB0), XMVectorSubtract(AA1, AA0))); - if (XMVector4Greater(Dist1, Zero)) return false; - - return true; -} - -//----------------------------------------------------------------------------- -// Ray-triangle test -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline PlaneIntersectionType XM_CALLCONV -Intersects(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2, GXMVECTOR Plane) noexcept { - XMVECTOR One = XMVectorSplatOne(); - - assert(DirectX::MathInternal::XMPlaneIsUnit(Plane)); - - // Set w of the points to one so we can dot4 with a plane. - XMVECTOR TV0 = XMVectorInsert<0, 0, 0, 0, 1>(V0, One); - XMVECTOR TV1 = XMVectorInsert<0, 0, 0, 0, 1>(V1, One); - XMVECTOR TV2 = XMVectorInsert<0, 0, 0, 0, 1>(V2, One); - - XMVECTOR Outside, Inside; - DirectX::MathInternal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane, - Outside, Inside); - - // If the triangle is outside any plane it is outside. - if (XMVector4EqualInt(Outside, XMVectorTrueInt())) return FRONT; - - // If the triangle is inside all planes it is inside. - if (XMVector4EqualInt(Inside, XMVectorTrueInt())) return BACK; - - // The triangle is not inside all planes or outside a plane it intersects. - return INTERSECTING; -} - -//----------------------------------------------------------------------------- -// Test a triangle vs 6 planes (typically forming a frustum). -//----------------------------------------------------------------------------- -_Use_decl_annotations_ inline ContainmentType XM_CALLCONV -ContainedBy(FXMVECTOR V0, FXMVECTOR V1, FXMVECTOR V2, GXMVECTOR Plane0, - HXMVECTOR Plane1, HXMVECTOR Plane2, CXMVECTOR Plane3, - CXMVECTOR Plane4, CXMVECTOR Plane5) noexcept { - XMVECTOR One = XMVectorSplatOne(); - - // Set w of the points to one so we can dot4 with a plane. - XMVECTOR TV0 = XMVectorInsert<0, 0, 0, 0, 1>(V0, One); - XMVECTOR TV1 = XMVectorInsert<0, 0, 0, 0, 1>(V1, One); - XMVECTOR TV2 = XMVectorInsert<0, 0, 0, 0, 1>(V2, One); - - XMVECTOR Outside, Inside; - - // Test against each plane. - DirectX::MathInternal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane0, - Outside, Inside); - - XMVECTOR AnyOutside = Outside; - XMVECTOR AllInside = Inside; - - DirectX::MathInternal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane1, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane2, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane3, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane4, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - DirectX::MathInternal::FastIntersectTrianglePlane(TV0, TV1, TV2, Plane5, - Outside, Inside); - AnyOutside = XMVectorOrInt(AnyOutside, Outside); - AllInside = XMVectorAndInt(AllInside, Inside); - - // If the triangle is outside any plane it is outside. - if (XMVector4EqualInt(AnyOutside, XMVectorTrueInt())) return DISJOINT; - - // If the triangle is inside all planes it is inside. - if (XMVector4EqualInt(AllInside, XMVectorTrueInt())) return CONTAINS; - - // The triangle is not inside all planes or outside a plane, it may - // intersect. - return INTERSECTS; -} - -} // namespace TriangleTests diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXColors.h b/targets/app/linux/Stubs/DirectXMath/DirectXColors.h deleted file mode 100644 index 218fe17c6..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXColors.h +++ /dev/null @@ -1,500 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXColors.h -- C++ Color Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -#include "DirectXMath.h" - -namespace DirectX { - -namespace Colors { -// Standard colors (Red/Green/Blue/Alpha) in sRGB colorspace -XMGLOBALCONST XMVECTORF32 AliceBlue = { - {{0.941176534f, 0.972549081f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 AntiqueWhite = { - {{0.980392218f, 0.921568692f, 0.843137324f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Aqua = {{{0.f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Aquamarine = { - {{0.498039246f, 1.f, 0.831372619f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Azure = {{{0.941176534f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Beige = { - {{0.960784376f, 0.960784376f, 0.862745166f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Bisque = {{{1.f, 0.894117713f, 0.768627524f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Black = {{{0.f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 BlanchedAlmond = { - {{1.f, 0.921568692f, 0.803921640f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Blue = {{{0.f, 0.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 BlueViolet = { - {{0.541176498f, 0.168627456f, 0.886274576f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Brown = { - {{0.647058845f, 0.164705887f, 0.164705887f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 BurlyWood = { - {{0.870588303f, 0.721568644f, 0.529411793f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 CadetBlue = { - {{0.372549027f, 0.619607866f, 0.627451003f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Chartreuse = {{{0.498039246f, 1.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Chocolate = { - {{0.823529482f, 0.411764741f, 0.117647067f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Coral = {{{1.f, 0.498039246f, 0.313725501f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 CornflowerBlue = { - {{0.392156899f, 0.584313750f, 0.929411829f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Cornsilk = {{{1.f, 0.972549081f, 0.862745166f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Crimson = { - {{0.862745166f, 0.078431375f, 0.235294133f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Cyan = {{{0.f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkBlue = {{{0.f, 0.f, 0.545098066f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkCyan = {{{0.f, 0.545098066f, 0.545098066f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkGoldenrod = { - {{0.721568644f, 0.525490224f, 0.043137256f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkGray = { - {{0.662745118f, 0.662745118f, 0.662745118f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkGreen = {{{0.f, 0.392156899f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkKhaki = { - {{0.741176486f, 0.717647076f, 0.419607878f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkMagenta = { - {{0.545098066f, 0.f, 0.545098066f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkOliveGreen = { - {{0.333333343f, 0.419607878f, 0.184313729f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkOrange = {{{1.f, 0.549019635f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkOrchid = { - {{0.600000024f, 0.196078449f, 0.800000072f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkRed = {{{0.545098066f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSalmon = { - {{0.913725555f, 0.588235319f, 0.478431404f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSeaGreen = { - {{0.560784340f, 0.737254918f, 0.545098066f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSlateBlue = { - {{0.282352954f, 0.239215702f, 0.545098066f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSlateGray = { - {{0.184313729f, 0.309803933f, 0.309803933f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkTurquoise = { - {{0.f, 0.807843208f, 0.819607913f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkViolet = { - {{0.580392182f, 0.f, 0.827451050f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DeepPink = {{{1.f, 0.078431375f, 0.576470613f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DeepSkyBlue = {{{0.f, 0.749019623f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DimGray = { - {{0.411764741f, 0.411764741f, 0.411764741f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DodgerBlue = { - {{0.117647067f, 0.564705908f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Firebrick = { - {{0.698039234f, 0.133333340f, 0.133333340f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 FloralWhite = { - {{1.f, 0.980392218f, 0.941176534f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 ForestGreen = { - {{0.133333340f, 0.545098066f, 0.133333340f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Fuchsia = {{{1.f, 0.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Gainsboro = { - {{0.862745166f, 0.862745166f, 0.862745166f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 GhostWhite = { - {{0.972549081f, 0.972549081f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Gold = {{{1.f, 0.843137324f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Goldenrod = { - {{0.854902029f, 0.647058845f, 0.125490203f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Gray = { - {{0.501960814f, 0.501960814f, 0.501960814f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Green = {{{0.f, 0.501960814f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 GreenYellow = { - {{0.678431392f, 1.f, 0.184313729f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Honeydew = {{{0.941176534f, 1.f, 0.941176534f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 HotPink = {{{1.f, 0.411764741f, 0.705882370f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 IndianRed = { - {{0.803921640f, 0.360784322f, 0.360784322f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Indigo = {{{0.294117659f, 0.f, 0.509803951f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Ivory = {{{1.f, 1.f, 0.941176534f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Khaki = { - {{0.941176534f, 0.901960850f, 0.549019635f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Lavender = { - {{0.901960850f, 0.901960850f, 0.980392218f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LavenderBlush = { - {{1.f, 0.941176534f, 0.960784376f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LawnGreen = { - {{0.486274540f, 0.988235354f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LemonChiffon = { - {{1.f, 0.980392218f, 0.803921640f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightBlue = { - {{0.678431392f, 0.847058892f, 0.901960850f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightCoral = { - {{0.941176534f, 0.501960814f, 0.501960814f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightCyan = {{{0.878431439f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = { - {{0.980392218f, 0.980392218f, 0.823529482f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightGray = { - {{0.827451050f, 0.827451050f, 0.827451050f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightGreen = { - {{0.564705908f, 0.933333397f, 0.564705908f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightPink = { - {{1.f, 0.713725507f, 0.756862819f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSalmon = { - {{1.f, 0.627451003f, 0.478431404f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSeaGreen = { - {{0.125490203f, 0.698039234f, 0.666666687f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSkyBlue = { - {{0.529411793f, 0.807843208f, 0.980392218f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSlateGray = { - {{0.466666698f, 0.533333361f, 0.600000024f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSteelBlue = { - {{0.690196097f, 0.768627524f, 0.870588303f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightYellow = {{{1.f, 1.f, 0.878431439f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Lime = {{{0.f, 1.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LimeGreen = { - {{0.196078449f, 0.803921640f, 0.196078449f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Linen = { - {{0.980392218f, 0.941176534f, 0.901960850f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Magenta = {{{1.f, 0.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Maroon = {{{0.501960814f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumAquamarine = { - {{0.400000036f, 0.803921640f, 0.666666687f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumBlue = {{{0.f, 0.f, 0.803921640f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumOrchid = { - {{0.729411781f, 0.333333343f, 0.827451050f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumPurple = { - {{0.576470613f, 0.439215720f, 0.858823597f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumSeaGreen = { - {{0.235294133f, 0.701960802f, 0.443137288f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumSlateBlue = { - {{0.482352972f, 0.407843173f, 0.933333397f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumSpringGreen = { - {{0.f, 0.980392218f, 0.603921592f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumTurquoise = { - {{0.282352954f, 0.819607913f, 0.800000072f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumVioletRed = { - {{0.780392230f, 0.082352944f, 0.521568656f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MidnightBlue = { - {{0.098039225f, 0.098039225f, 0.439215720f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MintCream = { - {{0.960784376f, 1.f, 0.980392218f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MistyRose = { - {{1.f, 0.894117713f, 0.882353008f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Moccasin = {{{1.f, 0.894117713f, 0.709803939f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 NavajoWhite = { - {{1.f, 0.870588303f, 0.678431392f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Navy = {{{0.f, 0.f, 0.501960814f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 OldLace = { - {{0.992156923f, 0.960784376f, 0.901960850f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Olive = {{{0.501960814f, 0.501960814f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 OliveDrab = { - {{0.419607878f, 0.556862772f, 0.137254909f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Orange = {{{1.f, 0.647058845f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 OrangeRed = {{{1.f, 0.270588249f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Orchid = { - {{0.854902029f, 0.439215720f, 0.839215755f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleGoldenrod = { - {{0.933333397f, 0.909803987f, 0.666666687f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleGreen = { - {{0.596078455f, 0.984313786f, 0.596078455f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleTurquoise = { - {{0.686274529f, 0.933333397f, 0.933333397f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleVioletRed = { - {{0.858823597f, 0.439215720f, 0.576470613f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PapayaWhip = { - {{1.f, 0.937254965f, 0.835294187f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PeachPuff = { - {{1.f, 0.854902029f, 0.725490212f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Peru = { - {{0.803921640f, 0.521568656f, 0.247058839f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Pink = {{{1.f, 0.752941251f, 0.796078503f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Plum = { - {{0.866666734f, 0.627451003f, 0.866666734f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PowderBlue = { - {{0.690196097f, 0.878431439f, 0.901960850f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Purple = {{{0.501960814f, 0.f, 0.501960814f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Red = {{{1.f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 RosyBrown = { - {{0.737254918f, 0.560784340f, 0.560784340f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 RoyalBlue = { - {{0.254901975f, 0.411764741f, 0.882353008f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SaddleBrown = { - {{0.545098066f, 0.270588249f, 0.074509807f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Salmon = { - {{0.980392218f, 0.501960814f, 0.447058856f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SandyBrown = { - {{0.956862807f, 0.643137276f, 0.376470625f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SeaGreen = { - {{0.180392161f, 0.545098066f, 0.341176480f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SeaShell = {{{1.f, 0.960784376f, 0.933333397f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Sienna = { - {{0.627451003f, 0.321568638f, 0.176470593f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Silver = { - {{0.752941251f, 0.752941251f, 0.752941251f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SkyBlue = { - {{0.529411793f, 0.807843208f, 0.921568692f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SlateBlue = { - {{0.415686309f, 0.352941185f, 0.803921640f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SlateGray = { - {{0.439215720f, 0.501960814f, 0.564705908f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Snow = {{{1.f, 0.980392218f, 0.980392218f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SpringGreen = {{{0.f, 1.f, 0.498039246f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SteelBlue = { - {{0.274509817f, 0.509803951f, 0.705882370f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Tan = { - {{0.823529482f, 0.705882370f, 0.549019635f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Teal = {{{0.f, 0.501960814f, 0.501960814f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Thistle = { - {{0.847058892f, 0.749019623f, 0.847058892f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Tomato = {{{1.f, 0.388235331f, 0.278431386f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Transparent = {{{0.f, 0.f, 0.f, 0.f}}}; -XMGLOBALCONST XMVECTORF32 Turquoise = { - {{0.250980407f, 0.878431439f, 0.815686345f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Violet = { - {{0.933333397f, 0.509803951f, 0.933333397f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Wheat = { - {{0.960784376f, 0.870588303f, 0.701960802f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 White = {{{1.f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 WhiteSmoke = { - {{0.960784376f, 0.960784376f, 0.960784376f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Yellow = {{{1.f, 1.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 YellowGreen = { - {{0.603921592f, 0.803921640f, 0.196078449f, 1.f}}}; - -} // namespace Colors - -namespace ColorsLinear { -// Standard colors (Red/Green/Blue/Alpha) in linear colorspace -XMGLOBALCONST XMVECTORF32 AliceBlue = { - {{0.871367335f, 0.938685894f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 AntiqueWhite = { - {{0.955973506f, 0.830770075f, 0.679542601f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Aqua = {{{0.f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Aquamarine = { - {{0.212230787f, 1.f, 0.658374965f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Azure = {{{0.871367335f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Beige = { - {{0.913098991f, 0.913098991f, 0.715693772f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Bisque = {{{1.f, 0.775822461f, 0.552011609f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Black = {{{0.f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 BlanchedAlmond = { - {{1.f, 0.830770075f, 0.610495746f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Blue = {{{0.f, 0.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 BlueViolet = { - {{0.254152179f, 0.024157630f, 0.760524750f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Brown = { - {{0.376262218f, 0.023153365f, 0.023153365f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 BurlyWood = { - {{0.730461001f, 0.479320228f, 0.242281199f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 CadetBlue = { - {{0.114435382f, 0.341914445f, 0.351532698f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Chartreuse = {{{0.212230787f, 1.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Chocolate = { - {{0.644479871f, 0.141263321f, 0.012983031f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Coral = {{{1.f, 0.212230787f, 0.080219828f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 CornflowerBlue = { - {{0.127437726f, 0.300543845f, 0.846873462f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Cornsilk = {{{1.f, 0.938685894f, 0.715693772f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Crimson = { - {{0.715693772f, 0.006995410f, 0.045186214f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Cyan = {{{0.f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkBlue = {{{0.f, 0.f, 0.258182913f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkCyan = {{{0.f, 0.258182913f, 0.258182913f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkGoldenrod = { - {{0.479320228f, 0.238397658f, 0.003346536f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkGray = { - {{0.396755308f, 0.396755308f, 0.396755308f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkGreen = {{{0.f, 0.127437726f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkKhaki = { - {{0.508881450f, 0.473531544f, 0.147027299f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkMagenta = { - {{0.258182913f, 0.f, 0.258182913f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkOliveGreen = { - {{0.090841733f, 0.147027299f, 0.028426038f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkOrange = {{{1.f, 0.262250721f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkOrchid = { - {{0.318546832f, 0.031896040f, 0.603827536f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkRed = {{{0.258182913f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSalmon = { - {{0.814846814f, 0.304987371f, 0.194617867f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSeaGreen = { - {{0.274677366f, 0.502886593f, 0.258182913f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSlateBlue = { - {{0.064803280f, 0.046665095f, 0.258182913f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkSlateGray = { - {{0.028426038f, 0.078187428f, 0.078187428f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkTurquoise = { - {{0.f, 0.617206752f, 0.637597024f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DarkViolet = { - {{0.296138316f, 0.f, 0.651405811f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DeepPink = {{{1.f, 0.006995410f, 0.291770697f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DeepSkyBlue = {{{0.f, 0.520995677f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DimGray = { - {{0.141263321f, 0.141263321f, 0.141263321f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 DodgerBlue = { - {{0.012983031f, 0.278894335f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Firebrick = { - {{0.445201248f, 0.015996292f, 0.015996292f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 FloralWhite = { - {{1.f, 0.955973506f, 0.871367335f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 ForestGreen = { - {{0.015996292f, 0.258182913f, 0.015996292f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Fuchsia = {{{1.f, 0.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Gainsboro = { - {{0.715693772f, 0.715693772f, 0.715693772f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 GhostWhite = { - {{0.938685894f, 0.938685894f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Gold = {{{1.f, 0.679542601f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Goldenrod = { - {{0.701102138f, 0.376262218f, 0.014443844f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Gray = { - {{0.215860531f, 0.215860531f, 0.215860531f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Green = {{{0.f, 0.215860531f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 GreenYellow = { - {{0.417885154f, 1.f, 0.028426038f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Honeydew = {{{0.871367335f, 1.f, 0.871367335f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 HotPink = {{{1.f, 0.141263321f, 0.456411064f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 IndianRed = { - {{0.610495746f, 0.107023112f, 0.107023112f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Indigo = {{{0.070360109f, 0.f, 0.223227978f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Ivory = {{{1.f, 1.f, 0.871367335f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Khaki = { - {{0.871367335f, 0.791298151f, 0.262250721f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Lavender = { - {{0.791298151f, 0.791298151f, 0.955973506f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LavenderBlush = { - {{1.f, 0.871367335f, 0.913098991f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LawnGreen = { - {{0.201556295f, 0.973445475f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LemonChiffon = { - {{1.f, 0.955973506f, 0.610495746f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightBlue = { - {{0.417885154f, 0.686685443f, 0.791298151f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightCoral = { - {{0.871367335f, 0.215860531f, 0.215860531f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightCyan = {{{0.745404482f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightGoldenrodYellow = { - {{0.955973506f, 0.955973506f, 0.644479871f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightGray = { - {{0.651405811f, 0.651405811f, 0.651405811f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightGreen = { - {{0.278894335f, 0.854992807f, 0.278894335f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightPink = { - {{1.f, 0.467783839f, 0.533276618f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSalmon = { - {{1.f, 0.351532698f, 0.194617867f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSeaGreen = { - {{0.014443844f, 0.445201248f, 0.401977867f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSkyBlue = { - {{0.242281199f, 0.617206752f, 0.955973506f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSlateGray = { - {{0.184475034f, 0.246201396f, 0.318546832f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightSteelBlue = { - {{0.434153706f, 0.552011609f, 0.730461001f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LightYellow = {{{1.f, 1.f, 0.745404482f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Lime = {{{0.f, 1.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 LimeGreen = { - {{0.031896040f, 0.610495746f, 0.031896040f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Linen = { - {{0.955973506f, 0.871367335f, 0.791298151f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Magenta = {{{1.f, 0.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Maroon = {{{0.215860531f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumAquamarine = { - {{0.132868364f, 0.610495746f, 0.401977867f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumBlue = {{{0.f, 0.f, 0.610495746f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumOrchid = { - {{0.491020888f, 0.090841733f, 0.651405811f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumPurple = { - {{0.291770697f, 0.162029430f, 0.708376050f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumSeaGreen = { - {{0.045186214f, 0.450785846f, 0.165132239f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumSlateBlue = { - {{0.198069349f, 0.138431653f, 0.854992807f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumSpringGreen = { - {{0.f, 0.955973506f, 0.323143244f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumTurquoise = { - {{0.064803280f, 0.637597024f, 0.603827536f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MediumVioletRed = { - {{0.571125031f, 0.007499032f, 0.234550655f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MidnightBlue = { - {{0.009721218f, 0.009721218f, 0.162029430f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MintCream = { - {{0.913098991f, 1.f, 0.955973506f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 MistyRose = { - {{1.f, 0.775822461f, 0.752942443f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Moccasin = {{{1.f, 0.775822461f, 0.462077051f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 NavajoWhite = { - {{1.f, 0.730461001f, 0.417885154f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Navy = {{{0.f, 0.f, 0.215860531f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 OldLace = { - {{0.982250869f, 0.913098991f, 0.791298151f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Olive = {{{0.215860531f, 0.215860531f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 OliveDrab = { - {{0.147027299f, 0.270497859f, 0.016807375f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Orange = {{{1.f, 0.376262218f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 OrangeRed = {{{1.f, 0.059511241f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Orchid = { - {{0.701102138f, 0.162029430f, 0.672443330f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleGoldenrod = { - {{0.854992807f, 0.806952477f, 0.401977867f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleGreen = { - {{0.313988745f, 0.964686573f, 0.313988745f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleTurquoise = { - {{0.428690553f, 0.854992807f, 0.854992807f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PaleVioletRed = { - {{0.708376050f, 0.162029430f, 0.291770697f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PapayaWhip = { - {{1.f, 0.863157392f, 0.665387452f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PeachPuff = { - {{1.f, 0.701102138f, 0.485149980f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Peru = { - {{0.610495746f, 0.234550655f, 0.049706575f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Pink = {{{1.f, 0.527115345f, 0.597202003f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Plum = { - {{0.723055363f, 0.351532698f, 0.723055363f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 PowderBlue = { - {{0.434153706f, 0.745404482f, 0.791298151f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Purple = {{{0.215860531f, 0.f, 0.215860531f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Red = {{{1.f, 0.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 RosyBrown = { - {{0.502886593f, 0.274677366f, 0.274677366f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 RoyalBlue = { - {{0.052860655f, 0.141263321f, 0.752942443f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SaddleBrown = { - {{0.258182913f, 0.059511241f, 0.006512091f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Salmon = { - {{0.955973506f, 0.215860531f, 0.168269455f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SandyBrown = { - {{0.904661357f, 0.371237785f, 0.116970696f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SeaGreen = { - {{0.027320892f, 0.258182913f, 0.095307484f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SeaShell = {{{1.f, 0.913098991f, 0.854992807f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Sienna = { - {{0.351532698f, 0.084376216f, 0.026241222f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Silver = { - {{0.527115345f, 0.527115345f, 0.527115345f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SkyBlue = { - {{0.242281199f, 0.617206752f, 0.830770075f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SlateBlue = { - {{0.144128501f, 0.102241747f, 0.610495746f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SlateGray = { - {{0.162029430f, 0.215860531f, 0.278894335f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Snow = {{{1.f, 0.955973506f, 0.955973506f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SpringGreen = {{{0.f, 1.f, 0.212230787f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 SteelBlue = { - {{0.061246071f, 0.223227978f, 0.456411064f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Tan = { - {{0.644479871f, 0.456411064f, 0.262250721f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Teal = {{{0.f, 0.215860531f, 0.215860531f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Thistle = { - {{0.686685443f, 0.520995677f, 0.686685443f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Tomato = {{{1.f, 0.124771863f, 0.063010029f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Transparent = {{{0.f, 0.f, 0.f, 0.f}}}; -XMGLOBALCONST XMVECTORF32 Turquoise = { - {{0.051269468f, 0.745404482f, 0.630757332f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Violet = { - {{0.854992807f, 0.223227978f, 0.854992807f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Wheat = { - {{0.913098991f, 0.730461001f, 0.450785846f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 White = {{{1.f, 1.f, 1.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 WhiteSmoke = { - {{0.913098991f, 0.913098991f, 0.913098991f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 Yellow = {{{1.f, 1.f, 0.f, 1.f}}}; -XMGLOBALCONST XMVECTORF32 YellowGreen = { - {{0.323143244f, 0.610495746f, 0.031896040f, 1.f}}}; - -} // namespace ColorsLinear - -} // namespace DirectX diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXMath.h b/targets/app/linux/Stubs/DirectXMath/DirectXMath.h deleted file mode 100644 index e3e629732..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXMath.h +++ /dev/null @@ -1,3092 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXMath.h -- SIMD C++ Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -#ifndef __cplusplus -#error DirectX Math requires C++ -#endif - -#define DIRECTX_MATH_VERSION 320 - -#if defined(_MSC_VER) && (_MSC_VER < 1910) -#error DirectX Math requires Visual C++ 2017 or later. -#endif - -#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && \ - !defined(_M_HYBRID_X86_ARM64) && !defined(_M_ARM64EC) && (!_MANAGED) && \ - (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && \ - !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_) -#define _XM_VECTORCALL_ 1 -#endif - -#if _XM_VECTORCALL_ -#define XM_CALLCONV __vectorcall -#elif defined(__GNUC__) -#define XM_CALLCONV -#else -#define XM_CALLCONV __fastcall -#endif - -#ifndef XM_DEPRECATED -#if (__cplusplus >= 201402L) -#define XM_DEPRECATED [[deprecated]] -#elif defined(__GNUC__) -#define XM_DEPRECATED __attribute__((deprecated)) -#else -#define XM_DEPRECATED \ - __declspec(deprecated( \ - "This is deprecated and will be removed in a future version.")) -#endif -#endif - -#if !defined(_XM_AVX2_INTRINSICS_) && defined(__AVX2__) && \ - !defined(_XM_NO_INTRINSICS_) -#define _XM_AVX2_INTRINSICS_ -#endif - -#if !defined(_XM_FMA3_INTRINSICS_) && defined(_XM_AVX2_INTRINSICS_) && \ - !defined(_XM_NO_INTRINSICS_) -#define _XM_FMA3_INTRINSICS_ -#endif - -#if !defined(_XM_F16C_INTRINSICS_) && defined(_XM_AVX2_INTRINSICS_) && \ - !defined(_XM_NO_INTRINSICS_) -#define _XM_F16C_INTRINSICS_ -#endif - -#if !defined(_XM_F16C_INTRINSICS_) && defined(__F16C__) && \ - !defined(_XM_NO_INTRINSICS_) -#define _XM_F16C_INTRINSICS_ -#endif - -#if defined(_XM_FMA3_INTRINSICS_) && !defined(_XM_AVX_INTRINSICS_) -#define _XM_AVX_INTRINSICS_ -#endif - -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_AVX_INTRINSICS_) -#define _XM_AVX_INTRINSICS_ -#endif - -#if !defined(_XM_AVX_INTRINSICS_) && defined(__AVX__) && \ - !defined(_XM_NO_INTRINSICS_) -#define _XM_AVX_INTRINSICS_ -#endif - -#if defined(_XM_AVX_INTRINSICS_) && !defined(_XM_SSE4_INTRINSICS_) -#define _XM_SSE4_INTRINSICS_ -#endif - -#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE3_INTRINSICS_) -#define _XM_SSE3_INTRINSICS_ -#endif - -#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) -#define _XM_SSE_INTRINSICS_ -#endif - -#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && \ - !defined(_XM_NO_INTRINSICS_) -#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && \ - !defined(_M_HYBRID_X86_ARM64) && !defined(_M_ARM64EC) -#define _XM_SSE_INTRINSICS_ -#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __arm__ || __aarch64__ -#define _XM_ARM_NEON_INTRINSICS_ -#elif !defined(_XM_NO_INTRINSICS_) -#error DirectX Math does not support this target -#endif -#endif // !_XM_ARM_NEON_INTRINSICS_ && !_XM_SSE_INTRINSICS_ && - // !_XM_NO_INTRINSICS_ - -#if defined(_XM_SSE_INTRINSICS_) && defined(_MSC_VER) && (_MSC_VER >= 1920) && \ - !defined(__clang__) && !defined(_XM_SVML_INTRINSICS_) && \ - !defined(_XM_DISABLE_INTEL_SVML_) -#define _XM_SVML_INTRINSICS_ -#endif - -#if !defined(_XM_NO_XMVECTOR_OVERLOADS_) && \ - (defined(__clang__) || defined(__GNUC__)) && !defined(_XM_NO_INTRINSICS_) -#define _XM_NO_XMVECTOR_OVERLOADS_ -#endif - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4514 4820) -// C4514/4820: Off by default noise -#endif -#include -#include -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#ifndef _XM_NO_INTRINSICS_ - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4987) -// C4987: Off by default noise -#endif -#if defined(_MSC_VER) || defined(__MINGW32__) -#include -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#if (defined(__clang__) || defined(__GNUC__)) && (__x86_64__ || __i386__) && \ - !defined(__MINGW32__) -#include -#endif - -#ifdef _XM_SSE_INTRINSICS_ -#include -#include - -#ifdef _XM_SSE3_INTRINSICS_ -#include -#endif - -#ifdef _XM_SSE4_INTRINSICS_ -#include -#endif - -#ifdef _XM_AVX_INTRINSICS_ -#include -#endif - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)) -#include -#else -#include -#endif -#endif -#endif // !_XM_NO_INTRINSICS_ - -#include - -#include "sal.h" - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4005 4668) -// C4005/4668: Old header issue -#endif -#include -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#if (__cplusplus >= 201703L) -#define XM_ALIGNED_DATA(x) alignas(x) -#define XM_ALIGNED_STRUCT(x) struct alignas(x) -#elif defined(__GNUC__) -#define XM_ALIGNED_DATA(x) __attribute__((aligned(x))) -#define XM_ALIGNED_STRUCT(x) struct __attribute__((aligned(x))) -#else -#define XM_ALIGNED_DATA(x) __declspec(align(x)) -#define XM_ALIGNED_STRUCT(x) __declspec(align(x)) struct -#endif - -#if (__cplusplus >= 202002L) -#include -#endif - -/**************************************************************************** - * - * Conditional intrinsics - * - ****************************************************************************/ - -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - -#if defined(_XM_NO_MOVNT_) -#define XM_STREAM_PS(p, a) _mm_store_ps((p), (a)) -#define XM256_STREAM_PS(p, a) _mm256_store_ps((p), (a)) -#define XM_SFENCE() -#else -#define XM_STREAM_PS(p, a) _mm_stream_ps((p), (a)) -#define XM256_STREAM_PS(p, a) _mm256_stream_ps((p), (a)) -#define XM_SFENCE() _mm_sfence() -#endif - -#if defined(_XM_FMA3_INTRINSICS_) -#define XM_FMADD_PS(a, b, c) _mm_fmadd_ps((a), (b), (c)) -#define XM_FNMADD_PS(a, b, c) _mm_fnmadd_ps((a), (b), (c)) -#else -#define XM_FMADD_PS(a, b, c) _mm_add_ps(_mm_mul_ps((a), (b)), (c)) -#define XM_FNMADD_PS(a, b, c) _mm_sub_ps((c), _mm_mul_ps((a), (b))) -#endif - -#if defined(_XM_AVX_INTRINSICS_) && defined(_XM_FAVOR_INTEL_) -#define XM_PERMUTE_PS(v, c) _mm_permute_ps((v), c) -#else -#define XM_PERMUTE_PS(v, c) _mm_shuffle_ps((v), (v), c) -#endif - -#if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ < 11) -#define XM_LOADU_SI16(p) \ - _mm_cvtsi32_si128(*reinterpret_cast(p)) -#else -#define XM_LOADU_SI16(p) _mm_loadu_si16(p) -#endif - -#endif // _XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_ - -#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - -#if defined(__clang__) || defined(__GNUC__) -#define XM_PREFETCH(a) __builtin_prefetch(a) -#elif defined(_MSC_VER) -#define XM_PREFETCH(a) __prefetch(a) -#else -#define XM_PREFETCH(a) -#endif - -#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_ - -namespace DirectX { - -/**************************************************************************** - * - * Constant definitions - * - ****************************************************************************/ - -#if defined(__XNAMATH_H__) && defined(XM_PI) -#undef XM_PI -#undef XM_2PI -#undef XM_1DIVPI -#undef XM_1DIV2PI -#undef XM_PIDIV2 -#undef XM_PIDIV4 -#undef XM_SELECT_0 -#undef XM_SELECT_1 -#undef XM_PERMUTE_0X -#undef XM_PERMUTE_0Y -#undef XM_PERMUTE_0Z -#undef XM_PERMUTE_0W -#undef XM_PERMUTE_1X -#undef XM_PERMUTE_1Y -#undef XM_PERMUTE_1Z -#undef XM_PERMUTE_1W -#undef XM_CRMASK_CR6 -#undef XM_CRMASK_CR6TRUE -#undef XM_CRMASK_CR6FALSE -#undef XM_CRMASK_CR6BOUNDS -#undef XM_CACHE_LINE_SIZE -#endif - -constexpr float XM_PI = 3.141592654f; -constexpr float XM_2PI = 6.283185307f; -constexpr float XM_1DIVPI = 0.318309886f; -constexpr float XM_1DIV2PI = 0.159154943f; -constexpr float XM_PIDIV2 = 1.570796327f; -constexpr float XM_PIDIV4 = 0.785398163f; - -constexpr uint32_t XM_SELECT_0 = 0x00000000; -constexpr uint32_t XM_SELECT_1 = 0xFFFFFFFF; - -constexpr uint32_t XM_PERMUTE_0X = 0; -constexpr uint32_t XM_PERMUTE_0Y = 1; -constexpr uint32_t XM_PERMUTE_0Z = 2; -constexpr uint32_t XM_PERMUTE_0W = 3; -constexpr uint32_t XM_PERMUTE_1X = 4; -constexpr uint32_t XM_PERMUTE_1Y = 5; -constexpr uint32_t XM_PERMUTE_1Z = 6; -constexpr uint32_t XM_PERMUTE_1W = 7; - -constexpr uint32_t XM_SWIZZLE_X = 0; -constexpr uint32_t XM_SWIZZLE_Y = 1; -constexpr uint32_t XM_SWIZZLE_Z = 2; -constexpr uint32_t XM_SWIZZLE_W = 3; - -constexpr uint32_t XM_CRMASK_CR6 = 0x000000F0; -constexpr uint32_t XM_CRMASK_CR6TRUE = 0x00000080; -constexpr uint32_t XM_CRMASK_CR6FALSE = 0x00000020; -constexpr uint32_t XM_CRMASK_CR6BOUNDS = XM_CRMASK_CR6FALSE; - -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __arm__ || __aarch64__ -constexpr size_t XM_CACHE_LINE_SIZE = 128; -#else -constexpr size_t XM_CACHE_LINE_SIZE = 64; -#endif - -/**************************************************************************** - * - * Macros - * - ****************************************************************************/ - -#if defined(__XNAMATH_H__) && defined(XMComparisonAllTrue) -#undef XMComparisonAllTrue -#undef XMComparisonAnyTrue -#undef XMComparisonAllFalse -#undef XMComparisonAnyFalse -#undef XMComparisonMixed -#undef XMComparisonAllInBounds -#undef XMComparisonAnyOutOfBounds -#endif - -// Unit conversion - -constexpr float XMConvertToRadians(float fDegrees) noexcept { - return fDegrees * (XM_PI / 180.0f); -} -constexpr float XMConvertToDegrees(float fRadians) noexcept { - return fRadians * (180.0f / XM_PI); -} - -// Condition register evaluation proceeding a recording (R) comparison - -constexpr bool XMComparisonAllTrue(uint32_t CR) noexcept { - return (CR & XM_CRMASK_CR6TRUE) == XM_CRMASK_CR6TRUE; -} -constexpr bool XMComparisonAnyTrue(uint32_t CR) noexcept { - return (CR & XM_CRMASK_CR6FALSE) != XM_CRMASK_CR6FALSE; -} -constexpr bool XMComparisonAllFalse(uint32_t CR) noexcept { - return (CR & XM_CRMASK_CR6FALSE) == XM_CRMASK_CR6FALSE; -} -constexpr bool XMComparisonAnyFalse(uint32_t CR) noexcept { - return (CR & XM_CRMASK_CR6TRUE) != XM_CRMASK_CR6TRUE; -} -constexpr bool XMComparisonMixed(uint32_t CR) noexcept { - return (CR & XM_CRMASK_CR6) == 0; -} -constexpr bool XMComparisonAllInBounds(uint32_t CR) noexcept { - return (CR & XM_CRMASK_CR6BOUNDS) == XM_CRMASK_CR6BOUNDS; -} -constexpr bool XMComparisonAnyOutOfBounds(uint32_t CR) noexcept { - return (CR & XM_CRMASK_CR6BOUNDS) != XM_CRMASK_CR6BOUNDS; -} - -/**************************************************************************** - * - * Data types - * - ****************************************************************************/ - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4068 4201 4365 4324 4820) -// C4068: ignore unknown pragmas -// C4201: nonstandard extension used : nameless struct/union -// C4365: Off by default noise -// C4324/4820: padding warnings -#endif - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") -#endif - -//------------------------------------------------------------------------------ -#if defined(_XM_NO_INTRINSICS_) -struct __vector4 { - union { - float vector4_f32[4]; - uint32_t vector4_u32[4]; - }; -}; -#endif // _XM_NO_INTRINSICS_ - -//------------------------------------------------------------------------------ -// Vector intrinsic: Four 32 bit floating point components aligned on a 16 byte -// boundary and mapped to hardware vector registers -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) -using XMVECTOR = __m128; -#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) -using XMVECTOR = float32x4_t; -#else -using XMVECTOR = __vector4; -#endif - -// Fix-up for (1st-3rd) XMVECTOR parameters that are pass-in-register for x86, -// ARM, ARM64, and vector call; by reference otherwise -#if (defined(_M_IX86) || defined(_M_ARM) || defined(_M_ARM64) || \ - _XM_VECTORCALL_ || __i386__ || __arm__ || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) -typedef const XMVECTOR FXMVECTOR; -#else -typedef const XMVECTOR& FXMVECTOR; -#endif - -// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, ARM64, and -// vector call; by reference otherwise -#if (defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || _XM_VECTORCALL_ || __arm__ || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) -typedef const XMVECTOR GXMVECTOR; -#else -typedef const XMVECTOR& GXMVECTOR; -#endif - -// Fix-up for (5th & 6th) XMVECTOR parameter to pass in-register for ARM64 and -// vector call; by reference otherwise -#if (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || _XM_VECTORCALL_ || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) -typedef const XMVECTOR HXMVECTOR; -#else -typedef const XMVECTOR& HXMVECTOR; -#endif - -// Fix-up for (7th+) XMVECTOR parameters to pass by reference -typedef const XMVECTOR& CXMVECTOR; - -//------------------------------------------------------------------------------ -// Conversion types for constants -XM_ALIGNED_STRUCT(16) XMVECTORF32 { - union { - float f[4]; - XMVECTOR v; - }; - - inline operator XMVECTOR() const noexcept { return v; } - inline operator const float*() const noexcept { return f; } -#ifdef _XM_NO_INTRINSICS_ -#elif defined(_XM_SSE_INTRINSICS_) - inline operator __m128i() const noexcept { return _mm_castps_si128(v); } - inline operator __m128d() const noexcept { return _mm_castps_pd(v); } -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES)) - inline operator int32x4_t() const noexcept { - return vreinterpretq_s32_f32(v); - } - inline operator uint32x4_t() const noexcept { - return vreinterpretq_u32_f32(v); - } -#endif -}; - -XM_ALIGNED_STRUCT(16) XMVECTORI32 { - union { - int32_t i[4]; - XMVECTOR v; - }; - - inline operator XMVECTOR() const noexcept { return v; } -#ifdef _XM_NO_INTRINSICS_ -#elif defined(_XM_SSE_INTRINSICS_) - inline operator __m128i() const noexcept { return _mm_castps_si128(v); } - inline operator __m128d() const noexcept { return _mm_castps_pd(v); } -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES)) - inline operator int32x4_t() const noexcept { - return vreinterpretq_s32_f32(v); - } - inline operator uint32x4_t() const noexcept { - return vreinterpretq_u32_f32(v); - } -#endif -}; - -XM_ALIGNED_STRUCT(16) XMVECTORU8 { - union { - uint8_t u[16]; - XMVECTOR v; - }; - - inline operator XMVECTOR() const noexcept { return v; } -#ifdef _XM_NO_INTRINSICS_ -#elif defined(_XM_SSE_INTRINSICS_) - inline operator __m128i() const noexcept { return _mm_castps_si128(v); } - inline operator __m128d() const noexcept { return _mm_castps_pd(v); } -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES)) - inline operator int32x4_t() const noexcept { - return vreinterpretq_s32_f32(v); - } - inline operator uint32x4_t() const noexcept { - return vreinterpretq_u32_f32(v); - } -#endif -}; - -XM_ALIGNED_STRUCT(16) XMVECTORU32 { - union { - uint32_t u[4]; - XMVECTOR v; - }; - - inline operator XMVECTOR() const noexcept { return v; } -#ifdef _XM_NO_INTRINSICS_ -#elif defined(_XM_SSE_INTRINSICS_) - inline operator __m128i() const noexcept { return _mm_castps_si128(v); } - inline operator __m128d() const noexcept { return _mm_castps_pd(v); } -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(__GNUC__) || defined(_ARM64_DISTINCT_NEON_TYPES)) - inline operator int32x4_t() const noexcept { - return vreinterpretq_s32_f32(v); - } - inline operator uint32x4_t() const noexcept { - return vreinterpretq_u32_f32(v); - } -#endif -}; - -//------------------------------------------------------------------------------ -// Vector operators - -#ifndef _XM_NO_XMVECTOR_OVERLOADS_ -XMVECTOR XM_CALLCONV operator+(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV operator-(FXMVECTOR V) noexcept; - -XMVECTOR& XM_CALLCONV operator+=(XMVECTOR& V1, FXMVECTOR V2) noexcept; -XMVECTOR& XM_CALLCONV operator-=(XMVECTOR& V1, FXMVECTOR V2) noexcept; -XMVECTOR& XM_CALLCONV operator*=(XMVECTOR& V1, FXMVECTOR V2) noexcept; -XMVECTOR& XM_CALLCONV operator/=(XMVECTOR& V1, FXMVECTOR V2) noexcept; - -XMVECTOR& operator*=(XMVECTOR& V, float S) noexcept; -XMVECTOR& operator/=(XMVECTOR& V, float S) noexcept; - -XMVECTOR XM_CALLCONV operator+(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV operator-(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV operator*(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV operator/(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV operator*(FXMVECTOR V, float S) noexcept; -XMVECTOR XM_CALLCONV operator*(float S, FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV operator/(FXMVECTOR V, float S) noexcept; -#endif /* !_XM_NO_XMVECTOR_OVERLOADS_ */ - -//------------------------------------------------------------------------------ -// Matrix type: Sixteen 32 bit floating point components aligned on a -// 16 byte boundary and mapped to four hardware vector registers - -struct XMMATRIX; - -// Fix-up for (1st) XMMATRIX parameter to pass in-register for ARM64 and vector -// call; by reference otherwise -#if (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || _XM_VECTORCALL_ || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) -typedef const XMMATRIX FXMMATRIX; -#else -typedef const XMMATRIX& FXMMATRIX; -#endif - -// Fix-up for (2nd+) XMMATRIX parameters to pass by reference -typedef const XMMATRIX& CXMMATRIX; - -#ifdef _XM_NO_INTRINSICS_ -struct XMMATRIX -#else -XM_ALIGNED_STRUCT(16) -XMMATRIX -#endif -{ -#ifdef _XM_NO_INTRINSICS_ - union { - XMVECTOR r[4]; - struct { - float _11, _12, _13, _14; - float _21, _22, _23, _24; - float _31, _32, _33, _34; - float _41, _42, _43, _44; - }; - float m[4][4]; - }; -#else - XMVECTOR r[4]; -#endif - - XMMATRIX() = default; - - XMMATRIX(const XMMATRIX&) = default; - -#if defined(_MSC_VER) && (_MSC_FULL_VER < 191426431) - XMMATRIX& operator=(const XMMATRIX& M) noexcept { - r[0] = M.r[0]; - r[1] = M.r[1]; - r[2] = M.r[2]; - r[3] = M.r[3]; - return *this; - } -#else - XMMATRIX& operator=(const XMMATRIX&) = default; - - XMMATRIX(XMMATRIX&&) = default; - XMMATRIX& operator=(XMMATRIX&&) = default; -#endif - - constexpr XMMATRIX(FXMVECTOR R0, FXMVECTOR R1, FXMVECTOR R2, - CXMVECTOR R3) noexcept - : r{R0, R1, R2, R3} {} - XMMATRIX(float m00, float m01, float m02, float m03, float m10, float m11, - float m12, float m13, float m20, float m21, float m22, float m23, - float m30, float m31, float m32, float m33) noexcept; - explicit XMMATRIX(_In_reads_(16) const float* pArray) noexcept; - -#ifdef _XM_NO_INTRINSICS_ - float operator()(size_t Row, size_t Column) const noexcept { - return m[Row][Column]; - } - float& operator()(size_t Row, size_t Column) noexcept { - return m[Row][Column]; - } -#endif - - XMMATRIX operator+() const noexcept { return *this; } - XMMATRIX operator-() const noexcept; - - XMMATRIX& XM_CALLCONV operator+=(FXMMATRIX M) noexcept; - XMMATRIX& XM_CALLCONV operator-=(FXMMATRIX M) noexcept; - XMMATRIX& XM_CALLCONV operator*=(FXMMATRIX M) noexcept; - XMMATRIX& operator*=(float S) noexcept; - XMMATRIX& operator/=(float S) noexcept; - - XMMATRIX XM_CALLCONV operator+(FXMMATRIX M) const noexcept; - XMMATRIX XM_CALLCONV operator-(FXMMATRIX M) const noexcept; - XMMATRIX XM_CALLCONV operator*(FXMMATRIX M) const noexcept; - XMMATRIX operator*(float S) const noexcept; - XMMATRIX operator/(float S) const noexcept; - - friend XMMATRIX XM_CALLCONV operator*(float S, FXMMATRIX M) noexcept; -}; - -//------------------------------------------------------------------------------ -// 2D Vector; 32 bit floating point components -struct XMFLOAT2 { - float x; - float y; - - XMFLOAT2() = default; - - XMFLOAT2(const XMFLOAT2&) = default; - XMFLOAT2& operator=(const XMFLOAT2&) = default; - - XMFLOAT2(XMFLOAT2&&) = default; - XMFLOAT2& operator=(XMFLOAT2&&) = default; - - constexpr XMFLOAT2(float _x, float _y) noexcept : x(_x), y(_y) {} - explicit XMFLOAT2(_In_reads_(2) const float* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMFLOAT2&) const = default; - auto operator<=>(const XMFLOAT2&) const = default; -#endif -}; - -// 2D Vector; 32 bit floating point components aligned on a 16 byte boundary -XM_ALIGNED_STRUCT(16) XMFLOAT2A : public XMFLOAT2 { using XMFLOAT2::XMFLOAT2; }; - -//------------------------------------------------------------------------------ -// 2D Vector; 32 bit signed integer components -struct XMINT2 { - int32_t x; - int32_t y; - - XMINT2() = default; - - XMINT2(const XMINT2&) = default; - XMINT2& operator=(const XMINT2&) = default; - - XMINT2(XMINT2&&) = default; - XMINT2& operator=(XMINT2&&) = default; - - constexpr XMINT2(int32_t _x, int32_t _y) noexcept : x(_x), y(_y) {} - explicit XMINT2(_In_reads_(2) const int32_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMINT2&) const = default; - auto operator<=>(const XMINT2&) const = default; -#endif -}; - -// 2D Vector; 32 bit unsigned integer components -struct XMUINT2 { - uint32_t x; - uint32_t y; - - XMUINT2() = default; - - XMUINT2(const XMUINT2&) = default; - XMUINT2& operator=(const XMUINT2&) = default; - - XMUINT2(XMUINT2&&) = default; - XMUINT2& operator=(XMUINT2&&) = default; - - constexpr XMUINT2(uint32_t _x, uint32_t _y) noexcept : x(_x), y(_y) {} - explicit XMUINT2(_In_reads_(2) const uint32_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMUINT2&) const = default; - auto operator<=>(const XMUINT2&) const = default; -#endif -}; - -//------------------------------------------------------------------------------ -// 3D Vector; 32 bit floating point components -struct XMFLOAT3 { - float x; - float y; - float z; - - XMFLOAT3() = default; - - XMFLOAT3(const XMFLOAT3&) = default; - XMFLOAT3& operator=(const XMFLOAT3&) = default; - - XMFLOAT3(XMFLOAT3&&) = default; - XMFLOAT3& operator=(XMFLOAT3&&) = default; - - constexpr XMFLOAT3(float _x, float _y, float _z) noexcept - : x(_x), y(_y), z(_z) {} - explicit XMFLOAT3(_In_reads_(3) const float* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} -}; - -// 3D Vector; 32 bit floating point components aligned on a 16 byte boundary -XM_ALIGNED_STRUCT(16) XMFLOAT3A : public XMFLOAT3 { using XMFLOAT3::XMFLOAT3; }; - -//------------------------------------------------------------------------------ -// 3D Vector; 32 bit signed integer components -struct XMINT3 { - int32_t x; - int32_t y; - int32_t z; - - XMINT3() = default; - - XMINT3(const XMINT3&) = default; - XMINT3& operator=(const XMINT3&) = default; - - XMINT3(XMINT3&&) = default; - XMINT3& operator=(XMINT3&&) = default; - - constexpr XMINT3(int32_t _x, int32_t _y, int32_t _z) noexcept - : x(_x), y(_y), z(_z) {} - explicit XMINT3(_In_reads_(3) const int32_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMINT3&) const = default; - auto operator<=>(const XMINT3&) const = default; -#endif -}; - -// 3D Vector; 32 bit unsigned integer components -struct XMUINT3 { - uint32_t x; - uint32_t y; - uint32_t z; - - XMUINT3() = default; - - XMUINT3(const XMUINT3&) = default; - XMUINT3& operator=(const XMUINT3&) = default; - - XMUINT3(XMUINT3&&) = default; - XMUINT3& operator=(XMUINT3&&) = default; - - constexpr XMUINT3(uint32_t _x, uint32_t _y, uint32_t _z) noexcept - : x(_x), y(_y), z(_z) {} - explicit XMUINT3(_In_reads_(3) const uint32_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMUINT3&) const = default; - auto operator<=>(const XMUINT3&) const = default; -#endif -}; - -//------------------------------------------------------------------------------ -// 4D Vector; 32 bit floating point components -struct XMFLOAT4 { - float x; - float y; - float z; - float w; - - XMFLOAT4() = default; - - XMFLOAT4(const XMFLOAT4&) = default; - XMFLOAT4& operator=(const XMFLOAT4&) = default; - - XMFLOAT4(XMFLOAT4&&) = default; - XMFLOAT4& operator=(XMFLOAT4&&) = default; - - constexpr XMFLOAT4(float _x, float _y, float _z, float _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMFLOAT4(_In_reads_(4) const float* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMFLOAT4&) const = default; - auto operator<=>(const XMFLOAT4&) const = default; -#endif -}; - -// 4D Vector; 32 bit floating point components aligned on a 16 byte boundary -XM_ALIGNED_STRUCT(16) XMFLOAT4A : public XMFLOAT4 { using XMFLOAT4::XMFLOAT4; }; - -//------------------------------------------------------------------------------ -// 4D Vector; 32 bit signed integer components -struct XMINT4 { - int32_t x; - int32_t y; - int32_t z; - int32_t w; - - XMINT4() = default; - - XMINT4(const XMINT4&) = default; - XMINT4& operator=(const XMINT4&) = default; - - XMINT4(XMINT4&&) = default; - XMINT4& operator=(XMINT4&&) = default; - - constexpr XMINT4(int32_t _x, int32_t _y, int32_t _z, int32_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMINT4(_In_reads_(4) const int32_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMINT4&) const = default; - auto operator<=>(const XMINT4&) const = default; -#endif -}; - -// 4D Vector; 32 bit unsigned integer components -struct XMUINT4 { - uint32_t x; - uint32_t y; - uint32_t z; - uint32_t w; - - XMUINT4() = default; - - XMUINT4(const XMUINT4&) = default; - XMUINT4& operator=(const XMUINT4&) = default; - - XMUINT4(XMUINT4&&) = default; - XMUINT4& operator=(XMUINT4&&) = default; - - constexpr XMUINT4(uint32_t _x, uint32_t _y, uint32_t _z, - uint32_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMUINT4(_In_reads_(4) const uint32_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - -#if (__cplusplus >= 202002L) - bool operator==(const XMUINT4&) const = default; - auto operator<=>(const XMUINT4&) const = default; -#endif -}; - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -#pragma clang diagnostic ignored "-Wnested-anon-types" -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" -#endif - -//------------------------------------------------------------------------------ -// 3x3 Matrix: 32 bit floating point components -struct XMFLOAT3X3 { - union { - struct { - float _11, _12, _13; - float _21, _22, _23; - float _31, _32, _33; - }; - float m[3][3]; - }; - - XMFLOAT3X3() = default; - - XMFLOAT3X3(const XMFLOAT3X3&) = default; - XMFLOAT3X3& operator=(const XMFLOAT3X3&) = default; - - XMFLOAT3X3(XMFLOAT3X3&&) = default; - XMFLOAT3X3& operator=(XMFLOAT3X3&&) = default; - - constexpr XMFLOAT3X3(float m00, float m01, float m02, float m10, float m11, - float m12, float m20, float m21, float m22) noexcept - : _11(m00), - _12(m01), - _13(m02), - _21(m10), - _22(m11), - _23(m12), - _31(m20), - _32(m21), - _33(m22) {} - explicit XMFLOAT3X3(_In_reads_(9) const float* pArray) noexcept; - - float operator()(size_t Row, size_t Column) const noexcept { - return m[Row][Column]; - } - float& operator()(size_t Row, size_t Column) noexcept { - return m[Row][Column]; - } - -#if (__cplusplus >= 202002L) - bool operator==(const XMFLOAT3X3&) const = delete; - auto operator<=>(const XMFLOAT3X3&) const = delete; -#endif -}; - -//------------------------------------------------------------------------------ -// 4x3 Row-major Matrix: 32 bit floating point components -struct XMFLOAT4X3 { - union { - struct { - float _11, _12, _13; - float _21, _22, _23; - float _31, _32, _33; - float _41, _42, _43; - }; - float m[4][3]; - float f[12]; - }; - - XMFLOAT4X3() = default; - - XMFLOAT4X3(const XMFLOAT4X3&) = default; - XMFLOAT4X3& operator=(const XMFLOAT4X3&) = default; - - XMFLOAT4X3(XMFLOAT4X3&&) = default; - XMFLOAT4X3& operator=(XMFLOAT4X3&&) = default; - - constexpr XMFLOAT4X3(float m00, float m01, float m02, float m10, float m11, - float m12, float m20, float m21, float m22, float m30, - float m31, float m32) noexcept - : _11(m00), - _12(m01), - _13(m02), - _21(m10), - _22(m11), - _23(m12), - _31(m20), - _32(m21), - _33(m22), - _41(m30), - _42(m31), - _43(m32) {} - explicit XMFLOAT4X3(_In_reads_(12) const float* pArray) noexcept; - - float operator()(size_t Row, size_t Column) const noexcept { - return m[Row][Column]; - } - float& operator()(size_t Row, size_t Column) noexcept { - return m[Row][Column]; - } - -#if (__cplusplus >= 202002L) - bool operator==(const XMFLOAT4X3&) const = delete; - auto operator<=>(const XMFLOAT4X3&) const = delete; -#endif -}; - -// 4x3 Row-major Matrix: 32 bit floating point components aligned on a 16 byte -// boundary -XM_ALIGNED_STRUCT(16) XMFLOAT4X3A : public XMFLOAT4X3 { - using XMFLOAT4X3::XMFLOAT4X3; -}; - -//------------------------------------------------------------------------------ -// 3x4 Column-major Matrix: 32 bit floating point components -struct XMFLOAT3X4 { - union { - struct { - float _11, _12, _13, _14; - float _21, _22, _23, _24; - float _31, _32, _33, _34; - }; - float m[3][4]; - float f[12]; - }; - - XMFLOAT3X4() = default; - - XMFLOAT3X4(const XMFLOAT3X4&) = default; - XMFLOAT3X4& operator=(const XMFLOAT3X4&) = default; - - XMFLOAT3X4(XMFLOAT3X4&&) = default; - XMFLOAT3X4& operator=(XMFLOAT3X4&&) = default; - - constexpr XMFLOAT3X4(float m00, float m01, float m02, float m03, float m10, - float m11, float m12, float m13, float m20, float m21, - float m22, float m23) noexcept - : _11(m00), - _12(m01), - _13(m02), - _14(m03), - _21(m10), - _22(m11), - _23(m12), - _24(m13), - _31(m20), - _32(m21), - _33(m22), - _34(m23) {} - explicit XMFLOAT3X4(_In_reads_(12) const float* pArray) noexcept; - - float operator()(size_t Row, size_t Column) const noexcept { - return m[Row][Column]; - } - float& operator()(size_t Row, size_t Column) noexcept { - return m[Row][Column]; - } - -#if (__cplusplus >= 202002L) - bool operator==(const XMFLOAT3X4&) const = delete; - auto operator<=>(const XMFLOAT3X4&) const = delete; -#endif -}; - -// 3x4 Column-major Matrix: 32 bit floating point components aligned on a 16 -// byte boundary -XM_ALIGNED_STRUCT(16) XMFLOAT3X4A : public XMFLOAT3X4 { - using XMFLOAT3X4::XMFLOAT3X4; -}; - -//------------------------------------------------------------------------------ -// 4x4 Matrix: 32 bit floating point components -struct XMFLOAT4X4 { - union { - struct { - float _11, _12, _13, _14; - float _21, _22, _23, _24; - float _31, _32, _33, _34; - float _41, _42, _43, _44; - }; - float m[4][4]; - }; - - XMFLOAT4X4() = default; - - XMFLOAT4X4(const XMFLOAT4X4&) = default; - XMFLOAT4X4& operator=(const XMFLOAT4X4&) = default; - - XMFLOAT4X4(XMFLOAT4X4&&) = default; - XMFLOAT4X4& operator=(XMFLOAT4X4&&) = default; - - constexpr XMFLOAT4X4(float m00, float m01, float m02, float m03, float m10, - float m11, float m12, float m13, float m20, float m21, - float m22, float m23, float m30, float m31, float m32, - float m33) noexcept - : _11(m00), - _12(m01), - _13(m02), - _14(m03), - _21(m10), - _22(m11), - _23(m12), - _24(m13), - _31(m20), - _32(m21), - _33(m22), - _34(m23), - _41(m30), - _42(m31), - _43(m32), - _44(m33) {} - explicit XMFLOAT4X4(_In_reads_(16) const float* pArray) noexcept; - - float operator()(size_t Row, size_t Column) const noexcept { - return m[Row][Column]; - } - float& operator()(size_t Row, size_t Column) noexcept { - return m[Row][Column]; - } - -#if (__cplusplus >= 202002L) - bool operator==(const XMFLOAT4X4&) const = delete; - auto operator<=>(const XMFLOAT4X4&) const = delete; -#endif -}; - -// 4x4 Matrix: 32 bit floating point components aligned on a 16 byte boundary -XM_ALIGNED_STRUCT(16) XMFLOAT4X4A : public XMFLOAT4X4 { - using XMFLOAT4X4::XMFLOAT4X4; -}; - -//////////////////////////////////////////////////////////////////////////////// - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/**************************************************************************** - * - * Data conversion operations - * - ****************************************************************************/ - -XMVECTOR XM_CALLCONV XMConvertVectorIntToFloat(FXMVECTOR VInt, - uint32_t DivExponent) noexcept; -XMVECTOR XM_CALLCONV XMConvertVectorFloatToInt(FXMVECTOR VFloat, - uint32_t MulExponent) noexcept; -XMVECTOR XM_CALLCONV XMConvertVectorUIntToFloat(FXMVECTOR VUInt, - uint32_t DivExponent) noexcept; -XMVECTOR XM_CALLCONV XMConvertVectorFloatToUInt(FXMVECTOR VFloat, - uint32_t MulExponent) noexcept; - -#if defined(__XNAMATH_H__) && defined(XMVectorSetBinaryConstant) -#undef XMVectorSetBinaryConstant -#undef XMVectorSplatConstant -#undef XMVectorSplatConstantInt -#endif - -XMVECTOR XM_CALLCONV XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, - uint32_t C2, - uint32_t C3) noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatConstant(int32_t IntConstant, - uint32_t DivExponent) noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatConstantInt(int32_t IntConstant) noexcept; - -/**************************************************************************** - * - * Load operations - * - ****************************************************************************/ - -XMVECTOR XM_CALLCONV XMLoadInt(_In_ const uint32_t* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat(_In_ const float* pSource) noexcept; - -XMVECTOR XM_CALLCONV XMLoadInt2(_In_reads_(2) const uint32_t* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadInt2A(_In_reads_(2) - const uint32_t* PSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat2(_In_ const XMFLOAT2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat2A(_In_ const XMFLOAT2A* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadSInt2(_In_ const XMINT2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUInt2(_In_ const XMUINT2* pSource) noexcept; - -XMVECTOR XM_CALLCONV XMLoadInt3(_In_reads_(3) const uint32_t* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadInt3A(_In_reads_(3) - const uint32_t* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat3(_In_ const XMFLOAT3* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat3A(_In_ const XMFLOAT3A* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadSInt3(_In_ const XMINT3* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUInt3(_In_ const XMUINT3* pSource) noexcept; - -XMVECTOR XM_CALLCONV XMLoadInt4(_In_reads_(4) const uint32_t* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadInt4A(_In_reads_(4) - const uint32_t* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat4(_In_ const XMFLOAT4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat4A(_In_ const XMFLOAT4A* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadSInt4(_In_ const XMINT4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUInt4(_In_ const XMUINT4* pSource) noexcept; - -XMMATRIX XM_CALLCONV XMLoadFloat3x3(_In_ const XMFLOAT3X3* pSource) noexcept; -XMMATRIX XM_CALLCONV XMLoadFloat4x3(_In_ const XMFLOAT4X3* pSource) noexcept; -XMMATRIX XM_CALLCONV XMLoadFloat4x3A(_In_ const XMFLOAT4X3A* pSource) noexcept; -XMMATRIX XM_CALLCONV XMLoadFloat3x4(_In_ const XMFLOAT3X4* pSource) noexcept; -XMMATRIX XM_CALLCONV XMLoadFloat3x4A(_In_ const XMFLOAT3X4A* pSource) noexcept; -XMMATRIX XM_CALLCONV XMLoadFloat4x4(_In_ const XMFLOAT4X4* pSource) noexcept; -XMMATRIX XM_CALLCONV XMLoadFloat4x4A(_In_ const XMFLOAT4X4A* pSource) noexcept; - -/**************************************************************************** - * - * Store operations - * - ****************************************************************************/ - -void XM_CALLCONV XMStoreInt(_Out_ uint32_t* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat(_Out_ float* pDestination, - _In_ FXMVECTOR V) noexcept; - -void XM_CALLCONV XMStoreInt2(_Out_writes_(2) uint32_t* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreInt2A(_Out_writes_(2) uint32_t* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat2(_Out_ XMFLOAT2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat2A(_Out_ XMFLOAT2A* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreSInt2(_Out_ XMINT2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUInt2(_Out_ XMUINT2* pDestination, - _In_ FXMVECTOR V) noexcept; - -void XM_CALLCONV XMStoreInt3(_Out_writes_(3) uint32_t* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreInt3A(_Out_writes_(3) uint32_t* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat3(_Out_ XMFLOAT3* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat3A(_Out_ XMFLOAT3A* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreSInt3(_Out_ XMINT3* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUInt3(_Out_ XMUINT3* pDestination, - _In_ FXMVECTOR V) noexcept; - -void XM_CALLCONV XMStoreInt4(_Out_writes_(4) uint32_t* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreInt4A(_Out_writes_(4) uint32_t* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat4(_Out_ XMFLOAT4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat4A(_Out_ XMFLOAT4A* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreSInt4(_Out_ XMINT4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUInt4(_Out_ XMUINT4* pDestination, - _In_ FXMVECTOR V) noexcept; - -void XM_CALLCONV XMStoreFloat3x3(_Out_ XMFLOAT3X3* pDestination, - _In_ FXMMATRIX M) noexcept; -void XM_CALLCONV XMStoreFloat4x3(_Out_ XMFLOAT4X3* pDestination, - _In_ FXMMATRIX M) noexcept; -void XM_CALLCONV XMStoreFloat4x3A(_Out_ XMFLOAT4X3A* pDestination, - _In_ FXMMATRIX M) noexcept; -void XM_CALLCONV XMStoreFloat3x4(_Out_ XMFLOAT3X4* pDestination, - _In_ FXMMATRIX M) noexcept; -void XM_CALLCONV XMStoreFloat3x4A(_Out_ XMFLOAT3X4A* pDestination, - _In_ FXMMATRIX M) noexcept; -void XM_CALLCONV XMStoreFloat4x4(_Out_ XMFLOAT4X4* pDestination, - _In_ FXMMATRIX M) noexcept; -void XM_CALLCONV XMStoreFloat4x4A(_Out_ XMFLOAT4X4A* pDestination, - _In_ FXMMATRIX M) noexcept; - -/**************************************************************************** - * - * General vector operations - * - ****************************************************************************/ - -XMVECTOR XM_CALLCONV XMVectorZero() noexcept; -XMVECTOR XM_CALLCONV XMVectorSet(float x, float y, float z, float w) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetInt(uint32_t x, uint32_t y, uint32_t z, - uint32_t w) noexcept; -XMVECTOR XM_CALLCONV XMVectorReplicate(float Value) noexcept; -XMVECTOR XM_CALLCONV XMVectorReplicatePtr(_In_ const float* pValue) noexcept; -XMVECTOR XM_CALLCONV XMVectorReplicateInt(uint32_t Value) noexcept; -XMVECTOR XM_CALLCONV -XMVectorReplicateIntPtr(_In_ const uint32_t* pValue) noexcept; -XMVECTOR XM_CALLCONV XMVectorTrueInt() noexcept; -XMVECTOR XM_CALLCONV XMVectorFalseInt() noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatX(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatY(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatZ(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatW(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatOne() noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatInfinity() noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatQNaN() noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatEpsilon() noexcept; -XMVECTOR XM_CALLCONV XMVectorSplatSignMask() noexcept; - -float XM_CALLCONV XMVectorGetByIndex(FXMVECTOR V, size_t i) noexcept; -float XM_CALLCONV XMVectorGetX(FXMVECTOR V) noexcept; -float XM_CALLCONV XMVectorGetY(FXMVECTOR V) noexcept; -float XM_CALLCONV XMVectorGetZ(FXMVECTOR V) noexcept; -float XM_CALLCONV XMVectorGetW(FXMVECTOR V) noexcept; - -void XM_CALLCONV XMVectorGetByIndexPtr(_Out_ float* f, _In_ FXMVECTOR V, - _In_ size_t i) noexcept; -void XM_CALLCONV XMVectorGetXPtr(_Out_ float* x, _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorGetYPtr(_Out_ float* y, _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorGetZPtr(_Out_ float* z, _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorGetWPtr(_Out_ float* w, _In_ FXMVECTOR V) noexcept; - -uint32_t XM_CALLCONV XMVectorGetIntByIndex(FXMVECTOR V, size_t i) noexcept; -uint32_t XM_CALLCONV XMVectorGetIntX(FXMVECTOR V) noexcept; -uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V) noexcept; -uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V) noexcept; -uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V) noexcept; - -void XM_CALLCONV XMVectorGetIntByIndexPtr(_Out_ uint32_t* x, _In_ FXMVECTOR V, - _In_ size_t i) noexcept; -void XM_CALLCONV XMVectorGetIntXPtr(_Out_ uint32_t* x, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorGetIntYPtr(_Out_ uint32_t* y, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorGetIntZPtr(_Out_ uint32_t* z, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorGetIntWPtr(_Out_ uint32_t* w, - _In_ FXMVECTOR V) noexcept; - -XMVECTOR XM_CALLCONV XMVectorSetByIndex(FXMVECTOR V, float f, - size_t i) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetX(FXMVECTOR V, float x) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w) noexcept; - -XMVECTOR XM_CALLCONV XMVectorSetByIndexPtr(_In_ FXMVECTOR V, - _In_ const float* f, - _In_ size_t i) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetXPtr(_In_ FXMVECTOR V, - _In_ const float* x) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetYPtr(_In_ FXMVECTOR V, - _In_ const float* y) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetZPtr(_In_ FXMVECTOR V, - _In_ const float* z) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetWPtr(_In_ FXMVECTOR V, - _In_ const float* w) noexcept; - -XMVECTOR XM_CALLCONV XMVectorSetIntByIndex(FXMVECTOR V, uint32_t x, - size_t i) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntX(FXMVECTOR V, uint32_t x) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w) noexcept; - -XMVECTOR XM_CALLCONV XMVectorSetIntByIndexPtr(_In_ FXMVECTOR V, - _In_ const uint32_t* x, - _In_ size_t i) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntXPtr(_In_ FXMVECTOR V, - _In_ const uint32_t* x) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntYPtr(_In_ FXMVECTOR V, - _In_ const uint32_t* y) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntZPtr(_In_ FXMVECTOR V, - _In_ const uint32_t* z) noexcept; -XMVECTOR XM_CALLCONV XMVectorSetIntWPtr(_In_ FXMVECTOR V, - _In_ const uint32_t* w) noexcept; - -#if defined(__XNAMATH_H__) && defined(XMVectorSwizzle) -#undef XMVectorSwizzle -#endif - -XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V, uint32_t E0, uint32_t E1, - uint32_t E2, uint32_t E3) noexcept; -XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2, - uint32_t PermuteX, uint32_t PermuteY, - uint32_t PermuteZ, - uint32_t PermuteW) noexcept; -XMVECTOR XM_CALLCONV XMVectorSelectControl(uint32_t VectorIndex0, - uint32_t VectorIndex1, - uint32_t VectorIndex2, - uint32_t VectorIndex3) noexcept; -XMVECTOR XM_CALLCONV XMVectorSelect(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Control) noexcept; -XMVECTOR XM_CALLCONV XMVectorMergeXY(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorMergeZW(FXMVECTOR V1, FXMVECTOR V2) noexcept; - -#if defined(__XNAMATH_H__) && defined(XMVectorShiftLeft) -#undef XMVectorShiftLeft -#undef XMVectorRotateLeft -#undef XMVectorRotateRight -#undef XMVectorInsert -#endif - -XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, - uint32_t Elements) noexcept; -XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, - uint32_t Elements) noexcept; -XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, - uint32_t Elements) noexcept; -XMVECTOR XM_CALLCONV XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS, - uint32_t VSLeftRotateElements, - uint32_t Select0, uint32_t Select1, - uint32_t Select2, - uint32_t Select3) noexcept; - -XMVECTOR XM_CALLCONV XMVectorEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorEqualR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorEqualIntR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V, - _In_ FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorNearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept; -XMVECTOR XM_CALLCONV XMVectorNotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorNotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorGreater(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorGreaterR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorGreaterOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorGreaterOrEqualR(_Out_ uint32_t* pCR, - _In_ FXMVECTOR V1, - _In_ FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorLess(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorLessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorInBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; -XMVECTOR XM_CALLCONV XMVectorInBoundsR(_Out_ uint32_t* pCR, _In_ FXMVECTOR V, - _In_ FXMVECTOR Bounds) noexcept; - -XMVECTOR XM_CALLCONV XMVectorIsNaN(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorIsInfinite(FXMVECTOR V) noexcept; - -XMVECTOR XM_CALLCONV XMVectorMin(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorMax(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorRound(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorTruncate(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorFloor(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorCeiling(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorClamp(FXMVECTOR V, FXMVECTOR Min, - FXMVECTOR Max) noexcept; -XMVECTOR XM_CALLCONV XMVectorSaturate(FXMVECTOR V) noexcept; - -XMVECTOR XM_CALLCONV XMVectorAndInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorAndCInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorOrInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorNorInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorXorInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; - -XMVECTOR XM_CALLCONV XMVectorNegate(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorAdd(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorSum(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorAddAngles(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorSubtract(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorSubtractAngles(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorMultiply(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorMultiplyAdd(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR V3) noexcept; -XMVECTOR XM_CALLCONV XMVectorDivide(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract(FXMVECTOR V1, - FXMVECTOR V2, - FXMVECTOR V3) noexcept; -XMVECTOR XM_CALLCONV XMVectorScale(FXMVECTOR V, float ScaleFactor) noexcept; -XMVECTOR XM_CALLCONV XMVectorReciprocalEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorReciprocal(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSqrtEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSqrt(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorReciprocalSqrtEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorReciprocalSqrt(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorExp10(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorExpE(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorExp(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorLog2(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorLog10(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorLogE(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorLog(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorPow(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorAbs(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorMod(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVectorModAngles(FXMVECTOR Angles) noexcept; -XMVECTOR XM_CALLCONV XMVectorSin(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSinEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorCos(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorCosEst(FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorSinCos(_Out_ XMVECTOR* pSin, _Out_ XMVECTOR* pCos, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMVectorSinCosEst(_Out_ XMVECTOR* pSin, _Out_ XMVECTOR* pCos, - _In_ FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorTan(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorTanEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorSinH(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorCosH(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorTanH(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorASin(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorASinEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorACos(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorACosEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorATan(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorATanEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVectorATan2(FXMVECTOR Y, FXMVECTOR X) noexcept; -XMVECTOR XM_CALLCONV XMVectorATan2Est(FXMVECTOR Y, FXMVECTOR X) noexcept; -XMVECTOR XM_CALLCONV XMVectorLerp(FXMVECTOR V0, FXMVECTOR V1, float t) noexcept; -XMVECTOR XM_CALLCONV XMVectorLerpV(FXMVECTOR V0, FXMVECTOR V1, - FXMVECTOR T) noexcept; -XMVECTOR XM_CALLCONV XMVectorHermite(FXMVECTOR Position0, FXMVECTOR Tangent0, - FXMVECTOR Position1, GXMVECTOR Tangent1, - float t) noexcept; -XMVECTOR XM_CALLCONV XMVectorHermiteV(FXMVECTOR Position0, FXMVECTOR Tangent0, - FXMVECTOR Position1, GXMVECTOR Tangent1, - HXMVECTOR T) noexcept; -XMVECTOR XM_CALLCONV XMVectorCatmullRom(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, - GXMVECTOR Position3, float t) noexcept; -XMVECTOR XM_CALLCONV XMVectorCatmullRomV(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, - GXMVECTOR Position3, - HXMVECTOR T) noexcept; -XMVECTOR XM_CALLCONV XMVectorBaryCentric(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, float f, - float g) noexcept; -XMVECTOR XM_CALLCONV XMVectorBaryCentricV(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, GXMVECTOR F, - HXMVECTOR G) noexcept; - -/**************************************************************************** - * - * 2D vector operations - * - ****************************************************************************/ - -bool XM_CALLCONV XMVector2Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector2EqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector2EqualIntR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2NearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept; -bool XM_CALLCONV XMVector2NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2NotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector2GreaterR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector2GreaterOrEqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2Less(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2LessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector2InBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; - -bool XM_CALLCONV XMVector2IsNaN(FXMVECTOR V) noexcept; -bool XM_CALLCONV XMVector2IsInfinite(FXMVECTOR V) noexcept; - -XMVECTOR XM_CALLCONV XMVector2Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector2Cross(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector2LengthSq(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2ReciprocalLength(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2LengthEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2Length(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2NormalizeEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2Normalize(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2ClampLength(FXMVECTOR V, float LengthMin, - float LengthMax) noexcept; -XMVECTOR XM_CALLCONV XMVector2ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, - FXMVECTOR LengthMax) noexcept; -XMVECTOR XM_CALLCONV XMVector2Reflect(FXMVECTOR Incident, - FXMVECTOR Normal) noexcept; -XMVECTOR XM_CALLCONV XMVector2Refract(FXMVECTOR Incident, FXMVECTOR Normal, - float RefractionIndex) noexcept; -XMVECTOR XM_CALLCONV XMVector2RefractV(FXMVECTOR Incident, FXMVECTOR Normal, - FXMVECTOR RefractionIndex) noexcept; -XMVECTOR XM_CALLCONV XMVector2Orthogonal(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector2AngleBetweenNormalsEst(FXMVECTOR N1, - FXMVECTOR N2) noexcept; -XMVECTOR XM_CALLCONV XMVector2AngleBetweenNormals(FXMVECTOR N1, - FXMVECTOR N2) noexcept; -XMVECTOR XM_CALLCONV XMVector2AngleBetweenVectors(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector2LinePointDistance(FXMVECTOR LinePoint1, - FXMVECTOR LinePoint2, - FXMVECTOR Point) noexcept; -XMVECTOR XM_CALLCONV XMVector2IntersectLine(FXMVECTOR Line1Point1, - FXMVECTOR Line1Point2, - FXMVECTOR Line2Point1, - GXMVECTOR Line2Point2) noexcept; -XMVECTOR XM_CALLCONV XMVector2Transform(FXMVECTOR V, FXMMATRIX M) noexcept; -XMFLOAT4* XM_CALLCONV XMVector2TransformStream( - _Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (VectorCount - 1)) - XMFLOAT4* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT2) + InputStride * (VectorCount - 1)) - const XMFLOAT2* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, - _In_ FXMMATRIX M) noexcept; -XMVECTOR XM_CALLCONV XMVector2TransformCoord(FXMVECTOR V, FXMMATRIX M) noexcept; -XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream( - _Out_writes_bytes_(sizeof(XMFLOAT2) + OutputStride * (VectorCount - 1)) - XMFLOAT2* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT2) + InputStride * (VectorCount - 1)) - const XMFLOAT2* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, - _In_ FXMMATRIX M) noexcept; -XMVECTOR XM_CALLCONV XMVector2TransformNormal(FXMVECTOR V, - FXMMATRIX M) noexcept; -XMFLOAT2* XM_CALLCONV XMVector2TransformNormalStream( - _Out_writes_bytes_(sizeof(XMFLOAT2) + OutputStride * (VectorCount - 1)) - XMFLOAT2* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT2) + InputStride * (VectorCount - 1)) - const XMFLOAT2* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, - _In_ FXMMATRIX M) noexcept; - -/**************************************************************************** - * - * 3D vector operations - * - ****************************************************************************/ - -bool XM_CALLCONV XMVector3Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector3EqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector3EqualIntR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3NearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept; -bool XM_CALLCONV XMVector3NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3NotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector3GreaterR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector3GreaterOrEqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3Less(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3LessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector3InBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; - -bool XM_CALLCONV XMVector3IsNaN(FXMVECTOR V) noexcept; -bool XM_CALLCONV XMVector3IsInfinite(FXMVECTOR V) noexcept; - -XMVECTOR XM_CALLCONV XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector3Cross(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector3LengthSq(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3ReciprocalLength(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3LengthEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3Length(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3NormalizeEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3Normalize(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3ClampLength(FXMVECTOR V, float LengthMin, - float LengthMax) noexcept; -XMVECTOR XM_CALLCONV XMVector3ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, - FXMVECTOR LengthMax) noexcept; -XMVECTOR XM_CALLCONV XMVector3Reflect(FXMVECTOR Incident, - FXMVECTOR Normal) noexcept; -XMVECTOR XM_CALLCONV XMVector3Refract(FXMVECTOR Incident, FXMVECTOR Normal, - float RefractionIndex) noexcept; -XMVECTOR XM_CALLCONV XMVector3RefractV(FXMVECTOR Incident, FXMVECTOR Normal, - FXMVECTOR RefractionIndex) noexcept; -XMVECTOR XM_CALLCONV XMVector3Orthogonal(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector3AngleBetweenNormalsEst(FXMVECTOR N1, - FXMVECTOR N2) noexcept; -XMVECTOR XM_CALLCONV XMVector3AngleBetweenNormals(FXMVECTOR N1, - FXMVECTOR N2) noexcept; -XMVECTOR XM_CALLCONV XMVector3AngleBetweenVectors(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector3LinePointDistance(FXMVECTOR LinePoint1, - FXMVECTOR LinePoint2, - FXMVECTOR Point) noexcept; -void XM_CALLCONV XMVector3ComponentsFromNormal(_Out_ XMVECTOR* pParallel, - _Out_ XMVECTOR* pPerpendicular, - _In_ FXMVECTOR V, - _In_ FXMVECTOR Normal) noexcept; -XMVECTOR XM_CALLCONV XMVector3Rotate(FXMVECTOR V, - FXMVECTOR RotationQuaternion) noexcept; -XMVECTOR XM_CALLCONV -XMVector3InverseRotate(FXMVECTOR V, FXMVECTOR RotationQuaternion) noexcept; -XMVECTOR XM_CALLCONV XMVector3Transform(FXMVECTOR V, FXMMATRIX M) noexcept; -XMFLOAT4* XM_CALLCONV XMVector3TransformStream( - _Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (VectorCount - 1)) - XMFLOAT4* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) - const XMFLOAT3* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, - _In_ FXMMATRIX M) noexcept; -XMVECTOR XM_CALLCONV XMVector3TransformCoord(FXMVECTOR V, FXMMATRIX M) noexcept; -XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream( - _Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) - XMFLOAT3* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) - const XMFLOAT3* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, - _In_ FXMMATRIX M) noexcept; -XMVECTOR XM_CALLCONV XMVector3TransformNormal(FXMVECTOR V, - FXMMATRIX M) noexcept; -XMFLOAT3* XM_CALLCONV XMVector3TransformNormalStream( - _Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) - XMFLOAT3* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) - const XMFLOAT3* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, - _In_ FXMMATRIX M) noexcept; -XMVECTOR XM_CALLCONV XMVector3Project(FXMVECTOR V, float ViewportX, - float ViewportY, float ViewportWidth, - float ViewportHeight, float ViewportMinZ, - float ViewportMaxZ, FXMMATRIX Projection, - CXMMATRIX View, CXMMATRIX World) noexcept; -XMFLOAT3* XM_CALLCONV XMVector3ProjectStream( - _Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) - XMFLOAT3* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) - const XMFLOAT3* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, _In_ float ViewportX, - _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, - _In_ float ViewportMinZ, _In_ float ViewportMaxZ, _In_ FXMMATRIX Projection, - _In_ CXMMATRIX View, _In_ CXMMATRIX World) noexcept; -XMVECTOR XM_CALLCONV XMVector3Unproject(FXMVECTOR V, float ViewportX, - float ViewportY, float ViewportWidth, - float ViewportHeight, - float ViewportMinZ, float ViewportMaxZ, - FXMMATRIX Projection, CXMMATRIX View, - CXMMATRIX World) noexcept; -XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream( - _Out_writes_bytes_(sizeof(XMFLOAT3) + OutputStride * (VectorCount - 1)) - XMFLOAT3* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT3) + InputStride * (VectorCount - 1)) - const XMFLOAT3* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, _In_ float ViewportX, - _In_ float ViewportY, _In_ float ViewportWidth, _In_ float ViewportHeight, - _In_ float ViewportMinZ, _In_ float ViewportMaxZ, _In_ FXMMATRIX Projection, - _In_ CXMMATRIX View, _In_ CXMMATRIX World) noexcept; - -/**************************************************************************** - * - * 4D vector operations - * - ****************************************************************************/ - -bool XM_CALLCONV XMVector4Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector4EqualR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector4EqualIntR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4NearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept; -bool XM_CALLCONV XMVector4NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4NotEqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector4GreaterR(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4GreaterOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -uint32_t XM_CALLCONV XMVector4GreaterOrEqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4Less(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4LessOrEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept; -bool XM_CALLCONV XMVector4InBounds(FXMVECTOR V, FXMVECTOR Bounds) noexcept; - -bool XM_CALLCONV XMVector4IsNaN(FXMVECTOR V) noexcept; -bool XM_CALLCONV XMVector4IsInfinite(FXMVECTOR V) noexcept; - -XMVECTOR XM_CALLCONV XMVector4Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector4Cross(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR V3) noexcept; -XMVECTOR XM_CALLCONV XMVector4LengthSq(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4ReciprocalLength(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4LengthEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4Length(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4NormalizeEst(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4Normalize(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4ClampLength(FXMVECTOR V, float LengthMin, - float LengthMax) noexcept; -XMVECTOR XM_CALLCONV XMVector4ClampLengthV(FXMVECTOR V, FXMVECTOR LengthMin, - FXMVECTOR LengthMax) noexcept; -XMVECTOR XM_CALLCONV XMVector4Reflect(FXMVECTOR Incident, - FXMVECTOR Normal) noexcept; -XMVECTOR XM_CALLCONV XMVector4Refract(FXMVECTOR Incident, FXMVECTOR Normal, - float RefractionIndex) noexcept; -XMVECTOR XM_CALLCONV XMVector4RefractV(FXMVECTOR Incident, FXMVECTOR Normal, - FXMVECTOR RefractionIndex) noexcept; -XMVECTOR XM_CALLCONV XMVector4Orthogonal(FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMVector4AngleBetweenNormalsEst(FXMVECTOR N1, - FXMVECTOR N2) noexcept; -XMVECTOR XM_CALLCONV XMVector4AngleBetweenNormals(FXMVECTOR N1, - FXMVECTOR N2) noexcept; -XMVECTOR XM_CALLCONV XMVector4AngleBetweenVectors(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMVector4Transform(FXMVECTOR V, FXMMATRIX M) noexcept; -XMFLOAT4* XM_CALLCONV XMVector4TransformStream( - _Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (VectorCount - 1)) - XMFLOAT4* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT4) + InputStride * (VectorCount - 1)) - const XMFLOAT4* pInputStream, - _In_ size_t InputStride, _In_ size_t VectorCount, - _In_ FXMMATRIX M) noexcept; - -/**************************************************************************** - * - * Matrix operations - * - ****************************************************************************/ - -bool XM_CALLCONV XMMatrixIsNaN(FXMMATRIX M) noexcept; -bool XM_CALLCONV XMMatrixIsInfinite(FXMMATRIX M) noexcept; -bool XM_CALLCONV XMMatrixIsIdentity(FXMMATRIX M) noexcept; - -XMMATRIX XM_CALLCONV XMMatrixMultiply(FXMMATRIX M1, CXMMATRIX M2) noexcept; -XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose(FXMMATRIX M1, - CXMMATRIX M2) noexcept; -XMMATRIX XM_CALLCONV XMMatrixTranspose(FXMMATRIX M) noexcept; -XMMATRIX XM_CALLCONV XMMatrixInverse(_Out_opt_ XMVECTOR* pDeterminant, - _In_ FXMMATRIX M) noexcept; -XMMATRIX XM_CALLCONV XMMatrixVectorTensorProduct(FXMVECTOR V1, - FXMVECTOR V2) noexcept; -XMVECTOR XM_CALLCONV XMMatrixDeterminant(FXMMATRIX M) noexcept; - -_Success_(return) bool XM_CALLCONV - XMMatrixDecompose(_Out_ XMVECTOR* outScale, _Out_ XMVECTOR* outRotQuat, - _Out_ XMVECTOR* outTrans, _In_ FXMMATRIX M) noexcept; - -XMMATRIX XM_CALLCONV XMMatrixIdentity() noexcept; -XMMATRIX XM_CALLCONV XMMatrixSet(float m00, float m01, float m02, float m03, - float m10, float m11, float m12, float m13, - float m20, float m21, float m22, float m23, - float m30, float m31, float m32, - float m33) noexcept; -XMMATRIX XM_CALLCONV XMMatrixTranslation(float OffsetX, float OffsetY, - float OffsetZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixTranslationFromVector(FXMVECTOR Offset) noexcept; -XMMATRIX XM_CALLCONV XMMatrixScaling(float ScaleX, float ScaleY, - float ScaleZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixScalingFromVector(FXMVECTOR Scale) noexcept; -XMMATRIX XM_CALLCONV XMMatrixRotationX(float Angle) noexcept; -XMMATRIX XM_CALLCONV XMMatrixRotationY(float Angle) noexcept; -XMMATRIX XM_CALLCONV XMMatrixRotationZ(float Angle) noexcept; - -// Rotates about y-axis (Yaw), then x-axis (Pitch), then z-axis (Roll) -XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYaw(float Pitch, float Yaw, - float Roll) noexcept; - -// Rotates about y-axis (Angles.y), then x-axis (Angles.x), then z-axis -// (Angles.z) -XMMATRIX XM_CALLCONV -XMMatrixRotationRollPitchYawFromVector(FXMVECTOR Angles) noexcept; - -XMMATRIX XM_CALLCONV XMMatrixRotationNormal(FXMVECTOR NormalAxis, - float Angle) noexcept; -XMMATRIX XM_CALLCONV XMMatrixRotationAxis(FXMVECTOR Axis, float Angle) noexcept; -XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion(FXMVECTOR Quaternion) noexcept; -XMMATRIX XM_CALLCONV XMMatrixTransformation2D( - FXMVECTOR ScalingOrigin, float ScalingOrientation, FXMVECTOR Scaling, - FXMVECTOR RotationOrigin, float Rotation, GXMVECTOR Translation) noexcept; -XMMATRIX XM_CALLCONV XMMatrixTransformation( - FXMVECTOR ScalingOrigin, FXMVECTOR ScalingOrientationQuaternion, - FXMVECTOR Scaling, GXMVECTOR RotationOrigin, HXMVECTOR RotationQuaternion, - HXMVECTOR Translation) noexcept; -XMMATRIX XM_CALLCONV -XMMatrixAffineTransformation2D(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, - float Rotation, FXMVECTOR Translation) noexcept; -XMMATRIX XM_CALLCONV XMMatrixAffineTransformation( - FXMVECTOR Scaling, FXMVECTOR RotationOrigin, FXMVECTOR RotationQuaternion, - GXMVECTOR Translation) noexcept; -XMMATRIX XM_CALLCONV XMMatrixReflect(FXMVECTOR ReflectionPlane) noexcept; -XMMATRIX XM_CALLCONV XMMatrixShadow(FXMVECTOR ShadowPlane, - FXMVECTOR LightPosition) noexcept; - -XMMATRIX XM_CALLCONV XMMatrixLookAtLH(FXMVECTOR EyePosition, - FXMVECTOR FocusPosition, - FXMVECTOR UpDirection) noexcept; -XMMATRIX XM_CALLCONV XMMatrixLookAtRH(FXMVECTOR EyePosition, - FXMVECTOR FocusPosition, - FXMVECTOR UpDirection) noexcept; -XMMATRIX XM_CALLCONV XMMatrixLookToLH(FXMVECTOR EyePosition, - FXMVECTOR EyeDirection, - FXMVECTOR UpDirection) noexcept; -XMMATRIX XM_CALLCONV XMMatrixLookToRH(FXMVECTOR EyePosition, - FXMVECTOR EyeDirection, - FXMVECTOR UpDirection) noexcept; -XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH(float ViewWidth, float ViewHeight, - float NearZ, float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH(float ViewWidth, float ViewHeight, - float NearZ, float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH(float FovAngleY, - float AspectRatio, float NearZ, - float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH(float FovAngleY, - float AspectRatio, float NearZ, - float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH(float ViewLeft, - float ViewRight, - float ViewBottom, - float ViewTop, float NearZ, - float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH(float ViewLeft, - float ViewRight, - float ViewBottom, - float ViewTop, float NearZ, - float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixOrthographicLH(float ViewWidth, float ViewHeight, - float NearZ, float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixOrthographicRH(float ViewWidth, float ViewHeight, - float NearZ, float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH(float ViewLeft, - float ViewRight, - float ViewBottom, - float ViewTop, float NearZ, - float FarZ) noexcept; -XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH(float ViewLeft, - float ViewRight, - float ViewBottom, - float ViewTop, float NearZ, - float FarZ) noexcept; - -/**************************************************************************** - * - * Quaternion operations - * - ****************************************************************************/ - -bool XM_CALLCONV XMQuaternionEqual(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; -bool XM_CALLCONV XMQuaternionNotEqual(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; - -bool XM_CALLCONV XMQuaternionIsNaN(FXMVECTOR Q) noexcept; -bool XM_CALLCONV XMQuaternionIsInfinite(FXMVECTOR Q) noexcept; -bool XM_CALLCONV XMQuaternionIsIdentity(FXMVECTOR Q) noexcept; - -XMVECTOR XM_CALLCONV XMQuaternionDot(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionMultiply(FXMVECTOR Q1, FXMVECTOR Q2) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionLengthSq(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionLength(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionNormalize(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionConjugate(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionInverse(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionLn(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionExp(FXMVECTOR Q) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionSlerp(FXMVECTOR Q0, FXMVECTOR Q1, - float t) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionSlerpV(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR T) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionSquad(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, - GXMVECTOR Q3, float t) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionSquadV(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR Q2, GXMVECTOR Q3, - HXMVECTOR T) noexcept; -void XM_CALLCONV XMQuaternionSquadSetup(_Out_ XMVECTOR* pA, _Out_ XMVECTOR* pB, - _Out_ XMVECTOR* pC, _In_ FXMVECTOR Q0, - _In_ FXMVECTOR Q1, _In_ FXMVECTOR Q2, - _In_ GXMVECTOR Q3) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionBaryCentric(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR Q2, float f, - float g) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR Q2, GXMVECTOR F, - HXMVECTOR G) noexcept; - -XMVECTOR XM_CALLCONV XMQuaternionIdentity() noexcept; - -// Rotates about y-axis (Yaw), then x-axis (Pitch), then z-axis (Roll) -XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw(float Pitch, float Yaw, - float Roll) noexcept; - -// Rotates about y-axis (Angles.y), then x-axis (Angles.x), then z-axis -// (Angles.z) -XMVECTOR XM_CALLCONV -XMQuaternionRotationRollPitchYawFromVector(FXMVECTOR Angles) noexcept; - -XMVECTOR XM_CALLCONV XMQuaternionRotationNormal(FXMVECTOR NormalAxis, - float Angle) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionRotationAxis(FXMVECTOR Axis, - float Angle) noexcept; -XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix(FXMMATRIX M) noexcept; - -void XM_CALLCONV XMQuaternionToAxisAngle(_Out_ XMVECTOR* pAxis, - _Out_ float* pAngle, - _In_ FXMVECTOR Q) noexcept; - -/**************************************************************************** - * - * Plane operations - * - ****************************************************************************/ - -bool XM_CALLCONV XMPlaneEqual(FXMVECTOR P1, FXMVECTOR P2) noexcept; -bool XM_CALLCONV XMPlaneNearEqual(FXMVECTOR P1, FXMVECTOR P2, - FXMVECTOR Epsilon) noexcept; -bool XM_CALLCONV XMPlaneNotEqual(FXMVECTOR P1, FXMVECTOR P2) noexcept; - -bool XM_CALLCONV XMPlaneIsNaN(FXMVECTOR P) noexcept; -bool XM_CALLCONV XMPlaneIsInfinite(FXMVECTOR P) noexcept; - -XMVECTOR XM_CALLCONV XMPlaneDot(FXMVECTOR P, FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMPlaneDotCoord(FXMVECTOR P, FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMPlaneDotNormal(FXMVECTOR P, FXMVECTOR V) noexcept; -XMVECTOR XM_CALLCONV XMPlaneNormalizeEst(FXMVECTOR P) noexcept; -XMVECTOR XM_CALLCONV XMPlaneNormalize(FXMVECTOR P) noexcept; -XMVECTOR XM_CALLCONV XMPlaneIntersectLine(FXMVECTOR P, FXMVECTOR LinePoint1, - FXMVECTOR LinePoint2) noexcept; -void XM_CALLCONV XMPlaneIntersectPlane(_Out_ XMVECTOR* pLinePoint1, - _Out_ XMVECTOR* pLinePoint2, - _In_ FXMVECTOR P1, - _In_ FXMVECTOR P2) noexcept; - -// Transforms a plane given an inverse transpose matrix -XMVECTOR XM_CALLCONV XMPlaneTransform(FXMVECTOR P, FXMMATRIX ITM) noexcept; - -// Transforms an array of planes given an inverse transpose matrix -XMFLOAT4* XM_CALLCONV XMPlaneTransformStream( - _Out_writes_bytes_(sizeof(XMFLOAT4) + OutputStride * (PlaneCount - 1)) - XMFLOAT4* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(XMFLOAT4) + InputStride * (PlaneCount - 1)) - const XMFLOAT4* pInputStream, - _In_ size_t InputStride, _In_ size_t PlaneCount, - _In_ FXMMATRIX ITM) noexcept; - -XMVECTOR XM_CALLCONV XMPlaneFromPointNormal(FXMVECTOR Point, - FXMVECTOR Normal) noexcept; -XMVECTOR XM_CALLCONV XMPlaneFromPoints(FXMVECTOR Point1, FXMVECTOR Point2, - FXMVECTOR Point3) noexcept; - -/**************************************************************************** - * - * Color operations - * - ****************************************************************************/ - -bool XM_CALLCONV XMColorEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; -bool XM_CALLCONV XMColorNotEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; -bool XM_CALLCONV XMColorGreater(FXMVECTOR C1, FXMVECTOR C2) noexcept; -bool XM_CALLCONV XMColorGreaterOrEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; -bool XM_CALLCONV XMColorLess(FXMVECTOR C1, FXMVECTOR C2) noexcept; -bool XM_CALLCONV XMColorLessOrEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept; - -bool XM_CALLCONV XMColorIsNaN(FXMVECTOR C) noexcept; -bool XM_CALLCONV XMColorIsInfinite(FXMVECTOR C) noexcept; - -XMVECTOR XM_CALLCONV XMColorNegative(FXMVECTOR C) noexcept; -XMVECTOR XM_CALLCONV XMColorModulate(FXMVECTOR C1, FXMVECTOR C2) noexcept; -XMVECTOR XM_CALLCONV XMColorAdjustSaturation(FXMVECTOR C, - float Saturation) noexcept; -XMVECTOR XM_CALLCONV XMColorAdjustContrast(FXMVECTOR C, - float Contrast) noexcept; - -XMVECTOR XM_CALLCONV XMColorRGBToHSL(FXMVECTOR rgb) noexcept; -XMVECTOR XM_CALLCONV XMColorHSLToRGB(FXMVECTOR hsl) noexcept; - -XMVECTOR XM_CALLCONV XMColorRGBToHSV(FXMVECTOR rgb) noexcept; -XMVECTOR XM_CALLCONV XMColorHSVToRGB(FXMVECTOR hsv) noexcept; - -XMVECTOR XM_CALLCONV XMColorRGBToYUV(FXMVECTOR rgb) noexcept; -XMVECTOR XM_CALLCONV XMColorYUVToRGB(FXMVECTOR yuv) noexcept; - -XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD(FXMVECTOR rgb) noexcept; -XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD(FXMVECTOR yuv) noexcept; - -XMVECTOR XM_CALLCONV XMColorRGBToYUV_UHD(FXMVECTOR rgb) noexcept; -XMVECTOR XM_CALLCONV XMColorYUVToRGB_UHD(FXMVECTOR yuv) noexcept; - -XMVECTOR XM_CALLCONV XMColorRGBToXYZ(FXMVECTOR rgb) noexcept; -XMVECTOR XM_CALLCONV XMColorXYZToRGB(FXMVECTOR xyz) noexcept; - -XMVECTOR XM_CALLCONV XMColorXYZToSRGB(FXMVECTOR xyz) noexcept; -XMVECTOR XM_CALLCONV XMColorSRGBToXYZ(FXMVECTOR srgb) noexcept; - -XMVECTOR XM_CALLCONV XMColorRGBToSRGB(FXMVECTOR rgb) noexcept; -XMVECTOR XM_CALLCONV XMColorSRGBToRGB(FXMVECTOR srgb) noexcept; - -/**************************************************************************** - * - * Miscellaneous operations - * - ****************************************************************************/ - -bool XMVerifyCPUSupport() noexcept; - -XMVECTOR XM_CALLCONV XMFresnelTerm(FXMVECTOR CosIncidentAngle, - FXMVECTOR RefractionIndex) noexcept; - -bool XMScalarNearEqual(float S1, float S2, float Epsilon) noexcept; -float XMScalarModAngle(float Value) noexcept; - -float XMScalarSin(float Value) noexcept; -float XMScalarSinEst(float Value) noexcept; - -float XMScalarCos(float Value) noexcept; -float XMScalarCosEst(float Value) noexcept; - -void XMScalarSinCos(_Out_ float* pSin, _Out_ float* pCos, float Value) noexcept; -void XMScalarSinCosEst(_Out_ float* pSin, _Out_ float* pCos, - float Value) noexcept; - -float XMScalarASin(float Value) noexcept; -float XMScalarASinEst(float Value) noexcept; - -float XMScalarACos(float Value) noexcept; -float XMScalarACosEst(float Value) noexcept; - -/**************************************************************************** - * - * Templates - * - ****************************************************************************/ - -#if defined(__XNAMATH_H__) && defined(XMMin) -#undef XMMin -#undef XMMax -#endif - -template -inline T XMMin(T a, T b) noexcept { - return (a < b) ? a : b; -} -template -inline T XMMax(T a, T b) noexcept { - return (a > b) ? a : b; -} - -//------------------------------------------------------------------------------ - -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - -// PermuteHelper internal template (SSE only) -namespace MathInternal { -// Slow path fallback for permutes that do not map to a single SSE shuffle -// opcode. -template -struct PermuteHelper { - static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) noexcept { - static const XMVECTORU32 selectMask = {{{ - WhichX ? 0xFFFFFFFF : 0, - WhichY ? 0xFFFFFFFF : 0, - WhichZ ? 0xFFFFFFFF : 0, - WhichW ? 0xFFFFFFFF : 0, - }}}; - - XMVECTOR shuffled1 = XM_PERMUTE_PS(v1, Shuffle); - XMVECTOR shuffled2 = XM_PERMUTE_PS(v2, Shuffle); - - XMVECTOR masked1 = _mm_andnot_ps(selectMask, shuffled1); - XMVECTOR masked2 = _mm_and_ps(selectMask, shuffled2); - - return _mm_or_ps(masked1, masked2); - } -}; - -// Fast path for permutes that only read from the first vector. -template -struct PermuteHelper { - static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR) noexcept { - return XM_PERMUTE_PS(v1, Shuffle); - } -}; - -// Fast path for permutes that only read from the second vector. -template -struct PermuteHelper { - static XMVECTOR XM_CALLCONV Permute(FXMVECTOR, FXMVECTOR v2) noexcept { - return XM_PERMUTE_PS(v2, Shuffle); - } -}; - -// Fast path for permutes that read XY from the first vector, ZW from the -// second. -template -struct PermuteHelper { - static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) noexcept { - return _mm_shuffle_ps(v1, v2, Shuffle); - } -}; - -// Fast path for permutes that read XY from the second vector, ZW from the -// first. -template -struct PermuteHelper { - static XMVECTOR XM_CALLCONV Permute(FXMVECTOR v1, FXMVECTOR v2) noexcept { - return _mm_shuffle_ps(v2, v1, Shuffle); - } -}; -} // namespace MathInternal - -#endif // _XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_ - -// General permute template -template -inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - static_assert(PermuteX <= 7, "PermuteX template parameter out of range"); - static_assert(PermuteY <= 7, "PermuteY template parameter out of range"); - static_assert(PermuteZ <= 7, "PermuteZ template parameter out of range"); - static_assert(PermuteW <= 7, "PermuteW template parameter out of range"); - -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - constexpr uint32_t Shuffle = - _MM_SHUFFLE(PermuteW & 3, PermuteZ & 3, PermuteY & 3, PermuteX & 3); - - constexpr bool WhichX = PermuteX > 3; - constexpr bool WhichY = PermuteY > 3; - constexpr bool WhichZ = PermuteZ > 3; - constexpr bool WhichW = PermuteW > 3; - - return MathInternal::PermuteHelper::Permute(V1, V2); -#else - - return XMVectorPermute(V1, V2, PermuteX, PermuteY, PermuteZ, PermuteW); - -#endif -} - -// Special-case permute templates -template <> -constexpr XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 2, 3>(FXMVECTOR V1, - FXMVECTOR) noexcept { - return V1; -} -template <> -constexpr XMVECTOR XM_CALLCONV -XMVectorPermute<4, 5, 6, 7>(FXMVECTOR, FXMVECTOR V2) noexcept { - return V2; -} - -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 4, 5>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_movelh_ps(V1, V2); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<6, 7, 2, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_movehl_ps(V1, V2); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 4, 1, 5>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_unpacklo_ps(V1, V2); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 6, 3, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_unpackhi_ps(V1, V2); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_castpd_ps(_mm_unpackhi_pd(_mm_castps_pd(V1), _mm_castps_pd(V2))); -} -#endif - -#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 2, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x1); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 2, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x2); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 5, 2, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x3); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 6, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x4); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 6, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x5); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 6, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x6); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 5, 6, 3>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x7); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 2, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x8); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 2, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0x9); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 2, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0xA); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 5, 2, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0xB); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0xC); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<4, 1, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0xD); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 5, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return _mm_blend_ps(V1, V2, 0xE); -} -#endif - -#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - -// If the indices are all in the range 0-3 or 4-7, then use XMVectorSwizzle -// instead The mirror cases are not spelled out here as the programmer can -// always swap the arguments (i.e. prefer permutes where the X element comes -// from the V1 vector instead of the V2 vector) - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 4, 5>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vget_low_f32(V1), vget_low_f32(V2)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 4, 5>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_low_f32(V1)), vget_low_f32(V2)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 5, 4>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vget_low_f32(V1), vrev64_f32(vget_low_f32(V2))); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 5, 4>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_low_f32(V1)), - vrev64_f32(vget_low_f32(V2))); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vget_high_f32(V1), vget_high_f32(V2)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 2, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_high_f32(V1)), vget_high_f32(V2)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 7, 6>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vget_high_f32(V1), vrev64_f32(vget_high_f32(V2))); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 2, 7, 6>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_high_f32(V1)), - vrev64_f32(vget_high_f32(V2))); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vget_low_f32(V1), vget_high_f32(V2)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 6, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_low_f32(V1)), vget_high_f32(V2)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 1, 7, 6>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vget_low_f32(V1), vrev64_f32(vget_high_f32(V2))); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 0, 7, 6>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_low_f32(V1)), - vrev64_f32(vget_high_f32(V2))); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 2, 4, 5>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_high_f32(V1)), vget_low_f32(V2)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 5, 4>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vget_high_f32(V1), vrev64_f32(vget_low_f32(V2))); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 2, 5, 4>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vcombine_f32(vrev64_f32(vget_high_f32(V1)), - vrev64_f32(vget_low_f32(V2))); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 4, 2, 6>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vtrnq_f32(V1, V2).val[0]; -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 5, 3, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vtrnq_f32(V1, V2).val[1]; -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 4, 1, 5>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vzipq_f32(V1, V2).val[0]; -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 6, 3, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vzipq_f32(V1, V2).val[1]; -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<0, 2, 4, 6>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vuzpq_f32(V1, V2).val[0]; -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 3, 5, 7>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vuzpq_f32(V1, V2).val[1]; -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<1, 2, 3, 4>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vextq_f32(V1, V2, 1); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<2, 3, 4, 5>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vextq_f32(V1, V2, 2); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorPermute<3, 4, 5, 6>(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - return vextq_f32(V1, V2, 3); -} - -#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_ - -//------------------------------------------------------------------------------ - -// General swizzle template -template -inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V) noexcept { - static_assert(SwizzleX <= 3, "SwizzleX template parameter out of range"); - static_assert(SwizzleY <= 3, "SwizzleY template parameter out of range"); - static_assert(SwizzleZ <= 3, "SwizzleZ template parameter out of range"); - static_assert(SwizzleW <= 3, "SwizzleW template parameter out of range"); - -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - return XM_PERMUTE_PS(V, - _MM_SHUFFLE(SwizzleW, SwizzleZ, SwizzleY, SwizzleX)); -#else - - return XMVectorSwizzle(V, SwizzleX, SwizzleY, SwizzleZ, SwizzleW); - -#endif -} - -// Specialized swizzles -template <> -constexpr XMVECTOR XM_CALLCONV -XMVectorSwizzle<0, 1, 2, 3>(FXMVECTOR V) noexcept { - return V; -} - -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 1, 0, 1>(FXMVECTOR V) noexcept { - return _mm_movelh_ps(V, V); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 3, 2, 3>(FXMVECTOR V) noexcept { - return _mm_movehl_ps(V, V); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 1, 1>(FXMVECTOR V) noexcept { - return _mm_unpacklo_ps(V, V); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 2, 3, 3>(FXMVECTOR V) noexcept { - return _mm_unpackhi_ps(V, V); -} -#endif - -#if defined(_XM_SSE3_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 2, 2>(FXMVECTOR V) noexcept { - return _mm_moveldup_ps(V); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 1, 3, 3>(FXMVECTOR V) noexcept { - return _mm_movehdup_ps(V); -} -#endif - -#if defined(_XM_AVX2_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) && \ - defined(_XM_FAVOR_INTEL_) -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 0, 0>(FXMVECTOR V) noexcept { - return _mm_broadcastss_ps(V); -} -#endif - -#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 0, 0>(FXMVECTOR V) noexcept { - return vdupq_lane_f32(vget_low_f32(V), 0); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 1, 1, 1>(FXMVECTOR V) noexcept { - return vdupq_lane_f32(vget_low_f32(V), 1); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 2, 2, 2>(FXMVECTOR V) noexcept { - return vdupq_lane_f32(vget_high_f32(V), 0); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 3, 3, 3>(FXMVECTOR V) noexcept { - return vdupq_lane_f32(vget_high_f32(V), 1); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 0, 3, 2>(FXMVECTOR V) noexcept { - return vrev64q_f32(V); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 1, 0, 1>(FXMVECTOR V) noexcept { - float32x2_t vt = vget_low_f32(V); - return vcombine_f32(vt, vt); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 3, 2, 3>(FXMVECTOR V) noexcept { - float32x2_t vt = vget_high_f32(V); - return vcombine_f32(vt, vt); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 0, 1, 0>(FXMVECTOR V) noexcept { - float32x2_t vt = vrev64_f32(vget_low_f32(V)); - return vcombine_f32(vt, vt); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 2, 3, 2>(FXMVECTOR V) noexcept { - float32x2_t vt = vrev64_f32(vget_high_f32(V)); - return vcombine_f32(vt, vt); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 1, 3, 2>(FXMVECTOR V) noexcept { - return vcombine_f32(vget_low_f32(V), vrev64_f32(vget_high_f32(V))); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 0, 2, 3>(FXMVECTOR V) noexcept { - return vcombine_f32(vrev64_f32(vget_low_f32(V)), vget_high_f32(V)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 3, 1, 0>(FXMVECTOR V) noexcept { - return vcombine_f32(vget_high_f32(V), vrev64_f32(vget_low_f32(V))); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 2, 0, 1>(FXMVECTOR V) noexcept { - return vcombine_f32(vrev64_f32(vget_high_f32(V)), vget_low_f32(V)); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 2, 1, 0>(FXMVECTOR V) noexcept { - return vcombine_f32(vrev64_f32(vget_high_f32(V)), - vrev64_f32(vget_low_f32(V))); -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 2, 2>(FXMVECTOR V) noexcept { - return vtrnq_f32(V, V).val[0]; -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 1, 3, 3>(FXMVECTOR V) noexcept { - return vtrnq_f32(V, V).val[1]; -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 0, 1, 1>(FXMVECTOR V) noexcept { - return vzipq_f32(V, V).val[0]; -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 2, 3, 3>(FXMVECTOR V) noexcept { - return vzipq_f32(V, V).val[1]; -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0, 2, 0, 2>(FXMVECTOR V) noexcept { - return vuzpq_f32(V, V).val[0]; -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 3, 1, 3>(FXMVECTOR V) noexcept { - return vuzpq_f32(V, V).val[1]; -} - -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1, 2, 3, 0>(FXMVECTOR V) noexcept { - return vextq_f32(V, V, 1); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<2, 3, 0, 1>(FXMVECTOR V) noexcept { - return vextq_f32(V, V, 2); -} -template <> -inline XMVECTOR XM_CALLCONV XMVectorSwizzle<3, 0, 1, 2>(FXMVECTOR V) noexcept { - return vextq_f32(V, V, 3); -} - -#endif // _XM_ARM_NEON_INTRINSICS_ && !_XM_NO_INTRINSICS_ - -//------------------------------------------------------------------------------ - -template -inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - static_assert(Elements < 4, "Elements template parameter out of range"); - return XMVectorPermute(V1, V2); -} - -template -inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V) noexcept { - static_assert(Elements < 4, "Elements template parameter out of range"); - return XMVectorSwizzle(V); -} - -template -inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V) noexcept { - static_assert(Elements < 4, "Elements template parameter out of range"); - return XMVectorSwizzle<(4 - Elements) & 3, (5 - Elements) & 3, - (6 - Elements) & 3, (7 - Elements) & 3>(V); -} - -template -inline XMVECTOR XM_CALLCONV XMVectorInsert(FXMVECTOR VD, - FXMVECTOR VS) noexcept { - XMVECTOR Control = XMVectorSelectControl(Select0 & 1, Select1 & 1, - Select2 & 1, Select3 & 1); - return XMVectorSelect(VD, XMVectorRotateLeft(VS), - Control); -} - -/**************************************************************************** - * - * Globals - * - ****************************************************************************/ - -// The purpose of the following global constants is to prevent redundant -// reloading of the constants when they are referenced by more than one -// separate inline math routine called within the same function. Declaring -// a constant locally within a routine is sufficient to prevent redundant -// reloads of that constant when that single routine is called multiple -// times in a function, but if the constant is used (and declared) in a -// separate math routine it would be reloaded. - -#ifndef XMGLOBALCONST -#if defined(__GNUC__) && !defined(__MINGW32__) -#define XMGLOBALCONST extern const __attribute__((weak)) -#else -#define XMGLOBALCONST extern const __declspec(selectany) -#endif -#endif - -XMGLOBALCONST XMVECTORF32 g_XMSinCoefficients0 = { - {{-0.16666667f, +0.0083333310f, -0.00019840874f, +2.7525562e-06f}}}; -XMGLOBALCONST XMVECTORF32 g_XMSinCoefficients1 = { - {{-2.3889859e-08f, -0.16665852f /*Est1*/, +0.0083139502f /*Est2*/, - -0.00018524670f /*Est3*/}}}; -XMGLOBALCONST XMVECTORF32 g_XMCosCoefficients0 = { - {{-0.5f, +0.041666638f, -0.0013888378f, +2.4760495e-05f}}}; -XMGLOBALCONST XMVECTORF32 g_XMCosCoefficients1 = { - {{-2.6051615e-07f, -0.49992746f /*Est1*/, +0.041493919f /*Est2*/, - -0.0012712436f /*Est3*/}}}; -XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients0 = { - {{1.0f, 0.333333333f, 0.133333333f, 5.396825397e-2f}}}; -XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients1 = { - {{2.186948854e-2f, 8.863235530e-3f, 3.592128167e-3f, 1.455834485e-3f}}}; -XMGLOBALCONST XMVECTORF32 g_XMTanCoefficients2 = { - {{5.900274264e-4f, 2.391290764e-4f, 9.691537707e-5f, 3.927832950e-5f}}}; -XMGLOBALCONST XMVECTORF32 g_XMArcCoefficients0 = { - {{+1.5707963050f, -0.2145988016f, +0.0889789874f, -0.0501743046f}}}; -XMGLOBALCONST XMVECTORF32 g_XMArcCoefficients1 = { - {{+0.0308918810f, -0.0170881256f, +0.0066700901f, -0.0012624911f}}}; -XMGLOBALCONST XMVECTORF32 g_XMATanCoefficients0 = { - {{-0.3333314528f, +0.1999355085f, -0.1420889944f, +0.1065626393f}}}; -XMGLOBALCONST XMVECTORF32 g_XMATanCoefficients1 = { - {{-0.0752896400f, +0.0429096138f, -0.0161657367f, +0.0028662257f}}}; -XMGLOBALCONST XMVECTORF32 g_XMATanEstCoefficients0 = { - {{+0.999866f, +0.999866f, +0.999866f, +0.999866f}}}; -XMGLOBALCONST XMVECTORF32 g_XMATanEstCoefficients1 = { - {{-0.3302995f, +0.180141f, -0.085133f, +0.0208351f}}}; -XMGLOBALCONST XMVECTORF32 g_XMTanEstCoefficients = { - {{2.484f, -1.954923183e-1f, 2.467401101f, XM_1DIVPI}}}; -XMGLOBALCONST XMVECTORF32 g_XMArcEstCoefficients = { - {{+1.5707288f, -0.2121144f, +0.0742610f, -0.0187293f}}}; -XMGLOBALCONST XMVECTORF32 g_XMPiConstants0 = { - {{XM_PI, XM_2PI, XM_1DIVPI, XM_1DIV2PI}}}; -XMGLOBALCONST XMVECTORF32 g_XMIdentityR0 = {{{1.0f, 0.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMIdentityR1 = {{{0.0f, 1.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMIdentityR2 = {{{0.0f, 0.0f, 1.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMIdentityR3 = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR0 = {{{-1.0f, 0.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR1 = {{{0.0f, -1.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR2 = {{{0.0f, 0.0f, -1.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegIdentityR3 = {{{0.0f, 0.0f, 0.0f, -1.0f}}}; -XMGLOBALCONST XMVECTORU32 g_XMNegativeZero = { - {{0x80000000, 0x80000000, 0x80000000, 0x80000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMNegate3 = { - {{0x80000000, 0x80000000, 0x80000000, 0x00000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskXY = { - {{0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMMask3 = { - {{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskX = { - {{0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskY = { - {{0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskZ = { - {{0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskW = { - {{0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF}}}; -XMGLOBALCONST XMVECTORF32 g_XMOne = {{{1.0f, 1.0f, 1.0f, 1.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMOne3 = {{{1.0f, 1.0f, 1.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMZero = {{{0.0f, 0.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMTwo = {{{2.f, 2.f, 2.f, 2.f}}}; -XMGLOBALCONST XMVECTORF32 g_XMFour = {{{4.f, 4.f, 4.f, 4.f}}}; -XMGLOBALCONST XMVECTORF32 g_XMSix = {{{6.f, 6.f, 6.f, 6.f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegativeOne = {{{-1.0f, -1.0f, -1.0f, -1.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMOneHalf = {{{0.5f, 0.5f, 0.5f, 0.5f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegativeOneHalf = { - {{-0.5f, -0.5f, -0.5f, -0.5f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegativeTwoPi = { - {{-XM_2PI, -XM_2PI, -XM_2PI, -XM_2PI}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegativePi = {{{-XM_PI, -XM_PI, -XM_PI, -XM_PI}}}; -XMGLOBALCONST XMVECTORF32 g_XMHalfPi = { - {{XM_PIDIV2, XM_PIDIV2, XM_PIDIV2, XM_PIDIV2}}}; -XMGLOBALCONST XMVECTORF32 g_XMPi = {{{XM_PI, XM_PI, XM_PI, XM_PI}}}; -XMGLOBALCONST XMVECTORF32 g_XMReciprocalPi = { - {{XM_1DIVPI, XM_1DIVPI, XM_1DIVPI, XM_1DIVPI}}}; -XMGLOBALCONST XMVECTORF32 g_XMTwoPi = {{{XM_2PI, XM_2PI, XM_2PI, XM_2PI}}}; -XMGLOBALCONST XMVECTORF32 g_XMReciprocalTwoPi = { - {{XM_1DIV2PI, XM_1DIV2PI, XM_1DIV2PI, XM_1DIV2PI}}}; -XMGLOBALCONST XMVECTORF32 g_XMEpsilon = { - {{1.192092896e-7f, 1.192092896e-7f, 1.192092896e-7f, 1.192092896e-7f}}}; -XMGLOBALCONST XMVECTORI32 g_XMInfinity = { - {{0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}}}; -XMGLOBALCONST XMVECTORI32 g_XMQNaN = { - {{0x7FC00000, 0x7FC00000, 0x7FC00000, 0x7FC00000}}}; -XMGLOBALCONST XMVECTORI32 g_XMQNaNTest = { - {{0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}}}; -XMGLOBALCONST XMVECTORI32 g_XMAbsMask = { - {{0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF}}}; -XMGLOBALCONST XMVECTORI32 g_XMFltMin = { - {{0x00800000, 0x00800000, 0x00800000, 0x00800000}}}; -XMGLOBALCONST XMVECTORI32 g_XMFltMax = { - {{0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF, 0x7F7FFFFF}}}; -XMGLOBALCONST XMVECTORU32 g_XMNegOneMask = { - {{0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskA8R8G8B8 = { - {{0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipA8R8G8B8 = { - {{0x00000000, 0x00000000, 0x00000000, 0x80000000}}}; -XMGLOBALCONST XMVECTORF32 g_XMFixAA8R8G8B8 = { - {{0.0f, 0.0f, 0.0f, float(0x80000000U)}}}; -XMGLOBALCONST XMVECTORF32 g_XMNormalizeA8R8G8B8 = { - {{1.0f / (255.0f * float(0x10000)), 1.0f / (255.0f * float(0x100)), - 1.0f / 255.0f, 1.0f / (255.0f * float(0x1000000))}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskA2B10G10R10 = { - {{0x000003FF, 0x000FFC00, 0x3FF00000, 0xC0000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipA2B10G10R10 = { - {{0x00000200, 0x00080000, 0x20000000, 0x80000000}}}; -XMGLOBALCONST XMVECTORF32 g_XMFixAA2B10G10R10 = { - {{-512.0f, -512.0f * float(0x400), -512.0f * float(0x100000), - float(0x80000000U)}}}; -XMGLOBALCONST XMVECTORF32 g_XMNormalizeA2B10G10R10 = { - {{1.0f / 511.0f, 1.0f / (511.0f * float(0x400)), - 1.0f / (511.0f * float(0x100000)), 1.0f / (3.0f * float(0x40000000))}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskX16Y16 = { - {{0x0000FFFF, 0xFFFF0000, 0x00000000, 0x00000000}}}; -XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16 = { - {{0x00008000, 0x00000000, 0x00000000, 0x00000000}}}; -XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16 = {{{-32768.0f, 0.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16 = { - {{1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskX16Y16Z16W16 = { - {{0x0000FFFF, 0x0000FFFF, 0xFFFF0000, 0xFFFF0000}}}; -XMGLOBALCONST XMVECTORI32 g_XMFlipX16Y16Z16W16 = { - {{0x00008000, 0x00008000, 0x00000000, 0x00000000}}}; -XMGLOBALCONST XMVECTORF32 g_XMFixX16Y16Z16W16 = { - {{-32768.0f, -32768.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNormalizeX16Y16Z16W16 = { - {{1.0f / 32767.0f, 1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), - 1.0f / (32767.0f * 65536.0f)}}}; -XMGLOBALCONST XMVECTORF32 g_XMNoFraction = { - {{8388608.0f, 8388608.0f, 8388608.0f, 8388608.0f}}}; -XMGLOBALCONST XMVECTORI32 g_XMMaskByte = { - {{0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegateX = {{{-1.0f, 1.0f, 1.0f, 1.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegateY = {{{1.0f, -1.0f, 1.0f, 1.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegateZ = {{{1.0f, 1.0f, -1.0f, 1.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMNegateW = {{{1.0f, 1.0f, 1.0f, -1.0f}}}; -XMGLOBALCONST XMVECTORU32 g_XMSelect0101 = { - {{XM_SELECT_0, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1}}}; -XMGLOBALCONST XMVECTORU32 g_XMSelect1010 = { - {{XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0}}}; -XMGLOBALCONST XMVECTORI32 g_XMOneHalfMinusEpsilon = { - {{0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD, 0x3EFFFFFD}}}; -XMGLOBALCONST XMVECTORU32 g_XMSelect1000 = { - {{XM_SELECT_1, XM_SELECT_0, XM_SELECT_0, XM_SELECT_0}}}; -XMGLOBALCONST XMVECTORU32 g_XMSelect1100 = { - {{XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_0}}}; -XMGLOBALCONST XMVECTORU32 g_XMSelect1110 = { - {{XM_SELECT_1, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0}}}; -XMGLOBALCONST XMVECTORU32 g_XMSelect1011 = { - {{XM_SELECT_1, XM_SELECT_0, XM_SELECT_1, XM_SELECT_1}}}; -XMGLOBALCONST XMVECTORF32 g_XMFixupY16 = { - {{1.0f, 1.0f / 65536.0f, 0.0f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMFixupY16W16 = { - {{1.0f, 1.0f, 1.0f / 65536.0f, 1.0f / 65536.0f}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipY = {{{0, 0x80000000, 0, 0}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipZ = {{{0, 0, 0x80000000, 0}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipW = {{{0, 0, 0, 0x80000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipYZ = {{{0, 0x80000000, 0x80000000, 0}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipZW = {{{0, 0, 0x80000000, 0x80000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMFlipYW = {{{0, 0x80000000, 0, 0x80000000}}}; -XMGLOBALCONST XMVECTORI32 g_XMMaskDec4 = { - {{0x3FF, 0x3FF << 10, 0x3FF << 20, static_cast(0xC0000000)}}}; -XMGLOBALCONST XMVECTORI32 g_XMXorDec4 = { - {{0x200, 0x200 << 10, 0x200 << 20, 0}}}; -XMGLOBALCONST XMVECTORF32 g_XMAddUDec4 = {{{0, 0, 0, 32768.0f * 65536.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMAddDec4 = { - {{-512.0f, -512.0f * 1024.0f, -512.0f * 1024.0f * 1024.0f, 0}}}; -XMGLOBALCONST XMVECTORF32 g_XMMulDec4 = { - {{1.0f, 1.0f / 1024.0f, 1.0f / (1024.0f * 1024.0f), - 1.0f / (1024.0f * 1024.0f * 1024.0f)}}}; -XMGLOBALCONST XMVECTORU32 g_XMMaskByte4 = { - {{0xFF, 0xFF00, 0xFF0000, 0xFF000000}}}; -XMGLOBALCONST XMVECTORI32 g_XMXorByte4 = { - {{0x80, 0x8000, 0x800000, 0x00000000}}}; -XMGLOBALCONST XMVECTORF32 g_XMAddByte4 = { - {{-128.0f, -128.0f * 256.0f, -128.0f * 65536.0f, 0}}}; -XMGLOBALCONST XMVECTORF32 g_XMFixUnsigned = { - {{32768.0f * 65536.0f, 32768.0f * 65536.0f, 32768.0f * 65536.0f, - 32768.0f * 65536.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMMaxInt = { - {{65536.0f * 32768.0f - 128.0f, 65536.0f * 32768.0f - 128.0f, - 65536.0f * 32768.0f - 128.0f, 65536.0f * 32768.0f - 128.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMMaxUInt = { - {{65536.0f * 65536.0f - 256.0f, 65536.0f * 65536.0f - 256.0f, - 65536.0f * 65536.0f - 256.0f, 65536.0f * 65536.0f - 256.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMUnsignedFix = { - {{32768.0f * 65536.0f, 32768.0f * 65536.0f, 32768.0f * 65536.0f, - 32768.0f * 65536.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMsrgbScale = {{{12.92f, 12.92f, 12.92f, 1.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMsrgbA = {{{0.055f, 0.055f, 0.055f, 0.0f}}}; -XMGLOBALCONST XMVECTORF32 g_XMsrgbA1 = {{{1.055f, 1.055f, 1.055f, 1.0f}}}; -XMGLOBALCONST XMVECTORI32 g_XMExponentBias = {{{127, 127, 127, 127}}}; -XMGLOBALCONST XMVECTORI32 g_XMSubnormalExponent = {{{-126, -126, -126, -126}}}; -XMGLOBALCONST XMVECTORI32 g_XMNumTrailing = {{{23, 23, 23, 23}}}; -XMGLOBALCONST XMVECTORI32 g_XMMinNormal = { - {{0x00800000, 0x00800000, 0x00800000, 0x00800000}}}; -XMGLOBALCONST XMVECTORU32 g_XMNegInfinity = { - {{0xFF800000, 0xFF800000, 0xFF800000, 0xFF800000}}}; -XMGLOBALCONST XMVECTORU32 g_XMNegQNaN = { - {{0xFFC00000, 0xFFC00000, 0xFFC00000, 0xFFC00000}}}; -XMGLOBALCONST XMVECTORI32 g_XMBin128 = { - {{0x43000000, 0x43000000, 0x43000000, 0x43000000}}}; -XMGLOBALCONST XMVECTORU32 g_XMBinNeg150 = { - {{0xC3160000, 0xC3160000, 0xC3160000, 0xC3160000}}}; -XMGLOBALCONST XMVECTORI32 g_XM253 = {{{253, 253, 253, 253}}}; -XMGLOBALCONST XMVECTORF32 g_XMExpEst1 = { - {{-6.93147182e-1f, -6.93147182e-1f, -6.93147182e-1f, -6.93147182e-1f}}}; -XMGLOBALCONST XMVECTORF32 g_XMExpEst2 = { - {{+2.40226462e-1f, +2.40226462e-1f, +2.40226462e-1f, +2.40226462e-1f}}}; -XMGLOBALCONST XMVECTORF32 g_XMExpEst3 = { - {{-5.55036440e-2f, -5.55036440e-2f, -5.55036440e-2f, -5.55036440e-2f}}}; -XMGLOBALCONST XMVECTORF32 g_XMExpEst4 = { - {{+9.61597636e-3f, +9.61597636e-3f, +9.61597636e-3f, +9.61597636e-3f}}}; -XMGLOBALCONST XMVECTORF32 g_XMExpEst5 = { - {{-1.32823968e-3f, -1.32823968e-3f, -1.32823968e-3f, -1.32823968e-3f}}}; -XMGLOBALCONST XMVECTORF32 g_XMExpEst6 = { - {{+1.47491097e-4f, +1.47491097e-4f, +1.47491097e-4f, +1.47491097e-4f}}}; -XMGLOBALCONST XMVECTORF32 g_XMExpEst7 = { - {{-1.08635004e-5f, -1.08635004e-5f, -1.08635004e-5f, -1.08635004e-5f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst0 = { - {{+1.442693f, +1.442693f, +1.442693f, +1.442693f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst1 = { - {{-0.721242f, -0.721242f, -0.721242f, -0.721242f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst2 = { - {{+0.479384f, +0.479384f, +0.479384f, +0.479384f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst3 = { - {{-0.350295f, -0.350295f, -0.350295f, -0.350295f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst4 = { - {{+0.248590f, +0.248590f, +0.248590f, +0.248590f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst5 = { - {{-0.145700f, -0.145700f, -0.145700f, -0.145700f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst6 = { - {{+0.057148f, +0.057148f, +0.057148f, +0.057148f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLogEst7 = { - {{-0.010578f, -0.010578f, -0.010578f, -0.010578f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLgE = { - {{+1.442695f, +1.442695f, +1.442695f, +1.442695f}}}; -XMGLOBALCONST XMVECTORF32 g_XMInvLgE = { - {{+6.93147182e-1f, +6.93147182e-1f, +6.93147182e-1f, +6.93147182e-1f}}}; -XMGLOBALCONST XMVECTORF32 g_XMLg10 = { - {{+3.321928f, +3.321928f, +3.321928f, +3.321928f}}}; -XMGLOBALCONST XMVECTORF32 g_XMInvLg10 = { - {{+3.010299956e-1f, +3.010299956e-1f, +3.010299956e-1f, +3.010299956e-1f}}}; -XMGLOBALCONST XMVECTORF32 g_UByteMax = {{{255.0f, 255.0f, 255.0f, 255.0f}}}; -XMGLOBALCONST XMVECTORF32 g_ByteMin = {{{-127.0f, -127.0f, -127.0f, -127.0f}}}; -XMGLOBALCONST XMVECTORF32 g_ByteMax = {{{127.0f, 127.0f, 127.0f, 127.0f}}}; -XMGLOBALCONST XMVECTORF32 g_ShortMin = { - {{-32767.0f, -32767.0f, -32767.0f, -32767.0f}}}; -XMGLOBALCONST XMVECTORF32 g_ShortMax = { - {{32767.0f, 32767.0f, 32767.0f, 32767.0f}}}; -XMGLOBALCONST XMVECTORF32 g_UShortMax = { - {{65535.0f, 65535.0f, 65535.0f, 65535.0f}}}; - -/**************************************************************************** - * - * Implementation - * - ****************************************************************************/ - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4068 4214 4204 4365 4616 4640 6001 6101) -// C4068/4616: ignore unknown pragmas -// C4214/4204: nonstandard extension used -// C4365/4640: Off by default noise -// C6001/6101: False positives -#endif - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") -#pragma prefast(disable : 26495, "Union initialization confuses /analyze") -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wfloat-equal" -#pragma clang diagnostic ignored "-Wundefined-reinterpret-cast" -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSetBinaryConstant(uint32_t C0, uint32_t C1, - uint32_t C2, - uint32_t C3) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult; - vResult.u[0] = (0 - (C0 & 1)) & 0x3F800000; - vResult.u[1] = (0 - (C1 & 1)) & 0x3F800000; - vResult.u[2] = (0 - (C2 & 1)) & 0x3F800000; - vResult.u[3] = (0 - (C3 & 1)) & 0x3F800000; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTORU32 vResult; - vResult.u[0] = (0 - (C0 & 1)) & 0x3F800000; - vResult.u[1] = (0 - (C1 & 1)) & 0x3F800000; - vResult.u[2] = (0 - (C2 & 1)) & 0x3F800000; - vResult.u[3] = (0 - (C3 & 1)) & 0x3F800000; - return vResult.v; -#else // XM_SSE_INTRINSICS_ - static const XMVECTORU32 g_vMask1 = {{{1, 1, 1, 1}}}; - // Move the parms to a vector - __m128i vTemp = _mm_set_epi32(static_cast(C3), static_cast(C2), - static_cast(C1), static_cast(C0)); - // Mask off the low bits - vTemp = _mm_and_si128(vTemp, g_vMask1); - // 0xFFFFFFFF on true bits - vTemp = _mm_cmpeq_epi32(vTemp, g_vMask1); - // 0xFFFFFFFF -> 1.0f, 0x00000000 -> 0.0f - vTemp = _mm_and_si128(vTemp, g_XMOne); - return _mm_castsi128_ps(vTemp); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVectorSplatConstant(int32_t IntConstant, uint32_t DivExponent) noexcept { - assert(IntConstant >= -16 && IntConstant <= 15); - assert(DivExponent < 32); -#if defined(_XM_NO_INTRINSICS_) - - using DirectX::XMConvertVectorIntToFloat; - - XMVECTORI32 V = {{{IntConstant, IntConstant, IntConstant, IntConstant}}}; - return XMConvertVectorIntToFloat(V.v, DivExponent); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Splat the int - int32x4_t vScale = vdupq_n_s32(IntConstant); - // Convert to a float - XMVECTOR vResult = vcvtq_f32_s32(vScale); - // Convert DivExponent into 1.0f/(1<(&vScale)[0]); - return vResult; -#else // XM_SSE_INTRINSICS_ - // Splat the int - __m128i vScale = _mm_set1_epi32(IntConstant); - // Convert to a float - XMVECTOR vResult = _mm_cvtepi32_ps(vScale); - // Convert DivExponent into 1.0f/(1<(uScale)); - // Multiply by the reciprocal (Perform a right shift by DivExponent) - vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVectorSplatConstantInt(int32_t IntConstant) noexcept { - assert(IntConstant >= -16 && IntConstant <= 15); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORI32 V = {{{IntConstant, IntConstant, IntConstant, IntConstant}}}; - return V.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t V = vdupq_n_s32(IntConstant); - return reinterpret_cast(&V)[0]; -#else // XM_SSE_INTRINSICS_ - __m128i V = _mm_set1_epi32(IntConstant); - return _mm_castsi128_ps(V); -#endif -} - -#include "DirectXMathConvert.inl" -#include "DirectXMathMatrix.inl" -#include "DirectXMathMisc.inl" -#include "DirectXMathVector.inl" - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -} // namespace DirectX diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXMathConvert.inl b/targets/app/linux/Stubs/DirectXMath/DirectXMathConvert.inl deleted file mode 100644 index b68857896..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXMathConvert.inl +++ /dev/null @@ -1,2057 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXMathConvert.inl -- SIMD C++ Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -/**************************************************************************** - * - * Data conversion - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4701) -// C4701: false positives -#endif - -inline XMVECTOR XM_CALLCONV -XMConvertVectorIntToFloat(FXMVECTOR VInt, uint32_t DivExponent) noexcept { - assert(DivExponent < 32); -#if defined(_XM_NO_INTRINSICS_) - float fScale = 1.0f / static_cast(1U << DivExponent); - uint32_t ElementIndex = 0; - XMVECTOR Result; - do { - auto iTemp = static_cast(VInt.vector4_u32[ElementIndex]); - Result.vector4_f32[ElementIndex] = static_cast(iTemp) * fScale; - } while (++ElementIndex < 4); - return Result; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float fScale = 1.0f / static_cast(1U << DivExponent); - float32x4_t vResult = vcvtq_f32_s32(vreinterpretq_s32_f32(VInt)); - return vmulq_n_f32(vResult, fScale); -#else // _XM_SSE_INTRINSICS_ - // Convert to floats - XMVECTOR vResult = _mm_cvtepi32_ps(_mm_castps_si128(VInt)); - // Convert DivExponent into 1.0f/(1<(uScale)); - vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMConvertVectorFloatToInt(FXMVECTOR VFloat, uint32_t MulExponent) noexcept { - assert(MulExponent < 32); -#if defined(_XM_NO_INTRINSICS_) - // Get the scalar factor. - auto fScale = static_cast(1U << MulExponent); - uint32_t ElementIndex = 0; - XMVECTOR Result; - do { - int32_t iResult; - float fTemp = VFloat.vector4_f32[ElementIndex] * fScale; - if (fTemp <= -(65536.0f * 32768.0f)) { - iResult = (-0x7FFFFFFF) - 1; - } else if (fTemp > (65536.0f * 32768.0f) - 128.0f) { - iResult = 0x7FFFFFFF; - } else { - iResult = static_cast(fTemp); - } - Result.vector4_u32[ElementIndex] = static_cast(iResult); - } while (++ElementIndex < 4); - return Result; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vResult = - vmulq_n_f32(VFloat, static_cast(1U << MulExponent)); - // In case of positive overflow, detect it - uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxInt); - // Float to int conversion - int32x4_t vResulti = vcvtq_s32_f32(vResult); - // If there was positive overflow, set to 0x7FFFFFFF - vResult = vreinterpretq_f32_u32(vandq_u32(vOverflow, g_XMAbsMask)); - vOverflow = vbicq_u32(vreinterpretq_u32_s32(vResulti), vOverflow); - vOverflow = vorrq_u32(vOverflow, vreinterpretq_u32_f32(vResult)); - return vreinterpretq_f32_u32(vOverflow); -#else // _XM_SSE_INTRINSICS_ - XMVECTOR vResult = _mm_set_ps1(static_cast(1U << MulExponent)); - vResult = _mm_mul_ps(vResult, VFloat); - // In case of positive overflow, detect it - XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxInt); - // Float to int conversion - __m128i vResulti = _mm_cvttps_epi32(vResult); - // If there was positive overflow, set to 0x7FFFFFFF - vResult = _mm_and_ps(vOverflow, g_XMAbsMask); - vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); - vOverflow = _mm_or_ps(vOverflow, vResult); - return vOverflow; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMConvertVectorUIntToFloat(FXMVECTOR VUInt, uint32_t DivExponent) noexcept { - assert(DivExponent < 32); -#if defined(_XM_NO_INTRINSICS_) - float fScale = 1.0f / static_cast(1U << DivExponent); - uint32_t ElementIndex = 0; - XMVECTOR Result; - do { - Result.vector4_f32[ElementIndex] = - static_cast(VUInt.vector4_u32[ElementIndex]) * fScale; - } while (++ElementIndex < 4); - return Result; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float fScale = 1.0f / static_cast(1U << DivExponent); - float32x4_t vResult = vcvtq_f32_u32(vreinterpretq_u32_f32(VUInt)); - return vmulq_n_f32(vResult, fScale); -#else // _XM_SSE_INTRINSICS_ - // For the values that are higher than 0x7FFFFFFF, a fixup is needed - // Determine which ones need the fix. - XMVECTOR vMask = _mm_and_ps(VUInt, g_XMNegativeZero); - // Force all values positive - XMVECTOR vResult = _mm_xor_ps(VUInt, vMask); - // Convert to floats - vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); - // Convert 0x80000000 -> 0xFFFFFFFF - __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); - // For only the ones that are too big, add the fixup - vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); - vResult = _mm_add_ps(vResult, vMask); - // Convert DivExponent into 1.0f/(1<(uScale)); - vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(iMask)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMConvertVectorFloatToUInt(FXMVECTOR VFloat, uint32_t MulExponent) noexcept { - assert(MulExponent < 32); -#if defined(_XM_NO_INTRINSICS_) - // Get the scalar factor. - auto fScale = static_cast(1U << MulExponent); - uint32_t ElementIndex = 0; - XMVECTOR Result; - do { - uint32_t uResult; - float fTemp = VFloat.vector4_f32[ElementIndex] * fScale; - if (fTemp <= 0.0f) { - uResult = 0; - } else if (fTemp >= (65536.0f * 65536.0f)) { - uResult = 0xFFFFFFFFU; - } else { - uResult = static_cast(fTemp); - } - Result.vector4_u32[ElementIndex] = uResult; - } while (++ElementIndex < 4); - return Result; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vResult = - vmulq_n_f32(VFloat, static_cast(1U << MulExponent)); - // In case of overflow, detect it - uint32x4_t vOverflow = vcgtq_f32(vResult, g_XMMaxUInt); - // Float to int conversion - uint32x4_t vResulti = vcvtq_u32_f32(vResult); - // If there was overflow, set to 0xFFFFFFFFU - vResult = vreinterpretq_f32_u32(vbicq_u32(vResulti, vOverflow)); - vOverflow = vorrq_u32(vOverflow, vreinterpretq_u32_f32(vResult)); - return vreinterpretq_f32_u32(vOverflow); -#else // _XM_SSE_INTRINSICS_ - XMVECTOR vResult = _mm_set_ps1(static_cast(1U << MulExponent)); - vResult = _mm_mul_ps(vResult, VFloat); - // Clamp to >=0 - vResult = _mm_max_ps(vResult, g_XMZero); - // Any numbers that are too big, set to 0xFFFFFFFFU - XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); - XMVECTOR vValue = g_XMUnsignedFix; - // Too large for a signed integer? - XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); - // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise - vValue = _mm_and_ps(vValue, vMask); - // Perform fixup only on numbers too large (Keeps low bit precision) - vResult = _mm_sub_ps(vResult, vValue); - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Convert from signed to unsigned pnly if greater than 0x80000000 - vMask = _mm_and_ps(vMask, g_XMNegativeZero); - vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); - // On those that are too large, set to 0xFFFFFFFF - vResult = _mm_or_ps(vResult, vOverflow); - return vResult; -#endif -} - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/**************************************************************************** - * - * Vector and matrix load operations - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadInt(const uint32_t* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_u32[0] = *pSource; - V.vector4_u32[1] = 0; - V.vector4_u32[2] = 0; - V.vector4_u32[3] = 0; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t zero = vdupq_n_u32(0); - return vreinterpretq_f32_u32(vld1q_lane_u32(pSource, zero, 0)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_load_ss(reinterpret_cast(pSource)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat(const float* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = *pSource; - V.vector4_f32[1] = 0.f; - V.vector4_f32[2] = 0.f; - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t zero = vdupq_n_f32(0); - return vld1q_lane_f32(pSource, zero, 0); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_load_ss(pSource); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadInt2(const uint32_t* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_u32[0] = pSource[0]; - V.vector4_u32[1] = pSource[1]; - V.vector4_u32[2] = 0; - V.vector4_u32[3] = 0; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t x = vld1_u32(pSource); - uint32x2_t zero = vdup_n_u32(0); - return vreinterpretq_f32_u32(vcombine_u32(x, zero)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadInt2A(const uint32_t* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_u32[0] = pSource[0]; - V.vector4_u32[1] = pSource[1]; - V.vector4_u32[2] = 0; - V.vector4_u32[3] = 0; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - uint32x2_t x = vld1_u32_ex(pSource, 64); -#else - uint32x2_t x = vld1_u32(pSource); -#endif - uint32x2_t zero = vdup_n_u32(0); - return vreinterpretq_f32_u32(vcombine_u32(x, zero)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat2(const XMFLOAT2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = pSource->x; - V.vector4_f32[1] = pSource->y; - V.vector4_f32[2] = 0.f; - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t x = vld1_f32(reinterpret_cast(pSource)); - float32x2_t zero = vdup_n_f32(0); - return vcombine_f32(x, zero); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat2A(const XMFLOAT2A* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = pSource->x; - V.vector4_f32[1] = pSource->y; - V.vector4_f32[2] = 0.f; - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - float32x2_t x = vld1_f32_ex(reinterpret_cast(pSource), 64); -#else - float32x2_t x = vld1_f32(reinterpret_cast(pSource)); -#endif - float32x2_t zero = vdup_n_f32(0); - return vcombine_f32(x, zero); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadSInt2(const XMINT2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = static_cast(pSource->x); - V.vector4_f32[1] = static_cast(pSource->y); - V.vector4_f32[2] = 0.f; - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x2_t x = vld1_s32(reinterpret_cast(pSource)); - float32x2_t v = vcvt_f32_s32(x); - float32x2_t zero = vdup_n_f32(0); - return vcombine_f32(v, zero); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 V = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - return _mm_cvtepi32_ps(_mm_castps_si128(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUInt2(const XMUINT2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = static_cast(pSource->x); - V.vector4_f32[1] = static_cast(pSource->y); - V.vector4_f32[2] = 0.f; - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t x = vld1_u32(reinterpret_cast(pSource)); - float32x2_t v = vcvt_f32_u32(x); - float32x2_t zero = vdup_n_f32(0); - return vcombine_f32(v, zero); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 V = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - // For the values that are higher than 0x7FFFFFFF, a fixup is needed - // Determine which ones need the fix. - XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero); - // Force all values positive - XMVECTOR vResult = _mm_xor_ps(V, vMask); - // Convert to floats - vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); - // Convert 0x80000000 -> 0xFFFFFFFF - __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); - // For only the ones that are too big, add the fixup - vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); - vResult = _mm_add_ps(vResult, vMask); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadInt3(const uint32_t* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_u32[0] = pSource[0]; - V.vector4_u32[1] = pSource[1]; - V.vector4_u32[2] = pSource[2]; - V.vector4_u32[3] = 0; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t x = vld1_u32(pSource); - uint32x2_t zero = vdup_n_u32(0); - uint32x2_t y = vld1_lane_u32(pSource + 2, zero, 0); - return vreinterpretq_f32_u32(vcombine_u32(x, y)); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); - return _mm_insert_ps(xy, z, 0x20); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); - return _mm_movelh_ps(xy, z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadInt3A(const uint32_t* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_u32[0] = pSource[0]; - V.vector4_u32[1] = pSource[1]; - V.vector4_u32[2] = pSource[2]; - V.vector4_u32[3] = 0; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Reads an extra integer which is zero'd -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - uint32x4_t V = vld1q_u32_ex(pSource, 128); -#else - uint32x4_t V = vld1q_u32(pSource); -#endif - return vreinterpretq_f32_u32(vsetq_lane_u32(0, V, 3)); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); - return _mm_insert_ps(xy, z, 0x20); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(reinterpret_cast(pSource + 2)); - return _mm_movelh_ps(xy, z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat3(const XMFLOAT3* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = pSource->x; - V.vector4_f32[1] = pSource->y; - V.vector4_f32[2] = pSource->z; - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t x = vld1_f32(reinterpret_cast(pSource)); - float32x2_t zero = vdup_n_f32(0); - float32x2_t y = - vld1_lane_f32(reinterpret_cast(pSource) + 2, zero, 0); - return vcombine_f32(x, y); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(&pSource->z); - return _mm_insert_ps(xy, z, 0x20); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(&pSource->z); - return _mm_movelh_ps(xy, z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat3A(const XMFLOAT3A* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = pSource->x; - V.vector4_f32[1] = pSource->y; - V.vector4_f32[2] = pSource->z; - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Reads an extra float which is zero'd -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - float32x4_t V = vld1q_f32_ex(reinterpret_cast(pSource), 128); -#else - float32x4_t V = vld1q_f32(reinterpret_cast(pSource)); -#endif - return vsetq_lane_f32(0, V, 3); -#elif defined(_XM_SSE_INTRINSICS_) - // Reads an extra float which is zero'd - __m128 V = _mm_load_ps(&pSource->x); - return _mm_and_ps(V, g_XMMask3); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadSInt3(const XMINT3* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR V; - V.vector4_f32[0] = static_cast(pSource->x); - V.vector4_f32[1] = static_cast(pSource->y); - V.vector4_f32[2] = static_cast(pSource->z); - V.vector4_f32[3] = 0.f; - return V; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x2_t x = vld1_s32(reinterpret_cast(pSource)); - int32x2_t zero = vdup_n_s32(0); - int32x2_t y = - vld1_lane_s32(reinterpret_cast(pSource) + 2, zero, 0); - int32x4_t v = vcombine_s32(x, y); - return vcvtq_f32_s32(v); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(reinterpret_cast(&pSource->z)); - __m128 V = _mm_movelh_ps(xy, z); - return _mm_cvtepi32_ps(_mm_castps_si128(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUInt3(const XMUINT3* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = static_cast(pSource->x); - V.vector4_f32[1] = static_cast(pSource->y); - V.vector4_f32[2] = static_cast(pSource->z); - V.vector4_f32[3] = 0.f; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t x = vld1_u32(reinterpret_cast(pSource)); - uint32x2_t zero = vdup_n_u32(0); - uint32x2_t y = - vld1_lane_u32(reinterpret_cast(pSource) + 2, zero, 0); - uint32x4_t v = vcombine_u32(x, y); - return vcvtq_f32_u32(v); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 xy = - _mm_castpd_ps(_mm_load_sd(reinterpret_cast(pSource))); - __m128 z = _mm_load_ss(reinterpret_cast(&pSource->z)); - __m128 V = _mm_movelh_ps(xy, z); - // For the values that are higher than 0x7FFFFFFF, a fixup is needed - // Determine which ones need the fix. - XMVECTOR vMask = _mm_and_ps(V, g_XMNegativeZero); - // Force all values positive - XMVECTOR vResult = _mm_xor_ps(V, vMask); - // Convert to floats - vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); - // Convert 0x80000000 -> 0xFFFFFFFF - __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); - // For only the ones that are too big, add the fixup - vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); - vResult = _mm_add_ps(vResult, vMask); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadInt4(const uint32_t* pSource) noexcept { - assert(pSource); - -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_u32[0] = pSource[0]; - V.vector4_u32[1] = pSource[1]; - V.vector4_u32[2] = pSource[2]; - V.vector4_u32[3] = pSource[3]; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vld1q_u32(pSource)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadInt4A(const uint32_t* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_u32[0] = pSource[0]; - V.vector4_u32[1] = pSource[1]; - V.vector4_u32[2] = pSource[2]; - V.vector4_u32[3] = pSource[3]; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - return vld1q_u32_ex(pSource, 128); -#else - return vreinterpretq_f32_u32(vld1q_u32(pSource)); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_load_si128(reinterpret_cast(pSource)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat4(const XMFLOAT4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = pSource->x; - V.vector4_f32[1] = pSource->y; - V.vector4_f32[2] = pSource->z; - V.vector4_f32[3] = pSource->w; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vld1q_f32(reinterpret_cast(pSource)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_loadu_ps(&pSource->x); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat4A(const XMFLOAT4A* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = pSource->x; - V.vector4_f32[1] = pSource->y; - V.vector4_f32[2] = pSource->z; - V.vector4_f32[3] = pSource->w; - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - return vld1q_f32_ex(reinterpret_cast(pSource), 128); -#else - return vld1q_f32(reinterpret_cast(pSource)); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_load_ps(&pSource->x); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadSInt4(const XMINT4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR V; - V.vector4_f32[0] = static_cast(pSource->x); - V.vector4_f32[1] = static_cast(pSource->y); - V.vector4_f32[2] = static_cast(pSource->z); - V.vector4_f32[3] = static_cast(pSource->w); - return V; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t v = vld1q_s32(reinterpret_cast(pSource)); - return vcvtq_f32_s32(v); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); - return _mm_cvtepi32_ps(V); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUInt4(const XMUINT4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR V; - V.vector4_f32[0] = static_cast(pSource->x); - V.vector4_f32[1] = static_cast(pSource->y); - V.vector4_f32[2] = static_cast(pSource->z); - V.vector4_f32[3] = static_cast(pSource->w); - return V; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t v = vld1q_u32(reinterpret_cast(pSource)); - return vcvtq_f32_u32(v); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_loadu_si128(reinterpret_cast(pSource)); - // For the values that are higher than 0x7FFFFFFF, a fixup is needed - // Determine which ones need the fix. - XMVECTOR vMask = _mm_and_ps(_mm_castsi128_ps(V), g_XMNegativeZero); - // Force all values positive - XMVECTOR vResult = _mm_xor_ps(_mm_castsi128_ps(V), vMask); - // Convert to floats - vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); - // Convert 0x80000000 -> 0xFFFFFFFF - __m128i iMask = _mm_srai_epi32(_mm_castps_si128(vMask), 31); - // For only the ones that are too big, add the fixup - vMask = _mm_and_ps(_mm_castsi128_ps(iMask), g_XMFixUnsigned); - vResult = _mm_add_ps(vResult, vMask); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMLoadFloat3x3(const XMFLOAT3X3* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.r[0].vector4_f32[0] = pSource->m[0][0]; - M.r[0].vector4_f32[1] = pSource->m[0][1]; - M.r[0].vector4_f32[2] = pSource->m[0][2]; - M.r[0].vector4_f32[3] = 0.0f; - - M.r[1].vector4_f32[0] = pSource->m[1][0]; - M.r[1].vector4_f32[1] = pSource->m[1][1]; - M.r[1].vector4_f32[2] = pSource->m[1][2]; - M.r[1].vector4_f32[3] = 0.0f; - - M.r[2].vector4_f32[0] = pSource->m[2][0]; - M.r[2].vector4_f32[1] = pSource->m[2][1]; - M.r[2].vector4_f32[2] = pSource->m[2][2]; - M.r[2].vector4_f32[3] = 0.0f; - M.r[3].vector4_f32[0] = 0.0f; - M.r[3].vector4_f32[1] = 0.0f; - M.r[3].vector4_f32[2] = 0.0f; - M.r[3].vector4_f32[3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t v0 = vld1q_f32(&pSource->m[0][0]); - float32x4_t v1 = vld1q_f32(&pSource->m[1][1]); - float32x2_t v2 = vcreate_f32(static_cast( - *reinterpret_cast(&pSource->m[2][2]))); - float32x4_t T = vextq_f32(v0, v1, 3); - - XMMATRIX M; - M.r[0] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v0), g_XMMask3)); - M.r[1] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T), g_XMMask3)); - M.r[2] = vcombine_f32(vget_high_f32(v1), v2); - M.r[3] = g_XMIdentityR3; - return M; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 Z = _mm_setzero_ps(); - - __m128 V1 = _mm_loadu_ps(&pSource->m[0][0]); - __m128 V2 = _mm_loadu_ps(&pSource->m[1][1]); - __m128 V3 = _mm_load_ss(&pSource->m[2][2]); - - __m128 T1 = _mm_unpackhi_ps(V1, Z); - __m128 T2 = _mm_unpacklo_ps(V2, Z); - __m128 T3 = _mm_shuffle_ps(V3, T2, _MM_SHUFFLE(0, 1, 0, 0)); - __m128 T4 = _mm_movehl_ps(T2, T3); - __m128 T5 = _mm_movehl_ps(Z, T1); - - XMMATRIX M; - M.r[0] = _mm_movelh_ps(V1, T1); - M.r[1] = _mm_add_ps(T4, T5); - M.r[2] = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 3, 2)); - M.r[3] = g_XMIdentityR3; - return M; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMLoadFloat4x3(const XMFLOAT4X3* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.r[0].vector4_f32[0] = pSource->m[0][0]; - M.r[0].vector4_f32[1] = pSource->m[0][1]; - M.r[0].vector4_f32[2] = pSource->m[0][2]; - M.r[0].vector4_f32[3] = 0.0f; - - M.r[1].vector4_f32[0] = pSource->m[1][0]; - M.r[1].vector4_f32[1] = pSource->m[1][1]; - M.r[1].vector4_f32[2] = pSource->m[1][2]; - M.r[1].vector4_f32[3] = 0.0f; - - M.r[2].vector4_f32[0] = pSource->m[2][0]; - M.r[2].vector4_f32[1] = pSource->m[2][1]; - M.r[2].vector4_f32[2] = pSource->m[2][2]; - M.r[2].vector4_f32[3] = 0.0f; - - M.r[3].vector4_f32[0] = pSource->m[3][0]; - M.r[3].vector4_f32[1] = pSource->m[3][1]; - M.r[3].vector4_f32[2] = pSource->m[3][2]; - M.r[3].vector4_f32[3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t v0 = vld1q_f32(&pSource->m[0][0]); - float32x4_t v1 = vld1q_f32(&pSource->m[1][1]); - float32x4_t v2 = vld1q_f32(&pSource->m[2][2]); - - float32x4_t T1 = vextq_f32(v0, v1, 3); - float32x4_t T2 = vcombine_f32(vget_high_f32(v1), vget_low_f32(v2)); - float32x4_t T3 = vextq_f32(v2, v2, 1); - - XMMATRIX M; - M.r[0] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v0), g_XMMask3)); - M.r[1] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); - M.r[2] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); - M.r[3] = vsetq_lane_f32(1.f, T3, 3); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - // Use unaligned load instructions to - // load the 12 floats - // vTemp1 = x1,y1,z1,x2 - XMVECTOR vTemp1 = _mm_loadu_ps(&pSource->m[0][0]); - // vTemp2 = y2,z2,x3,y3 - XMVECTOR vTemp2 = _mm_loadu_ps(&pSource->m[1][1]); - // vTemp4 = z3,x4,y4,z4 - XMVECTOR vTemp4 = _mm_loadu_ps(&pSource->m[2][2]); - // vTemp3 = x3,y3,z3,z3 - XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2, vTemp4, _MM_SHUFFLE(0, 0, 3, 2)); - // vTemp2 = y2,z2,x2,x2 - vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(3, 3, 1, 0)); - // vTemp2 = x2,y2,z2,z2 - vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2)); - // vTemp1 = x1,y1,z1,0 - vTemp1 = _mm_and_ps(vTemp1, g_XMMask3); - // vTemp2 = x2,y2,z2,0 - vTemp2 = _mm_and_ps(vTemp2, g_XMMask3); - // vTemp3 = x3,y3,z3,0 - vTemp3 = _mm_and_ps(vTemp3, g_XMMask3); - // vTemp4i = x4,y4,z4,0 - __m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8); - // vTemp4i = x4,y4,z4,1.0f - vTemp4i = _mm_or_si128(vTemp4i, g_XMIdentityR3); - XMMATRIX M(vTemp1, vTemp2, vTemp3, _mm_castsi128_ps(vTemp4i)); - return M; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMLoadFloat4x3A(const XMFLOAT4X3A* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.r[0].vector4_f32[0] = pSource->m[0][0]; - M.r[0].vector4_f32[1] = pSource->m[0][1]; - M.r[0].vector4_f32[2] = pSource->m[0][2]; - M.r[0].vector4_f32[3] = 0.0f; - - M.r[1].vector4_f32[0] = pSource->m[1][0]; - M.r[1].vector4_f32[1] = pSource->m[1][1]; - M.r[1].vector4_f32[2] = pSource->m[1][2]; - M.r[1].vector4_f32[3] = 0.0f; - - M.r[2].vector4_f32[0] = pSource->m[2][0]; - M.r[2].vector4_f32[1] = pSource->m[2][1]; - M.r[2].vector4_f32[2] = pSource->m[2][2]; - M.r[2].vector4_f32[3] = 0.0f; - - M.r[3].vector4_f32[0] = pSource->m[3][0]; - M.r[3].vector4_f32[1] = pSource->m[3][1]; - M.r[3].vector4_f32[2] = pSource->m[3][2]; - M.r[3].vector4_f32[3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - float32x4_t v0 = vld1q_f32_ex(&pSource->m[0][0], 128); - float32x4_t v1 = vld1q_f32_ex(&pSource->m[1][1], 128); - float32x4_t v2 = vld1q_f32_ex(&pSource->m[2][2], 128); -#else - float32x4_t v0 = vld1q_f32(&pSource->m[0][0]); - float32x4_t v1 = vld1q_f32(&pSource->m[1][1]); - float32x4_t v2 = vld1q_f32(&pSource->m[2][2]); -#endif - - float32x4_t T1 = vextq_f32(v0, v1, 3); - float32x4_t T2 = vcombine_f32(vget_high_f32(v1), vget_low_f32(v2)); - float32x4_t T3 = vextq_f32(v2, v2, 1); - - XMMATRIX M; - M.r[0] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(v0), g_XMMask3)); - M.r[1] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); - M.r[2] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); - M.r[3] = vsetq_lane_f32(1.f, T3, 3); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - // Use aligned load instructions to - // load the 12 floats - // vTemp1 = x1,y1,z1,x2 - XMVECTOR vTemp1 = _mm_load_ps(&pSource->m[0][0]); - // vTemp2 = y2,z2,x3,y3 - XMVECTOR vTemp2 = _mm_load_ps(&pSource->m[1][1]); - // vTemp4 = z3,x4,y4,z4 - XMVECTOR vTemp4 = _mm_load_ps(&pSource->m[2][2]); - // vTemp3 = x3,y3,z3,z3 - XMVECTOR vTemp3 = _mm_shuffle_ps(vTemp2, vTemp4, _MM_SHUFFLE(0, 0, 3, 2)); - // vTemp2 = y2,z2,x2,x2 - vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(3, 3, 1, 0)); - // vTemp2 = x2,y2,z2,z2 - vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 1, 0, 2)); - // vTemp1 = x1,y1,z1,0 - vTemp1 = _mm_and_ps(vTemp1, g_XMMask3); - // vTemp2 = x2,y2,z2,0 - vTemp2 = _mm_and_ps(vTemp2, g_XMMask3); - // vTemp3 = x3,y3,z3,0 - vTemp3 = _mm_and_ps(vTemp3, g_XMMask3); - // vTemp4i = x4,y4,z4,0 - __m128i vTemp4i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8); - // vTemp4i = x4,y4,z4,1.0f - vTemp4i = _mm_or_si128(vTemp4i, g_XMIdentityR3); - XMMATRIX M(vTemp1, vTemp2, vTemp3, _mm_castsi128_ps(vTemp4i)); - return M; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMLoadFloat3x4(const XMFLOAT3X4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.r[0].vector4_f32[0] = pSource->m[0][0]; - M.r[0].vector4_f32[1] = pSource->m[1][0]; - M.r[0].vector4_f32[2] = pSource->m[2][0]; - M.r[0].vector4_f32[3] = 0.0f; - - M.r[1].vector4_f32[0] = pSource->m[0][1]; - M.r[1].vector4_f32[1] = pSource->m[1][1]; - M.r[1].vector4_f32[2] = pSource->m[2][1]; - M.r[1].vector4_f32[3] = 0.0f; - - M.r[2].vector4_f32[0] = pSource->m[0][2]; - M.r[2].vector4_f32[1] = pSource->m[1][2]; - M.r[2].vector4_f32[2] = pSource->m[2][2]; - M.r[2].vector4_f32[3] = 0.0f; - - M.r[3].vector4_f32[0] = pSource->m[0][3]; - M.r[3].vector4_f32[1] = pSource->m[1][3]; - M.r[3].vector4_f32[2] = pSource->m[2][3]; - M.r[3].vector4_f32[3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2x4_t vTemp0 = vld4_f32(&pSource->_11); - float32x4_t vTemp1 = vld1q_f32(&pSource->_31); - - float32x2_t l = vget_low_f32(vTemp1); - float32x4_t T0 = vcombine_f32(vTemp0.val[0], l); - float32x2_t rl = vrev64_f32(l); - float32x4_t T1 = vcombine_f32(vTemp0.val[1], rl); - - float32x2_t h = vget_high_f32(vTemp1); - float32x4_t T2 = vcombine_f32(vTemp0.val[2], h); - float32x2_t rh = vrev64_f32(h); - float32x4_t T3 = vcombine_f32(vTemp0.val[3], rh); - - XMMATRIX M = {}; - M.r[0] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T0), g_XMMask3)); - M.r[1] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); - M.r[2] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); - M.r[3] = vsetq_lane_f32(1.f, T3, 3); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - M.r[0] = _mm_loadu_ps(&pSource->_11); - M.r[1] = _mm_loadu_ps(&pSource->_21); - M.r[2] = _mm_loadu_ps(&pSource->_31); - M.r[3] = g_XMIdentityR3; - - // x.x,x.y,y.x,y.y - XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); - // x.z,x.w,y.z,y.w - XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); - // z.x,z.y,w.x,w.y - XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); - // z.z,z.w,w.z,w.w - XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); - XMMATRIX mResult; - - // x.x,y.x,z.x,w.x - mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); - // x.y,y.y,z.y,w.y - mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); - // x.z,y.z,z.z,w.z - mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); - // x.w,y.w,z.w,w.w - mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); - return mResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMLoadFloat3x4A(const XMFLOAT3X4A* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.r[0].vector4_f32[0] = pSource->m[0][0]; - M.r[0].vector4_f32[1] = pSource->m[1][0]; - M.r[0].vector4_f32[2] = pSource->m[2][0]; - M.r[0].vector4_f32[3] = 0.0f; - - M.r[1].vector4_f32[0] = pSource->m[0][1]; - M.r[1].vector4_f32[1] = pSource->m[1][1]; - M.r[1].vector4_f32[2] = pSource->m[2][1]; - M.r[1].vector4_f32[3] = 0.0f; - - M.r[2].vector4_f32[0] = pSource->m[0][2]; - M.r[2].vector4_f32[1] = pSource->m[1][2]; - M.r[2].vector4_f32[2] = pSource->m[2][2]; - M.r[2].vector4_f32[3] = 0.0f; - - M.r[3].vector4_f32[0] = pSource->m[0][3]; - M.r[3].vector4_f32[1] = pSource->m[1][3]; - M.r[3].vector4_f32[2] = pSource->m[2][3]; - M.r[3].vector4_f32[3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - float32x2x4_t vTemp0 = vld4_f32_ex(&pSource->_11, 128); - float32x4_t vTemp1 = vld1q_f32_ex(&pSource->_31, 128); -#else - float32x2x4_t vTemp0 = vld4_f32(&pSource->_11); - float32x4_t vTemp1 = vld1q_f32(&pSource->_31); -#endif - - float32x2_t l = vget_low_f32(vTemp1); - float32x4_t T0 = vcombine_f32(vTemp0.val[0], l); - float32x2_t rl = vrev64_f32(l); - float32x4_t T1 = vcombine_f32(vTemp0.val[1], rl); - - float32x2_t h = vget_high_f32(vTemp1); - float32x4_t T2 = vcombine_f32(vTemp0.val[2], h); - float32x2_t rh = vrev64_f32(h); - float32x4_t T3 = vcombine_f32(vTemp0.val[3], rh); - - XMMATRIX M = {}; - M.r[0] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T0), g_XMMask3)); - M.r[1] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T1), g_XMMask3)); - M.r[2] = - vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T2), g_XMMask3)); - M.r[3] = vsetq_lane_f32(1.f, T3, 3); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - M.r[0] = _mm_load_ps(&pSource->_11); - M.r[1] = _mm_load_ps(&pSource->_21); - M.r[2] = _mm_load_ps(&pSource->_31); - M.r[3] = g_XMIdentityR3; - - // x.x,x.y,y.x,y.y - XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); - // x.z,x.w,y.z,y.w - XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); - // z.x,z.y,w.x,w.y - XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); - // z.z,z.w,w.z,w.w - XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); - XMMATRIX mResult; - - // x.x,y.x,z.x,w.x - mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); - // x.y,y.y,z.y,w.y - mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); - // x.z,y.z,z.z,w.z - mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); - // x.w,y.w,z.w,w.w - mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); - return mResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMLoadFloat4x4(const XMFLOAT4X4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.r[0].vector4_f32[0] = pSource->m[0][0]; - M.r[0].vector4_f32[1] = pSource->m[0][1]; - M.r[0].vector4_f32[2] = pSource->m[0][2]; - M.r[0].vector4_f32[3] = pSource->m[0][3]; - - M.r[1].vector4_f32[0] = pSource->m[1][0]; - M.r[1].vector4_f32[1] = pSource->m[1][1]; - M.r[1].vector4_f32[2] = pSource->m[1][2]; - M.r[1].vector4_f32[3] = pSource->m[1][3]; - - M.r[2].vector4_f32[0] = pSource->m[2][0]; - M.r[2].vector4_f32[1] = pSource->m[2][1]; - M.r[2].vector4_f32[2] = pSource->m[2][2]; - M.r[2].vector4_f32[3] = pSource->m[2][3]; - - M.r[3].vector4_f32[0] = pSource->m[3][0]; - M.r[3].vector4_f32[1] = pSource->m[3][1]; - M.r[3].vector4_f32[2] = pSource->m[3][2]; - M.r[3].vector4_f32[3] = pSource->m[3][3]; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMMATRIX M; - M.r[0] = vld1q_f32(reinterpret_cast(&pSource->_11)); - M.r[1] = vld1q_f32(reinterpret_cast(&pSource->_21)); - M.r[2] = vld1q_f32(reinterpret_cast(&pSource->_31)); - M.r[3] = vld1q_f32(reinterpret_cast(&pSource->_41)); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - M.r[0] = _mm_loadu_ps(&pSource->_11); - M.r[1] = _mm_loadu_ps(&pSource->_21); - M.r[2] = _mm_loadu_ps(&pSource->_31); - M.r[3] = _mm_loadu_ps(&pSource->_41); - return M; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMLoadFloat4x4A(const XMFLOAT4X4A* pSource) noexcept { - assert(pSource); - assert((reinterpret_cast(pSource) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.r[0].vector4_f32[0] = pSource->m[0][0]; - M.r[0].vector4_f32[1] = pSource->m[0][1]; - M.r[0].vector4_f32[2] = pSource->m[0][2]; - M.r[0].vector4_f32[3] = pSource->m[0][3]; - - M.r[1].vector4_f32[0] = pSource->m[1][0]; - M.r[1].vector4_f32[1] = pSource->m[1][1]; - M.r[1].vector4_f32[2] = pSource->m[1][2]; - M.r[1].vector4_f32[3] = pSource->m[1][3]; - - M.r[2].vector4_f32[0] = pSource->m[2][0]; - M.r[2].vector4_f32[1] = pSource->m[2][1]; - M.r[2].vector4_f32[2] = pSource->m[2][2]; - M.r[2].vector4_f32[3] = pSource->m[2][3]; - - M.r[3].vector4_f32[0] = pSource->m[3][0]; - M.r[3].vector4_f32[1] = pSource->m[3][1]; - M.r[3].vector4_f32[2] = pSource->m[3][2]; - M.r[3].vector4_f32[3] = pSource->m[3][3]; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMMATRIX M; -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - M.r[0] = vld1q_f32_ex(reinterpret_cast(&pSource->_11), 128); - M.r[1] = vld1q_f32_ex(reinterpret_cast(&pSource->_21), 128); - M.r[2] = vld1q_f32_ex(reinterpret_cast(&pSource->_31), 128); - M.r[3] = vld1q_f32_ex(reinterpret_cast(&pSource->_41), 128); -#else - M.r[0] = vld1q_f32(reinterpret_cast(&pSource->_11)); - M.r[1] = vld1q_f32(reinterpret_cast(&pSource->_21)); - M.r[2] = vld1q_f32(reinterpret_cast(&pSource->_31)); - M.r[3] = vld1q_f32(reinterpret_cast(&pSource->_41)); -#endif - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - M.r[0] = _mm_load_ps(&pSource->_11); - M.r[1] = _mm_load_ps(&pSource->_21); - M.r[2] = _mm_load_ps(&pSource->_31); - M.r[3] = _mm_load_ps(&pSource->_41); - return M; -#endif -} - -/**************************************************************************** - * - * Vector and matrix store operations - * - ****************************************************************************/ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreInt(uint32_t* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - *pDestination = XMVectorGetIntX(V); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_u32(pDestination, *reinterpret_cast(&V), 0); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_ss(reinterpret_cast(pDestination), V); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat(float* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - *pDestination = XMVectorGetX(V); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_f32(pDestination, V, 0); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_ss(pDestination, V); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreInt2(uint32_t* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination[0] = V.vector4_u32[0]; - pDestination[1] = V.vector4_u32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); - vst1_u32(pDestination, VL); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreInt2A(uint32_t* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - pDestination[0] = V.vector4_u32[0]; - pDestination[1] = V.vector4_u32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1_u32_ex(pDestination, VL, 64); -#else - vst1_u32(pDestination, VL); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat2(XMFLOAT2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = V.vector4_f32[0]; - pDestination->y = V.vector4_f32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - vst1_f32(reinterpret_cast(pDestination), VL); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat2A(XMFLOAT2A* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = V.vector4_f32[0]; - pDestination->y = V.vector4_f32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1_f32_ex(reinterpret_cast(pDestination), VL, 64); -#else - vst1_f32(reinterpret_cast(pDestination), VL); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreSInt2(XMINT2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = static_cast(V.vector4_f32[0]); - pDestination->y = static_cast(V.vector4_f32[1]); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t v = vget_low_f32(V); - int32x2_t iv = vcvt_s32_f32(v); - vst1_s32(reinterpret_cast(pDestination), iv); -#elif defined(_XM_SSE_INTRINSICS_) - // In case of positive overflow, detect it - XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); - // Float to int conversion - __m128i vResulti = _mm_cvttps_epi32(V); - // If there was positive overflow, set to 0x7FFFFFFF - XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); - vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); - vOverflow = _mm_or_ps(vOverflow, vResult); - // Write two ints - _mm_store_sd(reinterpret_cast(pDestination), - _mm_castps_pd(vOverflow)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUInt2(XMUINT2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = static_cast(V.vector4_f32[0]); - pDestination->y = static_cast(V.vector4_f32[1]); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t v = vget_low_f32(V); - uint32x2_t iv = vcvt_u32_f32(v); - vst1_u32(reinterpret_cast(pDestination), iv); -#elif defined(_XM_SSE_INTRINSICS_) - // Clamp to >=0 - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - // Any numbers that are too big, set to 0xFFFFFFFFU - XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); - XMVECTOR vValue = g_XMUnsignedFix; - // Too large for a signed integer? - XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); - // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise - vValue = _mm_and_ps(vValue, vMask); - // Perform fixup only on numbers too large (Keeps low bit precision) - vResult = _mm_sub_ps(vResult, vValue); - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Convert from signed to unsigned pnly if greater than 0x80000000 - vMask = _mm_and_ps(vMask, g_XMNegativeZero); - vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); - // On those that are too large, set to 0xFFFFFFFF - vResult = _mm_or_ps(vResult, vOverflow); - // Write two uints - _mm_store_sd(reinterpret_cast(pDestination), - _mm_castps_pd(vResult)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreInt3(uint32_t* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination[0] = V.vector4_u32[0]; - pDestination[1] = V.vector4_u32[1]; - pDestination[2] = V.vector4_u32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); - vst1_u32(pDestination, VL); - vst1q_lane_u32(pDestination + 2, *reinterpret_cast(&V), - 2); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); - __m128 z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - _mm_store_ss(reinterpret_cast(&pDestination[2]), z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreInt3A(uint32_t* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - pDestination[0] = V.vector4_u32[0]; - pDestination[1] = V.vector4_u32[1]; - pDestination[2] = V.vector4_u32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t VL = vget_low_u32(vreinterpretq_u32_f32(V)); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1_u32_ex(pDestination, VL, 64); -#else - vst1_u32(pDestination, VL); -#endif - vst1q_lane_u32(pDestination + 2, *reinterpret_cast(&V), - 2); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); - __m128 z = _mm_movehl_ps(V, V); - _mm_store_ss(reinterpret_cast(&pDestination[2]), z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat3(XMFLOAT3* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = V.vector4_f32[0]; - pDestination->y = V.vector4_f32[1]; - pDestination->z = V.vector4_f32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - vst1_f32(reinterpret_cast(pDestination), VL); - vst1q_lane_f32(reinterpret_cast(pDestination) + 2, V, 2); -#elif defined(_XM_SSE4_INTRINSICS_) - *reinterpret_cast(&pDestination->x) = _mm_extract_ps(V, 0); - *reinterpret_cast(&pDestination->y) = _mm_extract_ps(V, 1); - *reinterpret_cast(&pDestination->z) = _mm_extract_ps(V, 2); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); - __m128 z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - _mm_store_ss(&pDestination->z, z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat3A(XMFLOAT3A* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = V.vector4_f32[0]; - pDestination->y = V.vector4_f32[1]; - pDestination->z = V.vector4_f32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1_f32_ex(reinterpret_cast(pDestination), VL, 64); -#else - vst1_f32(reinterpret_cast(pDestination), VL); -#endif - vst1q_lane_f32(reinterpret_cast(pDestination) + 2, V, 2); -#elif defined(_XM_SSE4_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); - *reinterpret_cast(&pDestination->z) = _mm_extract_ps(V, 2); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_sd(reinterpret_cast(pDestination), _mm_castps_pd(V)); - __m128 z = _mm_movehl_ps(V, V); - _mm_store_ss(&pDestination->z, z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreSInt3(XMINT3* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = static_cast(V.vector4_f32[0]); - pDestination->y = static_cast(V.vector4_f32[1]); - pDestination->z = static_cast(V.vector4_f32[2]); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t v = vcvtq_s32_f32(V); - int32x2_t vL = vget_low_s32(v); - vst1_s32(reinterpret_cast(pDestination), vL); - vst1q_lane_s32(reinterpret_cast(pDestination) + 2, v, 2); -#elif defined(_XM_SSE_INTRINSICS_) - // In case of positive overflow, detect it - XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); - // Float to int conversion - __m128i vResulti = _mm_cvttps_epi32(V); - // If there was positive overflow, set to 0x7FFFFFFF - XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); - vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); - vOverflow = _mm_or_ps(vOverflow, vResult); - // Write 3 uints - _mm_store_sd(reinterpret_cast(pDestination), - _mm_castps_pd(vOverflow)); - __m128 z = XM_PERMUTE_PS(vOverflow, _MM_SHUFFLE(2, 2, 2, 2)); - _mm_store_ss(reinterpret_cast(&pDestination->z), z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUInt3(XMUINT3* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = static_cast(V.vector4_f32[0]); - pDestination->y = static_cast(V.vector4_f32[1]); - pDestination->z = static_cast(V.vector4_f32[2]); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t v = vcvtq_u32_f32(V); - uint32x2_t vL = vget_low_u32(v); - vst1_u32(reinterpret_cast(pDestination), vL); - vst1q_lane_u32(reinterpret_cast(pDestination) + 2, v, 2); -#elif defined(_XM_SSE_INTRINSICS_) - // Clamp to >=0 - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - // Any numbers that are too big, set to 0xFFFFFFFFU - XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); - XMVECTOR vValue = g_XMUnsignedFix; - // Too large for a signed integer? - XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); - // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise - vValue = _mm_and_ps(vValue, vMask); - // Perform fixup only on numbers too large (Keeps low bit precision) - vResult = _mm_sub_ps(vResult, vValue); - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Convert from signed to unsigned pnly if greater than 0x80000000 - vMask = _mm_and_ps(vMask, g_XMNegativeZero); - vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); - // On those that are too large, set to 0xFFFFFFFF - vResult = _mm_or_ps(vResult, vOverflow); - // Write 3 uints - _mm_store_sd(reinterpret_cast(pDestination), - _mm_castps_pd(vResult)); - __m128 z = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(2, 2, 2, 2)); - _mm_store_ss(reinterpret_cast(&pDestination->z), z); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreInt4(uint32_t* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination[0] = V.vector4_u32[0]; - pDestination[1] = V.vector4_u32[1]; - pDestination[2] = V.vector4_u32[2]; - pDestination[3] = V.vector4_u32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_u32(pDestination, vreinterpretq_u32_f32(V)); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), - _mm_castps_si128(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreInt4A(uint32_t* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - pDestination[0] = V.vector4_u32[0]; - pDestination[1] = V.vector4_u32[1]; - pDestination[2] = V.vector4_u32[2]; - pDestination[3] = V.vector4_u32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1q_u32_ex(pDestination, V, 128); -#else - vst1q_u32(pDestination, vreinterpretq_u32_f32(V)); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_si128(reinterpret_cast<__m128i*>(pDestination), - _mm_castps_si128(V)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat4(XMFLOAT4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = V.vector4_f32[0]; - pDestination->y = V.vector4_f32[1]; - pDestination->z = V.vector4_f32[2]; - pDestination->w = V.vector4_f32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_f32(reinterpret_cast(pDestination), V); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_storeu_ps(&pDestination->x, V); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat4A(XMFLOAT4A* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = V.vector4_f32[0]; - pDestination->y = V.vector4_f32[1]; - pDestination->z = V.vector4_f32[2]; - pDestination->w = V.vector4_f32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1q_f32_ex(reinterpret_cast(pDestination), V, 128); -#else - vst1q_f32(reinterpret_cast(pDestination), V); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_ps(&pDestination->x, V); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreSInt4(XMINT4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = static_cast(V.vector4_f32[0]); - pDestination->y = static_cast(V.vector4_f32[1]); - pDestination->z = static_cast(V.vector4_f32[2]); - pDestination->w = static_cast(V.vector4_f32[3]); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t v = vcvtq_s32_f32(V); - vst1q_s32(reinterpret_cast(pDestination), v); -#elif defined(_XM_SSE_INTRINSICS_) - // In case of positive overflow, detect it - XMVECTOR vOverflow = _mm_cmpgt_ps(V, g_XMMaxInt); - // Float to int conversion - __m128i vResulti = _mm_cvttps_epi32(V); - // If there was positive overflow, set to 0x7FFFFFFF - XMVECTOR vResult = _mm_and_ps(vOverflow, g_XMAbsMask); - vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti)); - vOverflow = _mm_or_ps(vOverflow, vResult); - _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), - _mm_castps_si128(vOverflow)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUInt4(XMUINT4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - pDestination->x = static_cast(V.vector4_f32[0]); - pDestination->y = static_cast(V.vector4_f32[1]); - pDestination->z = static_cast(V.vector4_f32[2]); - pDestination->w = static_cast(V.vector4_f32[3]); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t v = vcvtq_u32_f32(V); - vst1q_u32(reinterpret_cast(pDestination), v); -#elif defined(_XM_SSE_INTRINSICS_) - // Clamp to >=0 - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - // Any numbers that are too big, set to 0xFFFFFFFFU - XMVECTOR vOverflow = _mm_cmpgt_ps(vResult, g_XMMaxUInt); - XMVECTOR vValue = g_XMUnsignedFix; - // Too large for a signed integer? - XMVECTOR vMask = _mm_cmpge_ps(vResult, vValue); - // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise - vValue = _mm_and_ps(vValue, vMask); - // Perform fixup only on numbers too large (Keeps low bit precision) - vResult = _mm_sub_ps(vResult, vValue); - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Convert from signed to unsigned pnly if greater than 0x80000000 - vMask = _mm_and_ps(vMask, g_XMNegativeZero); - vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask); - // On those that are too large, set to 0xFFFFFFFF - vResult = _mm_or_ps(vResult, vOverflow); - _mm_storeu_si128(reinterpret_cast<__m128i*>(pDestination), - _mm_castps_si128(vResult)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat3x3(XMFLOAT3X3* pDestination, FXMMATRIX M) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - pDestination->m[0][0] = M.r[0].vector4_f32[0]; - pDestination->m[0][1] = M.r[0].vector4_f32[1]; - pDestination->m[0][2] = M.r[0].vector4_f32[2]; - - pDestination->m[1][0] = M.r[1].vector4_f32[0]; - pDestination->m[1][1] = M.r[1].vector4_f32[1]; - pDestination->m[1][2] = M.r[1].vector4_f32[2]; - - pDestination->m[2][0] = M.r[2].vector4_f32[0]; - pDestination->m[2][1] = M.r[2].vector4_f32[1]; - pDestination->m[2][2] = M.r[2].vector4_f32[2]; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); - float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); - vst1q_f32(&pDestination->m[0][0], T2); - - T1 = vextq_f32(M.r[1], M.r[1], 1); - T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); - vst1q_f32(&pDestination->m[1][1], T2); - - vst1q_lane_f32(&pDestination->m[2][2], M.r[2], 2); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp1 = M.r[0]; - XMVECTOR vTemp2 = M.r[1]; - XMVECTOR vTemp3 = M.r[2]; - XMVECTOR vWork = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(0, 0, 2, 2)); - vTemp1 = _mm_shuffle_ps(vTemp1, vWork, _MM_SHUFFLE(2, 0, 1, 0)); - _mm_storeu_ps(&pDestination->m[0][0], vTemp1); - vTemp2 = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1)); - _mm_storeu_ps(&pDestination->m[1][1], vTemp2); - vTemp3 = XM_PERMUTE_PS(vTemp3, _MM_SHUFFLE(2, 2, 2, 2)); - _mm_store_ss(&pDestination->m[2][2], vTemp3); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat4x3(XMFLOAT4X3* pDestination, FXMMATRIX M) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - pDestination->m[0][0] = M.r[0].vector4_f32[0]; - pDestination->m[0][1] = M.r[0].vector4_f32[1]; - pDestination->m[0][2] = M.r[0].vector4_f32[2]; - - pDestination->m[1][0] = M.r[1].vector4_f32[0]; - pDestination->m[1][1] = M.r[1].vector4_f32[1]; - pDestination->m[1][2] = M.r[1].vector4_f32[2]; - - pDestination->m[2][0] = M.r[2].vector4_f32[0]; - pDestination->m[2][1] = M.r[2].vector4_f32[1]; - pDestination->m[2][2] = M.r[2].vector4_f32[2]; - - pDestination->m[3][0] = M.r[3].vector4_f32[0]; - pDestination->m[3][1] = M.r[3].vector4_f32[1]; - pDestination->m[3][2] = M.r[3].vector4_f32[2]; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); - float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); - vst1q_f32(&pDestination->m[0][0], T2); - - T1 = vextq_f32(M.r[1], M.r[1], 1); - T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); - vst1q_f32(&pDestination->m[1][1], T2); - - T1 = vdupq_lane_f32(vget_high_f32(M.r[2]), 0); - T2 = vextq_f32(T1, M.r[3], 3); - vst1q_f32(&pDestination->m[2][2], T2); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp1 = M.r[0]; - XMVECTOR vTemp2 = M.r[1]; - XMVECTOR vTemp3 = M.r[2]; - XMVECTOR vTemp4 = M.r[3]; - XMVECTOR vTemp2x = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1)); - vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(2, 2, 0, 0)); - vTemp1 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(0, 2, 1, 0)); - vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(0, 0, 2, 2)); - vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 1, 2, 0)); - _mm_storeu_ps(&pDestination->m[0][0], vTemp1); - _mm_storeu_ps(&pDestination->m[1][1], vTemp2x); - _mm_storeu_ps(&pDestination->m[2][2], vTemp3); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat4x3A(XMFLOAT4X3A* pDestination, FXMMATRIX M) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - - pDestination->m[0][0] = M.r[0].vector4_f32[0]; - pDestination->m[0][1] = M.r[0].vector4_f32[1]; - pDestination->m[0][2] = M.r[0].vector4_f32[2]; - - pDestination->m[1][0] = M.r[1].vector4_f32[0]; - pDestination->m[1][1] = M.r[1].vector4_f32[1]; - pDestination->m[1][2] = M.r[1].vector4_f32[2]; - - pDestination->m[2][0] = M.r[2].vector4_f32[0]; - pDestination->m[2][1] = M.r[2].vector4_f32[1]; - pDestination->m[2][2] = M.r[2].vector4_f32[2]; - - pDestination->m[3][0] = M.r[3].vector4_f32[0]; - pDestination->m[3][1] = M.r[3].vector4_f32[1]; - pDestination->m[3][2] = M.r[3].vector4_f32[2]; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); - float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); - vst1q_f32_ex(&pDestination->m[0][0], T2, 128); - - T1 = vextq_f32(M.r[1], M.r[1], 1); - T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); - vst1q_f32_ex(&pDestination->m[1][1], T2, 128); - - T1 = vdupq_lane_f32(vget_high_f32(M.r[2]), 0); - T2 = vextq_f32(T1, M.r[3], 3); - vst1q_f32_ex(&pDestination->m[2][2], T2, 128); -#else - float32x4_t T1 = vextq_f32(M.r[0], M.r[1], 1); - float32x4_t T2 = vbslq_f32(g_XMMask3, M.r[0], T1); - vst1q_f32(&pDestination->m[0][0], T2); - - T1 = vextq_f32(M.r[1], M.r[1], 1); - T2 = vcombine_f32(vget_low_f32(T1), vget_low_f32(M.r[2])); - vst1q_f32(&pDestination->m[1][1], T2); - - T1 = vdupq_lane_f32(vget_high_f32(M.r[2]), 0); - T2 = vextq_f32(T1, M.r[3], 3); - vst1q_f32(&pDestination->m[2][2], T2); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - // x1,y1,z1,w1 - XMVECTOR vTemp1 = M.r[0]; - // x2,y2,z2,w2 - XMVECTOR vTemp2 = M.r[1]; - // x3,y3,z3,w3 - XMVECTOR vTemp3 = M.r[2]; - // x4,y4,z4,w4 - XMVECTOR vTemp4 = M.r[3]; - // z1,z1,x2,y2 - XMVECTOR vTemp = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(1, 0, 2, 2)); - // y2,z2,x3,y3 (Final) - vTemp2 = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1)); - // x1,y1,z1,x2 (Final) - vTemp1 = _mm_shuffle_ps(vTemp1, vTemp, _MM_SHUFFLE(2, 0, 1, 0)); - // z3,z3,x4,x4 - vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(0, 0, 2, 2)); - // z3,x4,y4,z4 (Final) - vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 1, 2, 0)); - // Store in 3 operations - _mm_store_ps(&pDestination->m[0][0], vTemp1); - _mm_store_ps(&pDestination->m[1][1], vTemp2); - _mm_store_ps(&pDestination->m[2][2], vTemp3); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat3x4(XMFLOAT3X4* pDestination, FXMMATRIX M) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - pDestination->m[0][0] = M.r[0].vector4_f32[0]; - pDestination->m[0][1] = M.r[1].vector4_f32[0]; - pDestination->m[0][2] = M.r[2].vector4_f32[0]; - pDestination->m[0][3] = M.r[3].vector4_f32[0]; - - pDestination->m[1][0] = M.r[0].vector4_f32[1]; - pDestination->m[1][1] = M.r[1].vector4_f32[1]; - pDestination->m[1][2] = M.r[2].vector4_f32[1]; - pDestination->m[1][3] = M.r[3].vector4_f32[1]; - - pDestination->m[2][0] = M.r[0].vector4_f32[2]; - pDestination->m[2][1] = M.r[1].vector4_f32[2]; - pDestination->m[2][2] = M.r[2].vector4_f32[2]; - pDestination->m[2][3] = M.r[3].vector4_f32[2]; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]); - float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]); - - float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]); - float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); - - vst1q_f32(&pDestination->m[0][0], T0.val[0]); - vst1q_f32(&pDestination->m[1][0], T0.val[1]); - vst1q_f32(&pDestination->m[2][0], T1.val[0]); -#elif defined(_XM_SSE_INTRINSICS_) - // x.x,x.y,y.x,y.y - XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); - // x.z,x.w,y.z,y.w - XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); - // z.x,z.y,w.x,w.y - XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); - // z.z,z.w,w.z,w.w - XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); - - // x.x,y.x,z.x,w.x - XMVECTOR r0 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); - // x.y,y.y,z.y,w.y - XMVECTOR r1 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); - // x.z,y.z,z.z,w.z - XMVECTOR r2 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); - - _mm_storeu_ps(&pDestination->m[0][0], r0); - _mm_storeu_ps(&pDestination->m[1][0], r1); - _mm_storeu_ps(&pDestination->m[2][0], r2); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat3x4A(XMFLOAT3X4A* pDestination, FXMMATRIX M) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - - pDestination->m[0][0] = M.r[0].vector4_f32[0]; - pDestination->m[0][1] = M.r[1].vector4_f32[0]; - pDestination->m[0][2] = M.r[2].vector4_f32[0]; - pDestination->m[0][3] = M.r[3].vector4_f32[0]; - - pDestination->m[1][0] = M.r[0].vector4_f32[1]; - pDestination->m[1][1] = M.r[1].vector4_f32[1]; - pDestination->m[1][2] = M.r[2].vector4_f32[1]; - pDestination->m[1][3] = M.r[3].vector4_f32[1]; - - pDestination->m[2][0] = M.r[0].vector4_f32[2]; - pDestination->m[2][1] = M.r[1].vector4_f32[2]; - pDestination->m[2][2] = M.r[2].vector4_f32[2]; - pDestination->m[2][3] = M.r[3].vector4_f32[2]; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]); - float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]); - - float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]); - float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); - -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1q_f32_ex(&pDestination->m[0][0], T0.val[0], 128); - vst1q_f32_ex(&pDestination->m[1][0], T0.val[1], 128); - vst1q_f32_ex(&pDestination->m[2][0], T1.val[0], 128); -#else - vst1q_f32(&pDestination->m[0][0], T0.val[0]); - vst1q_f32(&pDestination->m[1][0], T0.val[1]); - vst1q_f32(&pDestination->m[2][0], T1.val[0]); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - // x.x,x.y,y.x,y.y - XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); - // x.z,x.w,y.z,y.w - XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); - // z.x,z.y,w.x,w.y - XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); - // z.z,z.w,w.z,w.w - XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); - - // x.x,y.x,z.x,w.x - XMVECTOR r0 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); - // x.y,y.y,z.y,w.y - XMVECTOR r1 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); - // x.z,y.z,z.z,w.z - XMVECTOR r2 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); - - _mm_store_ps(&pDestination->m[0][0], r0); - _mm_store_ps(&pDestination->m[1][0], r1); - _mm_store_ps(&pDestination->m[2][0], r2); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat4x4(XMFLOAT4X4* pDestination, FXMMATRIX M) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - pDestination->m[0][0] = M.r[0].vector4_f32[0]; - pDestination->m[0][1] = M.r[0].vector4_f32[1]; - pDestination->m[0][2] = M.r[0].vector4_f32[2]; - pDestination->m[0][3] = M.r[0].vector4_f32[3]; - - pDestination->m[1][0] = M.r[1].vector4_f32[0]; - pDestination->m[1][1] = M.r[1].vector4_f32[1]; - pDestination->m[1][2] = M.r[1].vector4_f32[2]; - pDestination->m[1][3] = M.r[1].vector4_f32[3]; - - pDestination->m[2][0] = M.r[2].vector4_f32[0]; - pDestination->m[2][1] = M.r[2].vector4_f32[1]; - pDestination->m[2][2] = M.r[2].vector4_f32[2]; - pDestination->m[2][3] = M.r[2].vector4_f32[3]; - - pDestination->m[3][0] = M.r[3].vector4_f32[0]; - pDestination->m[3][1] = M.r[3].vector4_f32[1]; - pDestination->m[3][2] = M.r[3].vector4_f32[2]; - pDestination->m[3][3] = M.r[3].vector4_f32[3]; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_f32(reinterpret_cast(&pDestination->_11), M.r[0]); - vst1q_f32(reinterpret_cast(&pDestination->_21), M.r[1]); - vst1q_f32(reinterpret_cast(&pDestination->_31), M.r[2]); - vst1q_f32(reinterpret_cast(&pDestination->_41), M.r[3]); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_storeu_ps(&pDestination->_11, M.r[0]); - _mm_storeu_ps(&pDestination->_21, M.r[1]); - _mm_storeu_ps(&pDestination->_31, M.r[2]); - _mm_storeu_ps(&pDestination->_41, M.r[3]); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat4x4A(XMFLOAT4X4A* pDestination, FXMMATRIX M) noexcept { - assert(pDestination); - assert((reinterpret_cast(pDestination) & 0xF) == 0); -#if defined(_XM_NO_INTRINSICS_) - - pDestination->m[0][0] = M.r[0].vector4_f32[0]; - pDestination->m[0][1] = M.r[0].vector4_f32[1]; - pDestination->m[0][2] = M.r[0].vector4_f32[2]; - pDestination->m[0][3] = M.r[0].vector4_f32[3]; - - pDestination->m[1][0] = M.r[1].vector4_f32[0]; - pDestination->m[1][1] = M.r[1].vector4_f32[1]; - pDestination->m[1][2] = M.r[1].vector4_f32[2]; - pDestination->m[1][3] = M.r[1].vector4_f32[3]; - - pDestination->m[2][0] = M.r[2].vector4_f32[0]; - pDestination->m[2][1] = M.r[2].vector4_f32[1]; - pDestination->m[2][2] = M.r[2].vector4_f32[2]; - pDestination->m[2][3] = M.r[2].vector4_f32[3]; - - pDestination->m[3][0] = M.r[3].vector4_f32[0]; - pDestination->m[3][1] = M.r[3].vector4_f32[1]; - pDestination->m[3][2] = M.r[3].vector4_f32[2]; - pDestination->m[3][3] = M.r[3].vector4_f32[3]; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - vst1q_f32_ex(reinterpret_cast(&pDestination->_11), M.r[0], 128); - vst1q_f32_ex(reinterpret_cast(&pDestination->_21), M.r[1], 128); - vst1q_f32_ex(reinterpret_cast(&pDestination->_31), M.r[2], 128); - vst1q_f32_ex(reinterpret_cast(&pDestination->_41), M.r[3], 128); -#else - vst1q_f32(reinterpret_cast(&pDestination->_11), M.r[0]); - vst1q_f32(reinterpret_cast(&pDestination->_21), M.r[1]); - vst1q_f32(reinterpret_cast(&pDestination->_31), M.r[2]); - vst1q_f32(reinterpret_cast(&pDestination->_41), M.r[3]); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_ps(&pDestination->_11, M.r[0]); - _mm_store_ps(&pDestination->_21, M.r[1]); - _mm_store_ps(&pDestination->_31, M.r[2]); - _mm_store_ps(&pDestination->_41, M.r[3]); -#endif -} diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXMathMatrix.inl b/targets/app/linux/Stubs/DirectXMath/DirectXMathMatrix.inl deleted file mode 100644 index d4ff70e09..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXMathMatrix.inl +++ /dev/null @@ -1,3484 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXMathMatrix.inl -- SIMD C++ Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -/**************************************************************************** - * - * Matrix - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(push) -#pragma float_control(precise, on) -#endif - -// Return true if any entry in the matrix is NaN -inline bool XM_CALLCONV XMMatrixIsNaN(FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - size_t i = 16; - auto pWork = reinterpret_cast(&M.m[0][0]); - do { - // Fetch value into integer unit - uint32_t uTest = pWork[0]; - // Remove sign - uTest &= 0x7FFFFFFFU; - // NaN is 0x7F800001 through 0x7FFFFFFF inclusive - uTest -= 0x7F800001U; - if (uTest < 0x007FFFFFU) { - break; // NaN found - } - ++pWork; // Next entry - } while (--i); - return (i != 0); // i == 0 if nothing matched -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Load in registers - float32x4_t vX = M.r[0]; - float32x4_t vY = M.r[1]; - float32x4_t vZ = M.r[2]; - float32x4_t vW = M.r[3]; - // Test themselves to check for NaN - uint32x4_t xmask = vmvnq_u32(vceqq_f32(vX, vX)); - uint32x4_t ymask = vmvnq_u32(vceqq_f32(vY, vY)); - uint32x4_t zmask = vmvnq_u32(vceqq_f32(vZ, vZ)); - uint32x4_t wmask = vmvnq_u32(vceqq_f32(vW, vW)); - // Or all the results - xmask = vorrq_u32(xmask, zmask); - ymask = vorrq_u32(ymask, wmask); - xmask = vorrq_u32(xmask, ymask); - // If any tested true, return true - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(xmask)), - vget_high_u8(vreinterpretq_u8_u32(xmask))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - return (r != 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Load in registers - XMVECTOR vX = M.r[0]; - XMVECTOR vY = M.r[1]; - XMVECTOR vZ = M.r[2]; - XMVECTOR vW = M.r[3]; - // Test themselves to check for NaN - vX = _mm_cmpneq_ps(vX, vX); - vY = _mm_cmpneq_ps(vY, vY); - vZ = _mm_cmpneq_ps(vZ, vZ); - vW = _mm_cmpneq_ps(vW, vW); - // Or all the results - vX = _mm_or_ps(vX, vZ); - vY = _mm_or_ps(vY, vW); - vX = _mm_or_ps(vX, vY); - // If any tested true, return true - return (_mm_movemask_ps(vX) != 0); -#else -#endif -} - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(pop) -#endif - -//------------------------------------------------------------------------------ - -// Return true if any entry in the matrix is +/-INF -inline bool XM_CALLCONV XMMatrixIsInfinite(FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - size_t i = 16; - auto pWork = reinterpret_cast(&M.m[0][0]); - do { - // Fetch value into integer unit - uint32_t uTest = pWork[0]; - // Remove sign - uTest &= 0x7FFFFFFFU; - // INF is 0x7F800000 - if (uTest == 0x7F800000U) { - break; // INF found - } - ++pWork; // Next entry - } while (--i); - return (i != 0); // i == 0 if nothing matched -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Load in registers - float32x4_t vX = M.r[0]; - float32x4_t vY = M.r[1]; - float32x4_t vZ = M.r[2]; - float32x4_t vW = M.r[3]; - // Mask off the sign bits - vX = vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(vX), g_XMAbsMask)); - vY = vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(vY), g_XMAbsMask)); - vZ = vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(vZ), g_XMAbsMask)); - vW = vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(vW), g_XMAbsMask)); - // Compare to infinity - uint32x4_t xmask = vceqq_f32(vX, g_XMInfinity); - uint32x4_t ymask = vceqq_f32(vY, g_XMInfinity); - uint32x4_t zmask = vceqq_f32(vZ, g_XMInfinity); - uint32x4_t wmask = vceqq_f32(vW, g_XMInfinity); - // Or the answers together - xmask = vorrq_u32(xmask, zmask); - ymask = vorrq_u32(ymask, wmask); - xmask = vorrq_u32(xmask, ymask); - // If any tested true, return true - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(xmask)), - vget_high_u8(vreinterpretq_u8_u32(xmask))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - return (r != 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Mask off the sign bits - XMVECTOR vTemp1 = _mm_and_ps(M.r[0], g_XMAbsMask); - XMVECTOR vTemp2 = _mm_and_ps(M.r[1], g_XMAbsMask); - XMVECTOR vTemp3 = _mm_and_ps(M.r[2], g_XMAbsMask); - XMVECTOR vTemp4 = _mm_and_ps(M.r[3], g_XMAbsMask); - // Compare to infinity - vTemp1 = _mm_cmpeq_ps(vTemp1, g_XMInfinity); - vTemp2 = _mm_cmpeq_ps(vTemp2, g_XMInfinity); - vTemp3 = _mm_cmpeq_ps(vTemp3, g_XMInfinity); - vTemp4 = _mm_cmpeq_ps(vTemp4, g_XMInfinity); - // Or the answers together - vTemp1 = _mm_or_ps(vTemp1, vTemp2); - vTemp3 = _mm_or_ps(vTemp3, vTemp4); - vTemp1 = _mm_or_ps(vTemp1, vTemp3); - // If any are infinity, the signs are true. - return (_mm_movemask_ps(vTemp1) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -// Return true if the XMMatrix is equal to identity -inline bool XM_CALLCONV XMMatrixIsIdentity(FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - // Use the integer pipeline to reduce branching to a minimum - auto pWork = reinterpret_cast(&M.m[0][0]); - // Convert 1.0f to zero and or them together - uint32_t uOne = pWork[0] ^ 0x3F800000U; - // Or all the 0.0f entries together - uint32_t uZero = pWork[1]; - uZero |= pWork[2]; - uZero |= pWork[3]; - // 2nd row - uZero |= pWork[4]; - uOne |= pWork[5] ^ 0x3F800000U; - uZero |= pWork[6]; - uZero |= pWork[7]; - // 3rd row - uZero |= pWork[8]; - uZero |= pWork[9]; - uOne |= pWork[10] ^ 0x3F800000U; - uZero |= pWork[11]; - // 4th row - uZero |= pWork[12]; - uZero |= pWork[13]; - uZero |= pWork[14]; - uOne |= pWork[15] ^ 0x3F800000U; - // If all zero entries are zero, the uZero==0 - uZero &= 0x7FFFFFFF; // Allow -0.0f - // If all 1.0f entries are 1.0f, then uOne==0 - uOne |= uZero; - return (uOne == 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t xmask = vceqq_f32(M.r[0], g_XMIdentityR0); - uint32x4_t ymask = vceqq_f32(M.r[1], g_XMIdentityR1); - uint32x4_t zmask = vceqq_f32(M.r[2], g_XMIdentityR2); - uint32x4_t wmask = vceqq_f32(M.r[3], g_XMIdentityR3); - xmask = vandq_u32(xmask, zmask); - ymask = vandq_u32(ymask, wmask); - xmask = vandq_u32(xmask, ymask); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(xmask)), - vget_high_u8(vreinterpretq_u8_u32(xmask))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - return (r == 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0], g_XMIdentityR0); - XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1], g_XMIdentityR1); - XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2], g_XMIdentityR2); - XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3], g_XMIdentityR3); - vTemp1 = _mm_and_ps(vTemp1, vTemp2); - vTemp3 = _mm_and_ps(vTemp3, vTemp4); - vTemp1 = _mm_and_ps(vTemp1, vTemp3); - return (_mm_movemask_ps(vTemp1) == 0x0f); -#endif -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ -// Perform a 4x4 matrix multiply by a 4x4 matrix -inline XMMATRIX XM_CALLCONV XMMatrixMultiply(FXMMATRIX M1, - CXMMATRIX M2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMMATRIX mResult; - // Cache the invariants in registers - float x = M1.m[0][0]; - float y = M1.m[0][1]; - float z = M1.m[0][2]; - float w = M1.m[0][3]; - // Perform the operation on the first row - mResult.m[0][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + - (M2.m[3][0] * w); - mResult.m[0][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + - (M2.m[3][1] * w); - mResult.m[0][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + - (M2.m[3][2] * w); - mResult.m[0][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + - (M2.m[3][3] * w); - // Repeat for all the other rows - x = M1.m[1][0]; - y = M1.m[1][1]; - z = M1.m[1][2]; - w = M1.m[1][3]; - mResult.m[1][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + - (M2.m[3][0] * w); - mResult.m[1][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + - (M2.m[3][1] * w); - mResult.m[1][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + - (M2.m[3][2] * w); - mResult.m[1][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + - (M2.m[3][3] * w); - x = M1.m[2][0]; - y = M1.m[2][1]; - z = M1.m[2][2]; - w = M1.m[2][3]; - mResult.m[2][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + - (M2.m[3][0] * w); - mResult.m[2][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + - (M2.m[3][1] * w); - mResult.m[2][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + - (M2.m[3][2] * w); - mResult.m[2][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + - (M2.m[3][3] * w); - x = M1.m[3][0]; - y = M1.m[3][1]; - z = M1.m[3][2]; - w = M1.m[3][3]; - mResult.m[3][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + - (M2.m[3][0] * w); - mResult.m[3][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + - (M2.m[3][1] * w); - mResult.m[3][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + - (M2.m[3][2] * w); - mResult.m[3][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + - (M2.m[3][3] * w); - return mResult; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMMATRIX mResult; - float32x2_t VL = vget_low_f32(M1.r[0]); - float32x2_t VH = vget_high_f32(M1.r[0]); - // Perform the operation on the first row - float32x4_t vX = vmulq_lane_f32(M2.r[0], VL, 0); - float32x4_t vY = vmulq_lane_f32(M2.r[1], VL, 1); - float32x4_t vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - float32x4_t vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - mResult.r[0] = vaddq_f32(vZ, vW); - // Repeat for the other 3 rows - VL = vget_low_f32(M1.r[1]); - VH = vget_high_f32(M1.r[1]); - vX = vmulq_lane_f32(M2.r[0], VL, 0); - vY = vmulq_lane_f32(M2.r[1], VL, 1); - vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - mResult.r[1] = vaddq_f32(vZ, vW); - VL = vget_low_f32(M1.r[2]); - VH = vget_high_f32(M1.r[2]); - vX = vmulq_lane_f32(M2.r[0], VL, 0); - vY = vmulq_lane_f32(M2.r[1], VL, 1); - vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - mResult.r[2] = vaddq_f32(vZ, vW); - VL = vget_low_f32(M1.r[3]); - VH = vget_high_f32(M1.r[3]); - vX = vmulq_lane_f32(M2.r[0], VL, 0); - vY = vmulq_lane_f32(M2.r[1], VL, 1); - vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - mResult.r[3] = vaddq_f32(vZ, vW); - return mResult; -#elif defined(_XM_AVX2_INTRINSICS_) - __m256 t0 = _mm256_castps128_ps256(M1.r[0]); - t0 = _mm256_insertf128_ps(t0, M1.r[1], 1); - __m256 t1 = _mm256_castps128_ps256(M1.r[2]); - t1 = _mm256_insertf128_ps(t1, M1.r[3], 1); - - __m256 u0 = _mm256_castps128_ps256(M2.r[0]); - u0 = _mm256_insertf128_ps(u0, M2.r[1], 1); - __m256 u1 = _mm256_castps128_ps256(M2.r[2]); - u1 = _mm256_insertf128_ps(u1, M2.r[3], 1); - - __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0)); - __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0)); - __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00); - __m256 c0 = _mm256_mul_ps(a0, b0); - __m256 c1 = _mm256_mul_ps(a1, b0); - - a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1)); - a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1)); - b0 = _mm256_permute2f128_ps(u0, u0, 0x11); - __m256 c2 = _mm256_fmadd_ps(a0, b0, c0); - __m256 c3 = _mm256_fmadd_ps(a1, b0, c1); - - a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2)); - a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00); - __m256 c4 = _mm256_mul_ps(a0, b1); - __m256 c5 = _mm256_mul_ps(a1, b1); - - a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3)); - a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3)); - b1 = _mm256_permute2f128_ps(u1, u1, 0x11); - __m256 c6 = _mm256_fmadd_ps(a0, b1, c4); - __m256 c7 = _mm256_fmadd_ps(a1, b1, c5); - - t0 = _mm256_add_ps(c2, c6); - t1 = _mm256_add_ps(c3, c7); - - XMMATRIX mResult; - mResult.r[0] = _mm256_castps256_ps128(t0); - mResult.r[1] = _mm256_extractf128_ps(t0, 1); - mResult.r[2] = _mm256_castps256_ps128(t1); - mResult.r[3] = _mm256_extractf128_ps(t1, 1); - return mResult; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX mResult; - // Splat the component X,Y,Z then W -#if defined(_XM_AVX_INTRINSICS_) - XMVECTOR vX = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 0); - XMVECTOR vY = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 1); - XMVECTOR vZ = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 2); - XMVECTOR vW = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 3); -#else - // Use vW to hold the original row - XMVECTOR vW = M1.r[0]; - XMVECTOR vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - XMVECTOR vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - // Perform the operation on the first row - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - // Perform a binary add to reduce cumulative errors - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - mResult.r[0] = vX; - // Repeat for the other 3 rows -#if defined(_XM_AVX_INTRINSICS_) - vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 0); - vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 1); - vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 2); - vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 3); -#else - vW = M1.r[1]; - vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - mResult.r[1] = vX; -#if defined(_XM_AVX_INTRINSICS_) - vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 0); - vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 1); - vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 2); - vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 3); -#else - vW = M1.r[2]; - vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - mResult.r[2] = vX; -#if defined(_XM_AVX_INTRINSICS_) - vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 0); - vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 1); - vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 2); - vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 3); -#else - vW = M1.r[3]; - vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - mResult.r[3] = vX; - return mResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose(FXMMATRIX M1, - CXMMATRIX M2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMMATRIX mResult; - // Cache the invariants in registers - float x = M2.m[0][0]; - float y = M2.m[1][0]; - float z = M2.m[2][0]; - float w = M2.m[3][0]; - // Perform the operation on the first row - mResult.m[0][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + - (M1.m[0][3] * w); - mResult.m[0][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + - (M1.m[1][3] * w); - mResult.m[0][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + - (M1.m[2][3] * w); - mResult.m[0][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + - (M1.m[3][3] * w); - // Repeat for all the other rows - x = M2.m[0][1]; - y = M2.m[1][1]; - z = M2.m[2][1]; - w = M2.m[3][1]; - mResult.m[1][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + - (M1.m[0][3] * w); - mResult.m[1][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + - (M1.m[1][3] * w); - mResult.m[1][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + - (M1.m[2][3] * w); - mResult.m[1][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + - (M1.m[3][3] * w); - x = M2.m[0][2]; - y = M2.m[1][2]; - z = M2.m[2][2]; - w = M2.m[3][2]; - mResult.m[2][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + - (M1.m[0][3] * w); - mResult.m[2][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + - (M1.m[1][3] * w); - mResult.m[2][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + - (M1.m[2][3] * w); - mResult.m[2][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + - (M1.m[3][3] * w); - x = M2.m[0][3]; - y = M2.m[1][3]; - z = M2.m[2][3]; - w = M2.m[3][3]; - mResult.m[3][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + - (M1.m[0][3] * w); - mResult.m[3][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + - (M1.m[1][3] * w); - mResult.m[3][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + - (M1.m[2][3] * w); - mResult.m[3][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + - (M1.m[3][3] * w); - return mResult; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(M1.r[0]); - float32x2_t VH = vget_high_f32(M1.r[0]); - // Perform the operation on the first row - float32x4_t vX = vmulq_lane_f32(M2.r[0], VL, 0); - float32x4_t vY = vmulq_lane_f32(M2.r[1], VL, 1); - float32x4_t vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - float32x4_t vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - float32x4_t r0 = vaddq_f32(vZ, vW); - // Repeat for the other 3 rows - VL = vget_low_f32(M1.r[1]); - VH = vget_high_f32(M1.r[1]); - vX = vmulq_lane_f32(M2.r[0], VL, 0); - vY = vmulq_lane_f32(M2.r[1], VL, 1); - vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - float32x4_t r1 = vaddq_f32(vZ, vW); - VL = vget_low_f32(M1.r[2]); - VH = vget_high_f32(M1.r[2]); - vX = vmulq_lane_f32(M2.r[0], VL, 0); - vY = vmulq_lane_f32(M2.r[1], VL, 1); - vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - float32x4_t r2 = vaddq_f32(vZ, vW); - VL = vget_low_f32(M1.r[3]); - VH = vget_high_f32(M1.r[3]); - vX = vmulq_lane_f32(M2.r[0], VL, 0); - vY = vmulq_lane_f32(M2.r[1], VL, 1); - vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0); - vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1); - float32x4_t r3 = vaddq_f32(vZ, vW); - - // Transpose result - float32x4x2_t P0 = vzipq_f32(r0, r2); - float32x4x2_t P1 = vzipq_f32(r1, r3); - - float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]); - float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); - - XMMATRIX mResult; - mResult.r[0] = T0.val[0]; - mResult.r[1] = T0.val[1]; - mResult.r[2] = T1.val[0]; - mResult.r[3] = T1.val[1]; - return mResult; -#elif defined(_XM_AVX2_INTRINSICS_) - __m256 t0 = _mm256_castps128_ps256(M1.r[0]); - t0 = _mm256_insertf128_ps(t0, M1.r[1], 1); - __m256 t1 = _mm256_castps128_ps256(M1.r[2]); - t1 = _mm256_insertf128_ps(t1, M1.r[3], 1); - - __m256 u0 = _mm256_castps128_ps256(M2.r[0]); - u0 = _mm256_insertf128_ps(u0, M2.r[1], 1); - __m256 u1 = _mm256_castps128_ps256(M2.r[2]); - u1 = _mm256_insertf128_ps(u1, M2.r[3], 1); - - __m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0)); - __m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0)); - __m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00); - __m256 c0 = _mm256_mul_ps(a0, b0); - __m256 c1 = _mm256_mul_ps(a1, b0); - - a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1)); - a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1)); - b0 = _mm256_permute2f128_ps(u0, u0, 0x11); - __m256 c2 = _mm256_fmadd_ps(a0, b0, c0); - __m256 c3 = _mm256_fmadd_ps(a1, b0, c1); - - a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2)); - a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00); - __m256 c4 = _mm256_mul_ps(a0, b1); - __m256 c5 = _mm256_mul_ps(a1, b1); - - a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3)); - a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3)); - b1 = _mm256_permute2f128_ps(u1, u1, 0x11); - __m256 c6 = _mm256_fmadd_ps(a0, b1, c4); - __m256 c7 = _mm256_fmadd_ps(a1, b1, c5); - - t0 = _mm256_add_ps(c2, c6); - t1 = _mm256_add_ps(c3, c7); - - // Transpose result - __m256 vTemp = _mm256_unpacklo_ps(t0, t1); - __m256 vTemp2 = _mm256_unpackhi_ps(t0, t1); - __m256 vTemp3 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); - __m256 vTemp4 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); - vTemp = _mm256_unpacklo_ps(vTemp3, vTemp4); - vTemp2 = _mm256_unpackhi_ps(vTemp3, vTemp4); - t0 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); - t1 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); - - XMMATRIX mResult; - mResult.r[0] = _mm256_castps256_ps128(t0); - mResult.r[1] = _mm256_extractf128_ps(t0, 1); - mResult.r[2] = _mm256_castps256_ps128(t1); - mResult.r[3] = _mm256_extractf128_ps(t1, 1); - return mResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the component X,Y,Z then W -#if defined(_XM_AVX_INTRINSICS_) - XMVECTOR vX = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 0); - XMVECTOR vY = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 1); - XMVECTOR vZ = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 2); - XMVECTOR vW = - _mm_broadcast_ss(reinterpret_cast(&M1.r[0]) + 3); -#else - // Use vW to hold the original row - XMVECTOR vW = M1.r[0]; - XMVECTOR vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - XMVECTOR vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - // Perform the operation on the first row - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - // Perform a binary add to reduce cumulative errors - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - XMVECTOR r0 = vX; - // Repeat for the other 3 rows -#if defined(_XM_AVX_INTRINSICS_) - vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 0); - vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 1); - vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 2); - vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[1]) + 3); -#else - vW = M1.r[1]; - vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - XMVECTOR r1 = vX; -#if defined(_XM_AVX_INTRINSICS_) - vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 0); - vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 1); - vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 2); - vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[2]) + 3); -#else - vW = M1.r[2]; - vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - XMVECTOR r2 = vX; -#if defined(_XM_AVX_INTRINSICS_) - vX = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 0); - vY = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 1); - vZ = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 2); - vW = _mm_broadcast_ss(reinterpret_cast(&M1.r[3]) + 3); -#else - vW = M1.r[3]; - vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0)); - vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1)); - vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2)); - vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3)); -#endif - vX = _mm_mul_ps(vX, M2.r[0]); - vY = _mm_mul_ps(vY, M2.r[1]); - vZ = _mm_mul_ps(vZ, M2.r[2]); - vW = _mm_mul_ps(vW, M2.r[3]); - vX = _mm_add_ps(vX, vZ); - vY = _mm_add_ps(vY, vW); - vX = _mm_add_ps(vX, vY); - XMVECTOR r3 = vX; - - // Transpose result - // x.x,x.y,y.x,y.y - XMVECTOR vTemp1 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 0, 1, 0)); - // x.z,x.w,y.z,y.w - XMVECTOR vTemp3 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(3, 2, 3, 2)); - // z.x,z.y,w.x,w.y - XMVECTOR vTemp2 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(1, 0, 1, 0)); - // z.z,z.w,w.z,w.w - XMVECTOR vTemp4 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); - - XMMATRIX mResult; - // x.x,y.x,z.x,w.x - mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); - // x.y,y.y,z.y,w.y - mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); - // x.z,y.z,z.z,w.z - mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); - // x.w,y.w,z.w,w.w - mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); - return mResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixTranspose(FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - // Original matrix: - // - // m00m01m02m03 - // m10m11m12m13 - // m20m21m22m23 - // m30m31m32m33 - - XMMATRIX P; - P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00m20m01m21 - P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10m30m11m31 - P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02m22m03m23 - P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12m32m13m33 - - XMMATRIX MT; - MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00m10m20m30 - MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01m11m21m31 - MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02m12m22m32 - MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03m13m23m33 - return MT; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]); - float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]); - - float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]); - float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]); - - XMMATRIX mResult; - mResult.r[0] = T0.val[0]; - mResult.r[1] = T0.val[1]; - mResult.r[2] = T1.val[0]; - mResult.r[3] = T1.val[1]; - return mResult; -#elif defined(_XM_AVX2_INTRINSICS_) - __m256 t0 = _mm256_castps128_ps256(M.r[0]); - t0 = _mm256_insertf128_ps(t0, M.r[1], 1); - __m256 t1 = _mm256_castps128_ps256(M.r[2]); - t1 = _mm256_insertf128_ps(t1, M.r[3], 1); - - __m256 vTemp = _mm256_unpacklo_ps(t0, t1); - __m256 vTemp2 = _mm256_unpackhi_ps(t0, t1); - __m256 vTemp3 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); - __m256 vTemp4 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); - vTemp = _mm256_unpacklo_ps(vTemp3, vTemp4); - vTemp2 = _mm256_unpackhi_ps(vTemp3, vTemp4); - t0 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20); - t1 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31); - - XMMATRIX mResult; - mResult.r[0] = _mm256_castps256_ps128(t0); - mResult.r[1] = _mm256_extractf128_ps(t0, 1); - mResult.r[2] = _mm256_castps256_ps128(t1); - mResult.r[3] = _mm256_extractf128_ps(t1, 1); - return mResult; -#elif defined(_XM_SSE_INTRINSICS_) - // x.x,x.y,y.x,y.y - XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); - // x.z,x.w,y.z,y.w - XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); - // z.x,z.y,w.x,w.y - XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); - // z.z,z.w,w.z,w.w - XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); - - XMMATRIX mResult; - // x.x,y.x,z.x,w.x - mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); - // x.y,y.y,z.y,w.y - mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); - // x.z,y.z,z.z,w.z - mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); - // x.w,y.w,z.w,w.w - mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); - return mResult; -#endif -} - -//------------------------------------------------------------------------------ -// Return the inverse and the determinant of a 4x4 matrix -_Use_decl_annotations_ inline XMMATRIX XM_CALLCONV -XMMatrixInverse(XMVECTOR* pDeterminant, FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - XMMATRIX MT = XMMatrixTranspose(M); - - XMVECTOR V0[4], V1[4]; - V0[0] = - XMVectorSwizzle( - MT.r[2]); - V1[0] = - XMVectorSwizzle( - MT.r[3]); - V0[1] = - XMVectorSwizzle( - MT.r[0]); - V1[1] = - XMVectorSwizzle( - MT.r[1]); - V0[2] = XMVectorPermute(MT.r[2], MT.r[0]); - V1[2] = XMVectorPermute(MT.r[3], MT.r[1]); - - XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]); - XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]); - XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]); - - V0[0] = - XMVectorSwizzle( - MT.r[2]); - V1[0] = - XMVectorSwizzle( - MT.r[3]); - V0[1] = - XMVectorSwizzle( - MT.r[0]); - V1[1] = - XMVectorSwizzle( - MT.r[1]); - V0[2] = XMVectorPermute(MT.r[2], MT.r[0]); - V1[2] = XMVectorPermute(MT.r[3], MT.r[1]); - - D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0); - D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1); - D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2); - - V0[0] = - XMVectorSwizzle( - MT.r[1]); - V1[0] = XMVectorPermute(D0, D2); - V0[1] = - XMVectorSwizzle( - MT.r[0]); - V1[1] = XMVectorPermute(D0, D2); - V0[2] = - XMVectorSwizzle( - MT.r[3]); - V1[2] = XMVectorPermute(D1, D2); - V0[3] = - XMVectorSwizzle( - MT.r[2]); - V1[3] = XMVectorPermute(D1, D2); - - XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]); - XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]); - XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]); - XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]); - - V0[0] = - XMVectorSwizzle( - MT.r[1]); - V1[0] = XMVectorPermute(D0, D2); - V0[1] = - XMVectorSwizzle( - MT.r[0]); - V1[1] = XMVectorPermute(D0, D2); - V0[2] = - XMVectorSwizzle( - MT.r[3]); - V1[2] = XMVectorPermute(D1, D2); - V0[3] = - XMVectorSwizzle( - MT.r[2]); - V1[3] = XMVectorPermute(D1, D2); - - C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); - C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); - C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); - C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); - - V0[0] = - XMVectorSwizzle( - MT.r[1]); - V1[0] = XMVectorPermute(D0, D2); - V0[1] = - XMVectorSwizzle( - MT.r[0]); - V1[1] = XMVectorPermute(D0, D2); - V0[2] = - XMVectorSwizzle( - MT.r[3]); - V1[2] = XMVectorPermute(D1, D2); - V0[3] = - XMVectorSwizzle( - MT.r[2]); - V1[3] = XMVectorPermute(D1, D2); - - XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); - C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0); - XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2); - C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); - XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); - C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4); - XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6); - C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); - - XMMATRIX R; - R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v); - R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v); - R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v); - R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v); - - XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]); - - if (pDeterminant != nullptr) *pDeterminant = Determinant; - - XMVECTOR Reciprocal = XMVectorReciprocal(Determinant); - - XMMATRIX Result; - Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal); - Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal); - Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal); - Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal); - return Result; - -#elif defined(_XM_SSE_INTRINSICS_) - // Transpose matrix - XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0)); - XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2)); - XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0)); - XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2)); - - XMMATRIX MT; - MT.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); - MT.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); - MT.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); - MT.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); - - XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(1, 1, 0, 0)); - XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(3, 2, 3, 2)); - XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1, 1, 0, 0)); - XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(3, 2, 3, 2)); - XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0], _MM_SHUFFLE(2, 0, 2, 0)); - XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1], _MM_SHUFFLE(3, 1, 3, 1)); - - XMVECTOR D0 = _mm_mul_ps(V00, V10); - XMVECTOR D1 = _mm_mul_ps(V01, V11); - XMVECTOR D2 = _mm_mul_ps(V02, V12); - - V00 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(3, 2, 3, 2)); - V10 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1, 1, 0, 0)); - V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(3, 2, 3, 2)); - V11 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1, 1, 0, 0)); - V02 = _mm_shuffle_ps(MT.r[2], MT.r[0], _MM_SHUFFLE(3, 1, 3, 1)); - V12 = _mm_shuffle_ps(MT.r[3], MT.r[1], _MM_SHUFFLE(2, 0, 2, 0)); - - D0 = XM_FNMADD_PS(V00, V10, D0); - D1 = XM_FNMADD_PS(V01, V11, D1); - D2 = XM_FNMADD_PS(V02, V12, D2); - // V11 = D0Y,D0W,D2Y,D2Y - V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 1, 3, 1)); - V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1, 0, 2, 1)); - V10 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(0, 3, 0, 2)); - V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0, 1, 0, 2)); - V11 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(2, 1, 2, 1)); - // V13 = D1Y,D1W,D2W,D2W - XMVECTOR V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 3, 3, 1)); - V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1, 0, 2, 1)); - V12 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(0, 3, 0, 2)); - XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(0, 1, 0, 2)); - V13 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(2, 1, 2, 1)); - - XMVECTOR C0 = _mm_mul_ps(V00, V10); - XMVECTOR C2 = _mm_mul_ps(V01, V11); - XMVECTOR C4 = _mm_mul_ps(V02, V12); - XMVECTOR C6 = _mm_mul_ps(V03, V13); - - // V11 = D0X,D0Y,D2X,D2X - V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(0, 0, 1, 0)); - V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2, 1, 3, 2)); - V10 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(2, 1, 0, 3)); - V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1, 3, 2, 3)); - V11 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(0, 2, 1, 2)); - // V13 = D1X,D1Y,D2Z,D2Z - V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(2, 2, 1, 0)); - V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2, 1, 3, 2)); - V12 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(2, 1, 0, 3)); - V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(1, 3, 2, 3)); - V13 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(0, 2, 1, 2)); - - C0 = XM_FNMADD_PS(V00, V10, C0); - C2 = XM_FNMADD_PS(V01, V11, C2); - C4 = XM_FNMADD_PS(V02, V12, C4); - C6 = XM_FNMADD_PS(V03, V13, C6); - - V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(0, 3, 0, 3)); - // V10 = D0Z,D0Z,D2X,D2Y - V10 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 2, 2)); - V10 = XM_PERMUTE_PS(V10, _MM_SHUFFLE(0, 2, 3, 0)); - V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(2, 0, 3, 1)); - // V11 = D0X,D0W,D2X,D2Y - V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 3, 0)); - V11 = XM_PERMUTE_PS(V11, _MM_SHUFFLE(2, 1, 0, 3)); - V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(0, 3, 0, 3)); - // V12 = D1Z,D1Z,D2Z,D2W - V12 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 2, 2)); - V12 = XM_PERMUTE_PS(V12, _MM_SHUFFLE(0, 2, 3, 0)); - V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(2, 0, 3, 1)); - // V13 = D1X,D1W,D2Z,D2W - V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 3, 0)); - V13 = XM_PERMUTE_PS(V13, _MM_SHUFFLE(2, 1, 0, 3)); - - V00 = _mm_mul_ps(V00, V10); - V01 = _mm_mul_ps(V01, V11); - V02 = _mm_mul_ps(V02, V12); - V03 = _mm_mul_ps(V03, V13); - XMVECTOR C1 = _mm_sub_ps(C0, V00); - C0 = _mm_add_ps(C0, V00); - XMVECTOR C3 = _mm_add_ps(C2, V01); - C2 = _mm_sub_ps(C2, V01); - XMVECTOR C5 = _mm_sub_ps(C4, V02); - C4 = _mm_add_ps(C4, V02); - XMVECTOR C7 = _mm_add_ps(C6, V03); - C6 = _mm_sub_ps(C6, V03); - - C0 = _mm_shuffle_ps(C0, C1, _MM_SHUFFLE(3, 1, 2, 0)); - C2 = _mm_shuffle_ps(C2, C3, _MM_SHUFFLE(3, 1, 2, 0)); - C4 = _mm_shuffle_ps(C4, C5, _MM_SHUFFLE(3, 1, 2, 0)); - C6 = _mm_shuffle_ps(C6, C7, _MM_SHUFFLE(3, 1, 2, 0)); - C0 = XM_PERMUTE_PS(C0, _MM_SHUFFLE(3, 1, 2, 0)); - C2 = XM_PERMUTE_PS(C2, _MM_SHUFFLE(3, 1, 2, 0)); - C4 = XM_PERMUTE_PS(C4, _MM_SHUFFLE(3, 1, 2, 0)); - C6 = XM_PERMUTE_PS(C6, _MM_SHUFFLE(3, 1, 2, 0)); - // Get the determinant - XMVECTOR vTemp = XMVector4Dot(C0, MT.r[0]); - if (pDeterminant != nullptr) *pDeterminant = vTemp; - vTemp = _mm_div_ps(g_XMOne, vTemp); - XMMATRIX mResult; - mResult.r[0] = _mm_mul_ps(C0, vTemp); - mResult.r[1] = _mm_mul_ps(C2, vTemp); - mResult.r[2] = _mm_mul_ps(C4, vTemp); - mResult.r[3] = _mm_mul_ps(C6, vTemp); - return mResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixVectorTensorProduct(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - XMMATRIX mResult; - mResult.r[0] = XMVectorMultiply(XMVectorSwizzle<0, 0, 0, 0>(V1), V2); - mResult.r[1] = XMVectorMultiply(XMVectorSwizzle<1, 1, 1, 1>(V1), V2); - mResult.r[2] = XMVectorMultiply(XMVectorSwizzle<2, 2, 2, 2>(V1), V2); - mResult.r[3] = XMVectorMultiply(XMVectorSwizzle<3, 3, 3, 3>(V1), V2); - return mResult; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMMatrixDeterminant(FXMMATRIX M) noexcept { - static const XMVECTORF32 Sign = {{{1.0f, -1.0f, 1.0f, -1.0f}}}; - - XMVECTOR V0 = - XMVectorSwizzle( - M.r[2]); - XMVECTOR V1 = - XMVectorSwizzle( - M.r[3]); - XMVECTOR V2 = - XMVectorSwizzle( - M.r[2]); - XMVECTOR V3 = - XMVectorSwizzle( - M.r[3]); - XMVECTOR V4 = - XMVectorSwizzle( - M.r[2]); - XMVECTOR V5 = - XMVectorSwizzle( - M.r[3]); - - XMVECTOR P0 = XMVectorMultiply(V0, V1); - XMVECTOR P1 = XMVectorMultiply(V2, V3); - XMVECTOR P2 = XMVectorMultiply(V4, V5); - - V0 = - XMVectorSwizzle( - M.r[2]); - V1 = - XMVectorSwizzle( - M.r[3]); - V2 = - XMVectorSwizzle( - M.r[2]); - V3 = - XMVectorSwizzle( - M.r[3]); - V4 = - XMVectorSwizzle( - M.r[2]); - V5 = - XMVectorSwizzle( - M.r[3]); - - P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0); - P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1); - P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2); - - V0 = - XMVectorSwizzle( - M.r[1]); - V1 = - XMVectorSwizzle( - M.r[1]); - V2 = - XMVectorSwizzle( - M.r[1]); - - XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v); - XMVECTOR R = XMVectorMultiply(V0, P0); - R = XMVectorNegativeMultiplySubtract(V1, P1, R); - R = XMVectorMultiplyAdd(V2, P2, R); - - return XMVector4Dot(S, R); -} - -#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \ - if ((x) < (y)) { \ - if ((y) < (z)) { \ - (a) = 2; \ - (b) = 1; \ - (c) = 0; \ - } else { \ - (a) = 1; \ - \ - if ((x) < (z)) { \ - (b) = 2; \ - (c) = 0; \ - } else { \ - (b) = 0; \ - (c) = 2; \ - } \ - } \ - } else { \ - if ((x) < (z)) { \ - (a) = 2; \ - (b) = 0; \ - (c) = 1; \ - } else { \ - (a) = 0; \ - \ - if ((y) < (z)) { \ - (b) = 2; \ - (c) = 1; \ - } else { \ - (b) = 1; \ - (c) = 2; \ - } \ - } \ - } - -#define XM3_DECOMP_EPSILON 0.0001f - -_Use_decl_annotations_ inline bool XM_CALLCONV -XMMatrixDecompose(XMVECTOR* outScale, XMVECTOR* outRotQuat, XMVECTOR* outTrans, - FXMMATRIX M) noexcept { - static const XMVECTOR* pvCanonicalBasis[3] = { - &g_XMIdentityR0.v, &g_XMIdentityR1.v, &g_XMIdentityR2.v}; - - assert(outScale != nullptr); - assert(outRotQuat != nullptr); - assert(outTrans != nullptr); - - // Get the translation - outTrans[0] = M.r[3]; - - XMVECTOR* ppvBasis[3]; - XMMATRIX matTemp; - ppvBasis[0] = &matTemp.r[0]; - ppvBasis[1] = &matTemp.r[1]; - ppvBasis[2] = &matTemp.r[2]; - - matTemp.r[0] = M.r[0]; - matTemp.r[1] = M.r[1]; - matTemp.r[2] = M.r[2]; - matTemp.r[3] = g_XMIdentityR3.v; - - auto pfScales = reinterpret_cast(outScale); - - size_t a, b, c; - XMVectorGetXPtr(&pfScales[0], XMVector3Length(ppvBasis[0][0])); - XMVectorGetXPtr(&pfScales[1], XMVector3Length(ppvBasis[1][0])); - XMVectorGetXPtr(&pfScales[2], XMVector3Length(ppvBasis[2][0])); - pfScales[3] = 0.f; - - XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2]) - - if (pfScales[a] < XM3_DECOMP_EPSILON) { - ppvBasis[a][0] = pvCanonicalBasis[a][0]; - } - ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]); - - if (pfScales[b] < XM3_DECOMP_EPSILON) { - size_t aa, bb, cc; - float fAbsX, fAbsY, fAbsZ; - - fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0])); - fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0])); - fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0])); - - XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ) - - ppvBasis[b][0] = - XMVector3Cross(ppvBasis[a][0], pvCanonicalBasis[cc][0]); - } - - ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]); - - if (pfScales[c] < XM3_DECOMP_EPSILON) { - ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0], ppvBasis[b][0]); - } - - ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]); - - float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp)); - - // use Kramer's rule to check for handedness of coordinate system - if (fDet < 0.0f) { - // switch coordinate system by negating the scale and inverting the - // basis vector on the x-axis - pfScales[a] = -pfScales[a]; - ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]); - - fDet = -fDet; - } - - fDet -= 1.0f; - fDet *= fDet; - - if (XM3_DECOMP_EPSILON < fDet) { - // Non-SRT matrix encountered - return false; - } - - // generate the quaternion from the matrix - outRotQuat[0] = XMQuaternionRotationMatrix(matTemp); - return true; -} - -#undef XM3_DECOMP_EPSILON -#undef XM3RANKDECOMPOSE - -//------------------------------------------------------------------------------ -// Transformation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixIdentity() noexcept { - XMMATRIX M; - M.r[0] = g_XMIdentityR0.v; - M.r[1] = g_XMIdentityR1.v; - M.r[2] = g_XMIdentityR2.v; - M.r[3] = g_XMIdentityR3.v; - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixSet(float m00, float m01, float m02, - float m03, float m10, float m11, - float m12, float m13, float m20, - float m21, float m22, float m23, - float m30, float m31, float m32, - float m33) noexcept { - XMMATRIX M; -#if defined(_XM_NO_INTRINSICS_) - M.m[0][0] = m00; - M.m[0][1] = m01; - M.m[0][2] = m02; - M.m[0][3] = m03; - M.m[1][0] = m10; - M.m[1][1] = m11; - M.m[1][2] = m12; - M.m[1][3] = m13; - M.m[2][0] = m20; - M.m[2][1] = m21; - M.m[2][2] = m22; - M.m[2][3] = m23; - M.m[3][0] = m30; - M.m[3][1] = m31; - M.m[3][2] = m32; - M.m[3][3] = m33; -#else - M.r[0] = XMVectorSet(m00, m01, m02, m03); - M.r[1] = XMVectorSet(m10, m11, m12, m13); - M.r[2] = XMVectorSet(m20, m21, m22, m23); - M.r[3] = XMVectorSet(m30, m31, m32, m33); -#endif - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixTranslation(float OffsetX, float OffsetY, - float OffsetZ) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.m[0][0] = 1.0f; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = 1.0f; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = 1.0f; - M.m[2][3] = 0.0f; - - M.m[3][0] = OffsetX; - M.m[3][1] = OffsetY; - M.m[3][2] = OffsetZ; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - XMMATRIX M; - M.r[0] = g_XMIdentityR0.v; - M.r[1] = g_XMIdentityR1.v; - M.r[2] = g_XMIdentityR2.v; - M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f); - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV -XMMatrixTranslationFromVector(FXMVECTOR Offset) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.m[0][0] = 1.0f; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = 1.0f; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = 1.0f; - M.m[2][3] = 0.0f; - - M.m[3][0] = Offset.vector4_f32[0]; - M.m[3][1] = Offset.vector4_f32[1]; - M.m[3][2] = Offset.vector4_f32[2]; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - XMMATRIX M; - M.r[0] = g_XMIdentityR0.v; - M.r[1] = g_XMIdentityR1.v; - M.r[2] = g_XMIdentityR2.v; - M.r[3] = XMVectorSelect(g_XMIdentityR3.v, Offset, g_XMSelect1110.v); - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixScaling(float ScaleX, float ScaleY, - float ScaleZ) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.m[0][0] = ScaleX; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = ScaleY; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = ScaleZ; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - const XMVECTOR Zero = vdupq_n_f32(0); - XMMATRIX M; - M.r[0] = vsetq_lane_f32(ScaleX, Zero, 0); - M.r[1] = vsetq_lane_f32(ScaleY, Zero, 1); - M.r[2] = vsetq_lane_f32(ScaleZ, Zero, 2); - M.r[3] = g_XMIdentityR3.v; - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - M.r[0] = _mm_set_ps(0, 0, 0, ScaleX); - M.r[1] = _mm_set_ps(0, 0, ScaleY, 0); - M.r[2] = _mm_set_ps(0, ScaleZ, 0, 0); - M.r[3] = g_XMIdentityR3.v; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV -XMMatrixScalingFromVector(FXMVECTOR Scale) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMMATRIX M; - M.m[0][0] = Scale.vector4_f32[0]; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = Scale.vector4_f32[1]; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = Scale.vector4_f32[2]; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMMATRIX M; - M.r[0] = vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskX)); - M.r[1] = vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskY)); - M.r[2] = vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskZ)); - M.r[3] = g_XMIdentityR3.v; - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - M.r[0] = _mm_and_ps(Scale, g_XMMaskX); - M.r[1] = _mm_and_ps(Scale, g_XMMaskY); - M.r[2] = _mm_and_ps(Scale, g_XMMaskZ); - M.r[3] = g_XMIdentityR3.v; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixRotationX(float Angle) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - XMMATRIX M; - M.m[0][0] = 1.0f; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = fCosAngle; - M.m[1][2] = fSinAngle; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = -fSinAngle; - M.m[2][2] = fCosAngle; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - const float32x4_t Zero = vdupq_n_f32(0); - - float32x4_t T1 = vsetq_lane_f32(fCosAngle, Zero, 1); - T1 = vsetq_lane_f32(fSinAngle, T1, 2); - - float32x4_t T2 = vsetq_lane_f32(-fSinAngle, Zero, 1); - T2 = vsetq_lane_f32(fCosAngle, T2, 2); - - XMMATRIX M; - M.r[0] = g_XMIdentityR0.v; - M.r[1] = T1; - M.r[2] = T2; - M.r[3] = g_XMIdentityR3.v; - return M; -#elif defined(_XM_SSE_INTRINSICS_) - float SinAngle; - float CosAngle; - XMScalarSinCos(&SinAngle, &CosAngle, Angle); - - XMVECTOR vSin = _mm_set_ss(SinAngle); - XMVECTOR vCos = _mm_set_ss(CosAngle); - // x = 0,y = cos,z = sin, w = 0 - vCos = _mm_shuffle_ps(vCos, vSin, _MM_SHUFFLE(3, 0, 0, 3)); - XMMATRIX M; - M.r[0] = g_XMIdentityR0; - M.r[1] = vCos; - // x = 0,y = sin,z = cos, w = 0 - vCos = XM_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 1, 2, 0)); - // x = 0,y = -sin,z = cos, w = 0 - vCos = _mm_mul_ps(vCos, g_XMNegateY); - M.r[2] = vCos; - M.r[3] = g_XMIdentityR3; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixRotationY(float Angle) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - XMMATRIX M; - M.m[0][0] = fCosAngle; - M.m[0][1] = 0.0f; - M.m[0][2] = -fSinAngle; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = 1.0f; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = fSinAngle; - M.m[2][1] = 0.0f; - M.m[2][2] = fCosAngle; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - const float32x4_t Zero = vdupq_n_f32(0); - - float32x4_t T0 = vsetq_lane_f32(fCosAngle, Zero, 0); - T0 = vsetq_lane_f32(-fSinAngle, T0, 2); - - float32x4_t T2 = vsetq_lane_f32(fSinAngle, Zero, 0); - T2 = vsetq_lane_f32(fCosAngle, T2, 2); - - XMMATRIX M; - M.r[0] = T0; - M.r[1] = g_XMIdentityR1.v; - M.r[2] = T2; - M.r[3] = g_XMIdentityR3.v; - return M; -#elif defined(_XM_SSE_INTRINSICS_) - float SinAngle; - float CosAngle; - XMScalarSinCos(&SinAngle, &CosAngle, Angle); - - XMVECTOR vSin = _mm_set_ss(SinAngle); - XMVECTOR vCos = _mm_set_ss(CosAngle); - // x = sin,y = 0,z = cos, w = 0 - vSin = _mm_shuffle_ps(vSin, vCos, _MM_SHUFFLE(3, 0, 3, 0)); - XMMATRIX M; - M.r[2] = vSin; - M.r[1] = g_XMIdentityR1; - // x = cos,y = 0,z = sin, w = 0 - vSin = XM_PERMUTE_PS(vSin, _MM_SHUFFLE(3, 0, 1, 2)); - // x = cos,y = 0,z = -sin, w = 0 - vSin = _mm_mul_ps(vSin, g_XMNegateZ); - M.r[0] = vSin; - M.r[3] = g_XMIdentityR3; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixRotationZ(float Angle) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - XMMATRIX M; - M.m[0][0] = fCosAngle; - M.m[0][1] = fSinAngle; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = -fSinAngle; - M.m[1][1] = fCosAngle; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = 1.0f; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - const float32x4_t Zero = vdupq_n_f32(0); - - float32x4_t T0 = vsetq_lane_f32(fCosAngle, Zero, 0); - T0 = vsetq_lane_f32(fSinAngle, T0, 1); - - float32x4_t T1 = vsetq_lane_f32(-fSinAngle, Zero, 0); - T1 = vsetq_lane_f32(fCosAngle, T1, 1); - - XMMATRIX M; - M.r[0] = T0; - M.r[1] = T1; - M.r[2] = g_XMIdentityR2.v; - M.r[3] = g_XMIdentityR3.v; - return M; -#elif defined(_XM_SSE_INTRINSICS_) - float SinAngle; - float CosAngle; - XMScalarSinCos(&SinAngle, &CosAngle, Angle); - - XMVECTOR vSin = _mm_set_ss(SinAngle); - XMVECTOR vCos = _mm_set_ss(CosAngle); - // x = cos,y = sin,z = 0, w = 0 - vCos = _mm_unpacklo_ps(vCos, vSin); - XMMATRIX M; - M.r[0] = vCos; - // x = sin,y = cos,z = 0, w = 0 - vCos = XM_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 2, 0, 1)); - // x = cos,y = -sin,z = 0, w = 0 - vCos = _mm_mul_ps(vCos, g_XMNegateX); - M.r[1] = vCos; - M.r[2] = g_XMIdentityR2; - M.r[3] = g_XMIdentityR3; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYaw(float Pitch, float Yaw, - float Roll) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float cp = cosf(Pitch); - float sp = sinf(Pitch); - - float cy = cosf(Yaw); - float sy = sinf(Yaw); - - float cr = cosf(Roll); - float sr = sinf(Roll); - - XMMATRIX M; - M.m[0][0] = cr * cy + sr * sp * sy; - M.m[0][1] = sr * cp; - M.m[0][2] = sr * sp * cy - cr * sy; - M.m[0][3] = 0.0f; - - M.m[1][0] = cr * sp * sy - sr * cy; - M.m[1][1] = cr * cp; - M.m[1][2] = sr * sy + cr * sp * cy; - M.m[1][3] = 0.0f; - - M.m[2][0] = cp * sy; - M.m[2][1] = -sp; - M.m[2][2] = cp * cy; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - return M; -#else - XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); - return XMMatrixRotationRollPitchYawFromVector(Angles); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYawFromVector( - FXMVECTOR Angles // - ) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float cp = cosf(Angles.vector4_f32[0]); - float sp = sinf(Angles.vector4_f32[0]); - - float cy = cosf(Angles.vector4_f32[1]); - float sy = sinf(Angles.vector4_f32[1]); - - float cr = cosf(Angles.vector4_f32[2]); - float sr = sinf(Angles.vector4_f32[2]); - - XMMATRIX M; - M.m[0][0] = cr * cy + sr * sp * sy; - M.m[0][1] = sr * cp; - M.m[0][2] = sr * sp * cy - cr * sy; - M.m[0][3] = 0.0f; - - M.m[1][0] = cr * sp * sy - sr * cy; - M.m[1][1] = cr * cp; - M.m[1][2] = sr * sy + cr * sp * cy; - M.m[1][3] = 0.0f; - - M.m[2][0] = cp * sy; - M.m[2][1] = -sp; - M.m[2][2] = cp * cy; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = 0.0f; - M.m[3][3] = 1.0f; - return M; -#else - static const XMVECTORF32 Sign = {{{1.0f, -1.0f, -1.0f, 1.0f}}}; - - XMVECTOR SinAngles, CosAngles; - XMVectorSinCos(&SinAngles, &CosAngles, Angles); - - XMVECTOR P0 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR Y0 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR P1 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR Y1 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR P2 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR P3 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR Y2 = XMVectorSplatX(SinAngles); - XMVECTOR NS = XMVectorNegate(SinAngles); - - XMVECTOR Q0 = XMVectorMultiply(P0, Y0); - XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v); - Q1 = XMVectorMultiply(Q1, Y1); - XMVECTOR Q2 = XMVectorMultiply(P2, Y2); - Q2 = XMVectorMultiplyAdd(Q2, P3, Q1); - - XMVECTOR V0 = XMVectorPermute(Q0, Q2); - XMVECTOR V1 = XMVectorPermute(Q0, Q2); - XMVECTOR V2 = XMVectorPermute(Q0, NS); - - XMMATRIX M; - M.r[0] = XMVectorSelect(g_XMZero, V0, g_XMSelect1110.v); - M.r[1] = XMVectorSelect(g_XMZero, V1, g_XMSelect1110.v); - M.r[2] = XMVectorSelect(g_XMZero, V2, g_XMSelect1110.v); - M.r[3] = g_XMIdentityR3; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixRotationNormal(FXMVECTOR NormalAxis, - float Angle) noexcept { -#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f); - - XMVECTOR C2 = XMVectorSplatZ(A); - XMVECTOR C1 = XMVectorSplatY(A); - XMVECTOR C0 = XMVectorSplatX(A); - - XMVECTOR N0 = - XMVectorSwizzle( - NormalAxis); - XMVECTOR N1 = - XMVectorSwizzle( - NormalAxis); - - XMVECTOR V0 = XMVectorMultiply(C2, N0); - V0 = XMVectorMultiply(V0, N1); - - XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis); - R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1); - - XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0); - XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0); - - V0 = XMVectorSelect(A, R0, g_XMSelect1110.v); - XMVECTOR V1 = XMVectorPermute(R1, R2); - XMVECTOR V2 = XMVectorPermute(R1, R2); - - XMMATRIX M; - M.r[0] = XMVectorPermute(V0, V1); - M.r[1] = XMVectorPermute(V0, V1); - M.r[2] = XMVectorPermute(V0, V2); - M.r[3] = g_XMIdentityR3.v; - return M; - -#elif defined(_XM_SSE_INTRINSICS_) - float fSinAngle; - float fCosAngle; - XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); - - XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle); - XMVECTOR C1 = _mm_set_ps1(fCosAngle); - XMVECTOR C0 = _mm_set_ps1(fSinAngle); - - XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis, _MM_SHUFFLE(3, 0, 2, 1)); - XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis, _MM_SHUFFLE(3, 1, 0, 2)); - - XMVECTOR V0 = _mm_mul_ps(C2, N0); - V0 = _mm_mul_ps(V0, N1); - - XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis); - R0 = _mm_mul_ps(R0, NormalAxis); - R0 = _mm_add_ps(R0, C1); - - XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis); - R1 = _mm_add_ps(R1, V0); - XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis); - R2 = _mm_sub_ps(V0, R2); - - V0 = _mm_and_ps(R0, g_XMMask3); - XMVECTOR V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 1, 2, 0)); - V1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 3, 2, 1)); - XMVECTOR V2 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(0, 0, 1, 1)); - V2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 0, 2, 0)); - - R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(1, 0, 3, 0)); - R2 = XM_PERMUTE_PS(R2, _MM_SHUFFLE(1, 3, 2, 0)); - - XMMATRIX M; - M.r[0] = R2; - - R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(3, 2, 3, 1)); - R2 = XM_PERMUTE_PS(R2, _MM_SHUFFLE(1, 3, 0, 2)); - M.r[1] = R2; - - V2 = _mm_shuffle_ps(V2, V0, _MM_SHUFFLE(3, 2, 1, 0)); - M.r[2] = V2; - M.r[3] = g_XMIdentityR3.v; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixRotationAxis(FXMVECTOR Axis, - float Angle) noexcept { - assert(!XMVector3Equal(Axis, XMVectorZero())); - assert(!XMVector3IsInfinite(Axis)); - - XMVECTOR Normal = XMVector3Normalize(Axis); - return XMMatrixRotationNormal(Normal, Angle); -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV -XMMatrixRotationQuaternion(FXMVECTOR Quaternion) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - float qx = Quaternion.vector4_f32[0]; - float qxx = qx * qx; - - float qy = Quaternion.vector4_f32[1]; - float qyy = qy * qy; - - float qz = Quaternion.vector4_f32[2]; - float qzz = qz * qz; - - float qw = Quaternion.vector4_f32[3]; - - XMMATRIX M; - M.m[0][0] = 1.f - 2.f * qyy - 2.f * qzz; - M.m[0][1] = 2.f * qx * qy + 2.f * qz * qw; - M.m[0][2] = 2.f * qx * qz - 2.f * qy * qw; - M.m[0][3] = 0.f; - - M.m[1][0] = 2.f * qx * qy - 2.f * qz * qw; - M.m[1][1] = 1.f - 2.f * qxx - 2.f * qzz; - M.m[1][2] = 2.f * qy * qz + 2.f * qx * qw; - M.m[1][3] = 0.f; - - M.m[2][0] = 2.f * qx * qz + 2.f * qy * qw; - M.m[2][1] = 2.f * qy * qz - 2.f * qx * qw; - M.m[2][2] = 1.f - 2.f * qxx - 2.f * qyy; - M.m[2][3] = 0.f; - - M.m[3][0] = 0.f; - M.m[3][1] = 0.f; - M.m[3][2] = 0.f; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Constant1110 = {{{1.0f, 1.0f, 1.0f, 0.0f}}}; - - XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion); - XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0); - - XMVECTOR V0 = XMVectorPermute(Q1, Constant1110.v); - XMVECTOR V1 = XMVectorPermute(Q1, Constant1110.v); - XMVECTOR R0 = XMVectorSubtract(Constant1110, V0); - R0 = XMVectorSubtract(R0, V1); - - V0 = - XMVectorSwizzle( - Quaternion); - V1 = - XMVectorSwizzle( - Q0); - V0 = XMVectorMultiply(V0, V1); - - V1 = XMVectorSplatW(Quaternion); - XMVECTOR V2 = - XMVectorSwizzle( - Q0); - V1 = XMVectorMultiply(V1, V2); - - XMVECTOR R1 = XMVectorAdd(V0, V1); - XMVECTOR R2 = XMVectorSubtract(V0, V1); - - V0 = XMVectorPermute(R1, R2); - V1 = XMVectorPermute(R1, R2); - - XMMATRIX M; - M.r[0] = XMVectorPermute(R0, V0); - M.r[1] = XMVectorPermute(R0, V0); - M.r[2] = XMVectorPermute(R0, V1); - M.r[3] = g_XMIdentityR3.v; - return M; - -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Constant1110 = {{{1.0f, 1.0f, 1.0f, 0.0f}}}; - - XMVECTOR Q0 = _mm_add_ps(Quaternion, Quaternion); - XMVECTOR Q1 = _mm_mul_ps(Quaternion, Q0); - - XMVECTOR V0 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(3, 0, 0, 1)); - V0 = _mm_and_ps(V0, g_XMMask3); - XMVECTOR V1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(3, 1, 2, 2)); - V1 = _mm_and_ps(V1, g_XMMask3); - XMVECTOR R0 = _mm_sub_ps(Constant1110, V0); - R0 = _mm_sub_ps(R0, V1); - - V0 = XM_PERMUTE_PS(Quaternion, _MM_SHUFFLE(3, 1, 0, 0)); - V1 = XM_PERMUTE_PS(Q0, _MM_SHUFFLE(3, 2, 1, 2)); - V0 = _mm_mul_ps(V0, V1); - - V1 = XM_PERMUTE_PS(Quaternion, _MM_SHUFFLE(3, 3, 3, 3)); - XMVECTOR V2 = XM_PERMUTE_PS(Q0, _MM_SHUFFLE(3, 0, 2, 1)); - V1 = _mm_mul_ps(V1, V2); - - XMVECTOR R1 = _mm_add_ps(V0, V1); - XMVECTOR R2 = _mm_sub_ps(V0, V1); - - V0 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(1, 0, 2, 1)); - V0 = XM_PERMUTE_PS(V0, _MM_SHUFFLE(1, 3, 2, 0)); - V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 2, 0, 0)); - V1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 0, 2, 0)); - - Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(1, 0, 3, 0)); - Q1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(1, 3, 2, 0)); - - XMMATRIX M; - M.r[0] = Q1; - - Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(3, 2, 3, 1)); - Q1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(1, 3, 0, 2)); - M.r[1] = Q1; - - Q1 = _mm_shuffle_ps(V1, R0, _MM_SHUFFLE(3, 2, 1, 0)); - M.r[2] = Q1; - M.r[3] = g_XMIdentityR3; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixTransformation2D( - FXMVECTOR ScalingOrigin, float ScalingOrientation, FXMVECTOR Scaling, - FXMVECTOR RotationOrigin, float Rotation, GXMVECTOR Translation) noexcept { - // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * - // MScalingOrientation * - // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * - // MRotationOrigin * MTranslation; - - XMVECTOR VScalingOrigin = - XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v); - XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin); - - XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin); - XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation); - XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation); - XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v); - XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling); - XMVECTOR VRotationOrigin = - XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v); - XMMATRIX MRotation = XMMatrixRotationZ(Rotation); - XMVECTOR VTranslation = - XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v); - - XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT); - M = XMMatrixMultiply(M, MScaling); - M = XMMatrixMultiply(M, MScalingOrientation); - M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin); - M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); - M = XMMatrixMultiply(M, MRotation); - M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); - M.r[3] = XMVectorAdd(M.r[3], VTranslation); - - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixTransformation( - FXMVECTOR ScalingOrigin, FXMVECTOR ScalingOrientationQuaternion, - FXMVECTOR Scaling, GXMVECTOR RotationOrigin, HXMVECTOR RotationQuaternion, - HXMVECTOR Translation) noexcept { - // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * - // MScalingOrientation * - // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * - // MRotationOrigin * MTranslation; - - XMVECTOR VScalingOrigin = - XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v); - XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin); - - XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin); - XMMATRIX MScalingOrientation = - XMMatrixRotationQuaternion(ScalingOrientationQuaternion); - XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation); - XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling); - XMVECTOR VRotationOrigin = - XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v); - XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion); - XMVECTOR VTranslation = - XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v); - - XMMATRIX M; - M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT); - M = XMMatrixMultiply(M, MScaling); - M = XMMatrixMultiply(M, MScalingOrientation); - M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin); - M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); - M = XMMatrixMultiply(M, MRotation); - M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); - M.r[3] = XMVectorAdd(M.r[3], VTranslation); - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV -XMMatrixAffineTransformation2D(FXMVECTOR Scaling, FXMVECTOR RotationOrigin, - float Rotation, FXMVECTOR Translation) noexcept { - // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * - // MTranslation; - - XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v); - XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling); - XMVECTOR VRotationOrigin = - XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v); - XMMATRIX MRotation = XMMatrixRotationZ(Rotation); - XMVECTOR VTranslation = - XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v); - - XMMATRIX M; - M = MScaling; - M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); - M = XMMatrixMultiply(M, MRotation); - M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); - M.r[3] = XMVectorAdd(M.r[3], VTranslation); - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation( - FXMVECTOR Scaling, FXMVECTOR RotationOrigin, FXMVECTOR RotationQuaternion, - GXMVECTOR Translation) noexcept { - // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * - // MTranslation; - - XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling); - XMVECTOR VRotationOrigin = - XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v); - XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion); - XMVECTOR VTranslation = - XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v); - - XMMATRIX M; - M = MScaling; - M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); - M = XMMatrixMultiply(M, MRotation); - M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); - M.r[3] = XMVectorAdd(M.r[3], VTranslation); - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV -XMMatrixReflect(FXMVECTOR ReflectionPlane) noexcept { - assert(!XMVector3Equal(ReflectionPlane, XMVectorZero())); - assert(!XMPlaneIsInfinite(ReflectionPlane)); - - static const XMVECTORF32 NegativeTwo = {{{-2.0f, -2.0f, -2.0f, 0.0f}}}; - - XMVECTOR P = XMPlaneNormalize(ReflectionPlane); - XMVECTOR S = XMVectorMultiply(P, NegativeTwo); - - XMVECTOR A = XMVectorSplatX(P); - XMVECTOR B = XMVectorSplatY(P); - XMVECTOR C = XMVectorSplatZ(P); - XMVECTOR D = XMVectorSplatW(P); - - XMMATRIX M; - M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v); - M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v); - M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v); - M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v); - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixShadow(FXMVECTOR ShadowPlane, - FXMVECTOR LightPosition) noexcept { - static const XMVECTORU32 Select0001 = { - {{XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1}}}; - - assert(!XMVector3Equal(ShadowPlane, XMVectorZero())); - assert(!XMPlaneIsInfinite(ShadowPlane)); - - XMVECTOR P = XMPlaneNormalize(ShadowPlane); - XMVECTOR Dot = XMPlaneDot(P, LightPosition); - P = XMVectorNegate(P); - XMVECTOR D = XMVectorSplatW(P); - XMVECTOR C = XMVectorSplatZ(P); - XMVECTOR B = XMVectorSplatY(P); - XMVECTOR A = XMVectorSplatX(P); - Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v); - - XMMATRIX M; - M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot); - Dot = XMVectorRotateLeft(Dot, 1); - M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot); - Dot = XMVectorRotateLeft(Dot, 1); - M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot); - Dot = XMVectorRotateLeft(Dot, 1); - M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot); - return M; -} - -//------------------------------------------------------------------------------ -// View and projection initialization operations -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixLookAtLH(FXMVECTOR EyePosition, - FXMVECTOR FocusPosition, - FXMVECTOR UpDirection) noexcept { - XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition); - return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection); -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixLookAtRH(FXMVECTOR EyePosition, - FXMVECTOR FocusPosition, - FXMVECTOR UpDirection) noexcept { - XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition); - return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection); -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixLookToLH(FXMVECTOR EyePosition, - FXMVECTOR EyeDirection, - FXMVECTOR UpDirection) noexcept { - assert(!XMVector3Equal(EyeDirection, XMVectorZero())); - assert(!XMVector3IsInfinite(EyeDirection)); - assert(!XMVector3Equal(UpDirection, XMVectorZero())); - assert(!XMVector3IsInfinite(UpDirection)); - - XMVECTOR R2 = XMVector3Normalize(EyeDirection); - - XMVECTOR R0 = XMVector3Cross(UpDirection, R2); - R0 = XMVector3Normalize(R0); - - XMVECTOR R1 = XMVector3Cross(R2, R0); - - XMVECTOR NegEyePosition = XMVectorNegate(EyePosition); - - XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition); - XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition); - XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition); - - XMMATRIX M; - M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v); - M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v); - M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v); - M.r[3] = g_XMIdentityR3.v; - - M = XMMatrixTranspose(M); - - return M; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixLookToRH(FXMVECTOR EyePosition, - FXMVECTOR EyeDirection, - FXMVECTOR UpDirection) noexcept { - XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection); - return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection); -} - -//------------------------------------------------------------------------------ - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 28931, "PREfast noise: Esp:1266") -#endif - -inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH(float ViewWidth, - float ViewHeight, float NearZ, - float FarZ) noexcept { - assert(NearZ > 0.f && FarZ > 0.f); - assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float TwoNearZ = NearZ + NearZ; - float fRange = FarZ / (FarZ - NearZ); - - XMMATRIX M; - M.m[0][0] = TwoNearZ / ViewWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = TwoNearZ / ViewHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = 1.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = -fRange * NearZ; - M.m[3][3] = 0.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float TwoNearZ = NearZ + NearZ; - float fRange = FarZ / (FarZ - NearZ); - const float32x4_t Zero = vdupq_n_f32(0); - XMMATRIX M; - M.r[0] = vsetq_lane_f32(TwoNearZ / ViewWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(TwoNearZ / ViewHeight, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, g_XMIdentityR3.v, 2); - M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float TwoNearZ = NearZ + NearZ; - float fRange = FarZ / (FarZ - NearZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {TwoNearZ / ViewWidth, TwoNearZ / ViewHeight, fRange, - -fRange * NearZ}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // TwoNearZ / ViewWidth,0,0,0 - M.r[0] = vTemp; - // 0,TwoNearZ / ViewHeight,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // x=fRange,y=-fRange * NearZ,0,1.0f - vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); - // 0,0,fRange,1.0f - vTemp = _mm_setzero_ps(); - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); - M.r[2] = vTemp; - // 0,0,-fRange * NearZ,0 - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); - M.r[3] = vTemp; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH(float ViewWidth, - float ViewHeight, float NearZ, - float FarZ) noexcept { - assert(NearZ > 0.f && FarZ > 0.f); - assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float TwoNearZ = NearZ + NearZ; - float fRange = FarZ / (NearZ - FarZ); - - XMMATRIX M; - M.m[0][0] = TwoNearZ / ViewWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = TwoNearZ / ViewHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = -1.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = fRange * NearZ; - M.m[3][3] = 0.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float TwoNearZ = NearZ + NearZ; - float fRange = FarZ / (NearZ - FarZ); - const float32x4_t Zero = vdupq_n_f32(0); - - XMMATRIX M; - M.r[0] = vsetq_lane_f32(TwoNearZ / ViewWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(TwoNearZ / ViewHeight, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, g_XMNegIdentityR3.v, 2); - M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float TwoNearZ = NearZ + NearZ; - float fRange = FarZ / (NearZ - FarZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {TwoNearZ / ViewWidth, TwoNearZ / ViewHeight, fRange, - fRange * NearZ}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // TwoNearZ / ViewWidth,0,0,0 - M.r[0] = vTemp; - // 0,TwoNearZ / ViewHeight,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // x=fRange,y=-fRange * NearZ,0,-1.0f - vValues = - _mm_shuffle_ps(vValues, g_XMNegIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); - // 0,0,fRange,-1.0f - vTemp = _mm_setzero_ps(); - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); - M.r[2] = vTemp; - // 0,0,-fRange * NearZ,0 - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); - M.r[3] = vTemp; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH(float FovAngleY, - float AspectRatio, - float NearZ, - float FarZ) noexcept { - assert(NearZ > 0.f && FarZ > 0.f); - assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); - assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float SinFov; - float CosFov; - XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); - - float Height = CosFov / SinFov; - float Width = Height / AspectRatio; - float fRange = FarZ / (FarZ - NearZ); - - XMMATRIX M; - M.m[0][0] = Width; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = Height; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = 1.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = -fRange * NearZ; - M.m[3][3] = 0.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float SinFov; - float CosFov; - XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); - - float fRange = FarZ / (FarZ - NearZ); - float Height = CosFov / SinFov; - float Width = Height / AspectRatio; - const float32x4_t Zero = vdupq_n_f32(0); - - XMMATRIX M; - M.r[0] = vsetq_lane_f32(Width, Zero, 0); - M.r[1] = vsetq_lane_f32(Height, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, g_XMIdentityR3.v, 2); - M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - float SinFov; - float CosFov; - XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); - - float fRange = FarZ / (FarZ - NearZ); - // Note: This is recorded on the stack - float Height = CosFov / SinFov; - XMVECTOR rMem = {Height / AspectRatio, Height, fRange, -fRange * NearZ}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // Height / AspectRatio,0,0,0 - XMMATRIX M; - M.r[0] = vTemp; - // 0,Height,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // x=fRange,y=-fRange * NearZ,0,1.0f - vTemp = _mm_setzero_ps(); - vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); - // 0,0,fRange,1.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); - M.r[2] = vTemp; - // 0,0,-fRange * NearZ,0.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); - M.r[3] = vTemp; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH(float FovAngleY, - float AspectRatio, - float NearZ, - float FarZ) noexcept { - assert(NearZ > 0.f && FarZ > 0.f); - assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); - assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float SinFov; - float CosFov; - XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); - - float Height = CosFov / SinFov; - float Width = Height / AspectRatio; - float fRange = FarZ / (NearZ - FarZ); - - XMMATRIX M; - M.m[0][0] = Width; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = Height; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = -1.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = fRange * NearZ; - M.m[3][3] = 0.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float SinFov; - float CosFov; - XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); - float fRange = FarZ / (NearZ - FarZ); - float Height = CosFov / SinFov; - float Width = Height / AspectRatio; - const float32x4_t Zero = vdupq_n_f32(0); - - XMMATRIX M; - M.r[0] = vsetq_lane_f32(Width, Zero, 0); - M.r[1] = vsetq_lane_f32(Height, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, g_XMNegIdentityR3.v, 2); - M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - float SinFov; - float CosFov; - XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); - float fRange = FarZ / (NearZ - FarZ); - // Note: This is recorded on the stack - float Height = CosFov / SinFov; - XMVECTOR rMem = {Height / AspectRatio, Height, fRange, fRange * NearZ}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // Height / AspectRatio,0,0,0 - XMMATRIX M; - M.r[0] = vTemp; - // 0,Height,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // x=fRange,y=-fRange * NearZ,0,-1.0f - vTemp = _mm_setzero_ps(); - vValues = - _mm_shuffle_ps(vValues, g_XMNegIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); - // 0,0,fRange,-1.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0)); - M.r[2] = vTemp; - // 0,0,fRange * NearZ,0.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0)); - M.r[3] = vTemp; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH( - float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, - float NearZ, float FarZ) noexcept { - assert(NearZ > 0.f && FarZ > 0.f); - assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); - assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float TwoNearZ = NearZ + NearZ; - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = FarZ / (FarZ - NearZ); - - XMMATRIX M; - M.m[0][0] = TwoNearZ * ReciprocalWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = TwoNearZ * ReciprocalHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth; - M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight; - M.m[2][2] = fRange; - M.m[2][3] = 1.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = -fRange * NearZ; - M.m[3][3] = 0.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float TwoNearZ = NearZ + NearZ; - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = FarZ / (FarZ - NearZ); - const float32x4_t Zero = vdupq_n_f32(0); - - XMMATRIX M; - M.r[0] = vsetq_lane_f32(TwoNearZ * ReciprocalWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(TwoNearZ * ReciprocalHeight, Zero, 1); - M.r[2] = - XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, - -(ViewTop + ViewBottom) * ReciprocalHeight, fRange, 1.0f); - M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float TwoNearZ = NearZ + NearZ; - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = FarZ / (FarZ - NearZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {TwoNearZ * ReciprocalWidth, TwoNearZ * ReciprocalHeight, - -fRange * NearZ, 0}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // TwoNearZ*ReciprocalWidth,0,0,0 - M.r[0] = vTemp; - // 0,TwoNearZ*ReciprocalHeight,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // 0,0,fRange,1.0f - M.r[2] = - XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, - -(ViewTop + ViewBottom) * ReciprocalHeight, fRange, 1.0f); - // 0,0,-fRange * NearZ,0.0f - vValues = _mm_and_ps(vValues, g_XMMaskZ); - M.r[3] = vValues; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH( - float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, - float NearZ, float FarZ) noexcept { - assert(NearZ > 0.f && FarZ > 0.f); - assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); - assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float TwoNearZ = NearZ + NearZ; - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = FarZ / (NearZ - FarZ); - - XMMATRIX M; - M.m[0][0] = TwoNearZ * ReciprocalWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = TwoNearZ * ReciprocalHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth; - M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight; - M.m[2][2] = fRange; - M.m[2][3] = -1.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = fRange * NearZ; - M.m[3][3] = 0.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float TwoNearZ = NearZ + NearZ; - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = FarZ / (NearZ - FarZ); - const float32x4_t Zero = vdupq_n_f32(0); - - XMMATRIX M; - M.r[0] = vsetq_lane_f32(TwoNearZ * ReciprocalWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(TwoNearZ * ReciprocalHeight, Zero, 1); - M.r[2] = - XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth, - (ViewTop + ViewBottom) * ReciprocalHeight, fRange, -1.0f); - M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float TwoNearZ = NearZ + NearZ; - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = FarZ / (NearZ - FarZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {TwoNearZ * ReciprocalWidth, TwoNearZ * ReciprocalHeight, - fRange * NearZ, 0}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // TwoNearZ*ReciprocalWidth,0,0,0 - M.r[0] = vTemp; - // 0,TwoNearZ*ReciprocalHeight,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // 0,0,fRange,1.0f - M.r[2] = - XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth, - (ViewTop + ViewBottom) * ReciprocalHeight, fRange, -1.0f); - // 0,0,-fRange * NearZ,0.0f - vValues = _mm_and_ps(vValues, g_XMMaskZ); - M.r[3] = vValues; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixOrthographicLH(float ViewWidth, - float ViewHeight, - float NearZ, - float FarZ) noexcept { - assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float fRange = 1.0f / (FarZ - NearZ); - - XMMATRIX M; - M.m[0][0] = 2.0f / ViewWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = 2.0f / ViewHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = -fRange * NearZ; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float fRange = 1.0f / (FarZ - NearZ); - - const float32x4_t Zero = vdupq_n_f32(0); - XMMATRIX M; - M.r[0] = vsetq_lane_f32(2.0f / ViewWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(2.0f / ViewHeight, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, Zero, 2); - M.r[3] = vsetq_lane_f32(-fRange * NearZ, g_XMIdentityR3.v, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float fRange = 1.0f / (FarZ - NearZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {2.0f / ViewWidth, 2.0f / ViewHeight, fRange, - -fRange * NearZ}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // 2.0f / ViewWidth,0,0,0 - M.r[0] = vTemp; - // 0,2.0f / ViewHeight,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // x=fRange,y=-fRange * NearZ,0,1.0f - vTemp = _mm_setzero_ps(); - vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); - // 0,0,fRange,0.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 0, 0, 0)); - M.r[2] = vTemp; - // 0,0,-fRange * NearZ,1.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 1, 0, 0)); - M.r[3] = vTemp; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixOrthographicRH(float ViewWidth, - float ViewHeight, - float NearZ, - float FarZ) noexcept { - assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float fRange = 1.0f / (NearZ - FarZ); - - XMMATRIX M; - M.m[0][0] = 2.0f / ViewWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = 2.0f / ViewHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = 0.0f; - - M.m[3][0] = 0.0f; - M.m[3][1] = 0.0f; - M.m[3][2] = fRange * NearZ; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float fRange = 1.0f / (NearZ - FarZ); - - const float32x4_t Zero = vdupq_n_f32(0); - XMMATRIX M; - M.r[0] = vsetq_lane_f32(2.0f / ViewWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(2.0f / ViewHeight, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, Zero, 2); - M.r[3] = vsetq_lane_f32(fRange * NearZ, g_XMIdentityR3.v, 2); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float fRange = 1.0f / (NearZ - FarZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {2.0f / ViewWidth, 2.0f / ViewHeight, fRange, - fRange * NearZ}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // 2.0f / ViewWidth,0,0,0 - M.r[0] = vTemp; - // 0,2.0f / ViewHeight,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - M.r[1] = vTemp; - // x=fRange,y=fRange * NearZ,0,1.0f - vTemp = _mm_setzero_ps(); - vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2)); - // 0,0,fRange,0.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 0, 0, 0)); - M.r[2] = vTemp; - // 0,0,fRange * NearZ,1.0f - vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 1, 0, 0)); - M.r[3] = vTemp; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH( - float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, - float NearZ, float FarZ) noexcept { - assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); - assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = 1.0f / (FarZ - NearZ); - - XMMATRIX M; - M.m[0][0] = ReciprocalWidth + ReciprocalWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = ReciprocalHeight + ReciprocalHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = 0.0f; - - M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth; - M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight; - M.m[3][2] = -fRange * NearZ; - M.m[3][3] = 1.0f; - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = 1.0f / (FarZ - NearZ); - const float32x4_t Zero = vdupq_n_f32(0); - XMMATRIX M; - M.r[0] = vsetq_lane_f32(ReciprocalWidth + ReciprocalWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(ReciprocalHeight + ReciprocalHeight, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, Zero, 2); - M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, - -(ViewTop + ViewBottom) * ReciprocalHeight, - -fRange * NearZ, 1.0f); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = 1.0f / (FarZ - NearZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {fReciprocalWidth, fReciprocalHeight, fRange, 1.0f}; - XMVECTOR rMem2 = {-(ViewLeft + ViewRight), -(ViewTop + ViewBottom), -NearZ, - 1.0f}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // fReciprocalWidth*2,0,0,0 - vTemp = _mm_add_ss(vTemp, vTemp); - M.r[0] = vTemp; - // 0,fReciprocalHeight*2,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - vTemp = _mm_add_ps(vTemp, vTemp); - M.r[1] = vTemp; - // 0,0,fRange,0.0f - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskZ); - M.r[2] = vTemp; - // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + - // ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f - vValues = _mm_mul_ps(vValues, rMem2); - M.r[3] = vValues; - return M; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH( - float ViewLeft, float ViewRight, float ViewBottom, float ViewTop, - float NearZ, float FarZ) noexcept { - assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); - assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); - assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); - -#if defined(_XM_NO_INTRINSICS_) - - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = 1.0f / (NearZ - FarZ); - - XMMATRIX M; - M.m[0][0] = ReciprocalWidth + ReciprocalWidth; - M.m[0][1] = 0.0f; - M.m[0][2] = 0.0f; - M.m[0][3] = 0.0f; - - M.m[1][0] = 0.0f; - M.m[1][1] = ReciprocalHeight + ReciprocalHeight; - M.m[1][2] = 0.0f; - M.m[1][3] = 0.0f; - - M.m[2][0] = 0.0f; - M.m[2][1] = 0.0f; - M.m[2][2] = fRange; - M.m[2][3] = 0.0f; - - M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, - -(ViewTop + ViewBottom) * ReciprocalHeight, - fRange * NearZ, 1.0f); - return M; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = 1.0f / (NearZ - FarZ); - const float32x4_t Zero = vdupq_n_f32(0); - XMMATRIX M; - M.r[0] = vsetq_lane_f32(ReciprocalWidth + ReciprocalWidth, Zero, 0); - M.r[1] = vsetq_lane_f32(ReciprocalHeight + ReciprocalHeight, Zero, 1); - M.r[2] = vsetq_lane_f32(fRange, Zero, 2); - M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, - -(ViewTop + ViewBottom) * ReciprocalHeight, - fRange * NearZ, 1.0f); - return M; -#elif defined(_XM_SSE_INTRINSICS_) - XMMATRIX M; - float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft); - float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom); - float fRange = 1.0f / (NearZ - FarZ); - // Note: This is recorded on the stack - XMVECTOR rMem = {fReciprocalWidth, fReciprocalHeight, fRange, 1.0f}; - XMVECTOR rMem2 = {-(ViewLeft + ViewRight), -(ViewTop + ViewBottom), NearZ, - 1.0f}; - // Copy from memory to SSE register - XMVECTOR vValues = rMem; - XMVECTOR vTemp = _mm_setzero_ps(); - // Copy x only - vTemp = _mm_move_ss(vTemp, vValues); - // fReciprocalWidth*2,0,0,0 - vTemp = _mm_add_ss(vTemp, vTemp); - M.r[0] = vTemp; - // 0,fReciprocalHeight*2,0,0 - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskY); - vTemp = _mm_add_ps(vTemp, vTemp); - M.r[1] = vTemp; - // 0,0,fRange,0.0f - vTemp = vValues; - vTemp = _mm_and_ps(vTemp, g_XMMaskZ); - M.r[2] = vTemp; - // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + - // ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f - vValues = _mm_mul_ps(vValues, rMem2); - M.r[3] = vValues; - return M; -#endif -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -/**************************************************************************** - * - * XMMATRIX operators and methods - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMMATRIX::XMMATRIX(float m00, float m01, float m02, float m03, float m10, - float m11, float m12, float m13, float m20, float m21, - float m22, float m23, float m30, float m31, float m32, - float m33) noexcept { - r[0] = XMVectorSet(m00, m01, m02, m03); - r[1] = XMVectorSet(m10, m11, m12, m13); - r[2] = XMVectorSet(m20, m21, m22, m23); - r[3] = XMVectorSet(m30, m31, m32, m33); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMMATRIX::XMMATRIX(const float* pArray) noexcept { - assert(pArray != nullptr); - r[0] = XMLoadFloat4(reinterpret_cast(pArray)); - r[1] = XMLoadFloat4(reinterpret_cast(pArray + 4)); - r[2] = XMLoadFloat4(reinterpret_cast(pArray + 8)); - r[3] = XMLoadFloat4(reinterpret_cast(pArray + 12)); -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XMMATRIX::operator-() const noexcept { - XMMATRIX R; - R.r[0] = XMVectorNegate(r[0]); - R.r[1] = XMVectorNegate(r[1]); - R.r[2] = XMVectorNegate(r[2]); - R.r[3] = XMVectorNegate(r[3]); - return R; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX& XM_CALLCONV XMMATRIX::operator+=(FXMMATRIX M) noexcept { - r[0] = XMVectorAdd(r[0], M.r[0]); - r[1] = XMVectorAdd(r[1], M.r[1]); - r[2] = XMVectorAdd(r[2], M.r[2]); - r[3] = XMVectorAdd(r[3], M.r[3]); - return *this; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX& XM_CALLCONV XMMATRIX::operator-=(FXMMATRIX M) noexcept { - r[0] = XMVectorSubtract(r[0], M.r[0]); - r[1] = XMVectorSubtract(r[1], M.r[1]); - r[2] = XMVectorSubtract(r[2], M.r[2]); - r[3] = XMVectorSubtract(r[3], M.r[3]); - return *this; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX& XM_CALLCONV XMMATRIX::operator*=(FXMMATRIX M) noexcept { - *this = XMMatrixMultiply(*this, M); - return *this; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX& XMMATRIX::operator*=(float S) noexcept { - r[0] = XMVectorScale(r[0], S); - r[1] = XMVectorScale(r[1], S); - r[2] = XMVectorScale(r[2], S); - r[3] = XMVectorScale(r[3], S); - return *this; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX& XMMATRIX::operator/=(float S) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR vS = XMVectorReplicate(S); - r[0] = XMVectorDivide(r[0], vS); - r[1] = XMVectorDivide(r[1], vS); - r[2] = XMVectorDivide(r[2], vS); - r[3] = XMVectorDivide(r[3], vS); - return *this; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - float32x4_t vS = vdupq_n_f32(S); - r[0] = vdivq_f32(r[0], vS); - r[1] = vdivq_f32(r[1], vS); - r[2] = vdivq_f32(r[2], vS); - r[3] = vdivq_f32(r[3], vS); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x2_t vS = vdup_n_f32(S); - float32x2_t R0 = vrecpe_f32(vS); - float32x2_t S0 = vrecps_f32(R0, vS); - R0 = vmul_f32(S0, R0); - S0 = vrecps_f32(R0, vS); - R0 = vmul_f32(S0, R0); - float32x4_t Reciprocal = vcombine_f32(R0, R0); - r[0] = vmulq_f32(r[0], Reciprocal); - r[1] = vmulq_f32(r[1], Reciprocal); - r[2] = vmulq_f32(r[2], Reciprocal); - r[3] = vmulq_f32(r[3], Reciprocal); -#endif - return *this; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 vS = _mm_set_ps1(S); - r[0] = _mm_div_ps(r[0], vS); - r[1] = _mm_div_ps(r[1], vS); - r[2] = _mm_div_ps(r[2], vS); - r[3] = _mm_div_ps(r[3], vS); - return *this; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMATRIX::operator+(FXMMATRIX M) const noexcept { - XMMATRIX R; - R.r[0] = XMVectorAdd(r[0], M.r[0]); - R.r[1] = XMVectorAdd(r[1], M.r[1]); - R.r[2] = XMVectorAdd(r[2], M.r[2]); - R.r[3] = XMVectorAdd(r[3], M.r[3]); - return R; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMATRIX::operator-(FXMMATRIX M) const noexcept { - XMMATRIX R; - R.r[0] = XMVectorSubtract(r[0], M.r[0]); - R.r[1] = XMVectorSubtract(r[1], M.r[1]); - R.r[2] = XMVectorSubtract(r[2], M.r[2]); - R.r[3] = XMVectorSubtract(r[3], M.r[3]); - return R; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV XMMATRIX::operator*(FXMMATRIX M) const noexcept { - return XMMatrixMultiply(*this, M); -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XMMATRIX::operator*(float S) const noexcept { - XMMATRIX R; - R.r[0] = XMVectorScale(r[0], S); - R.r[1] = XMVectorScale(r[1], S); - R.r[2] = XMVectorScale(r[2], S); - R.r[3] = XMVectorScale(r[3], S); - return R; -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XMMATRIX::operator/(float S) const noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR vS = XMVectorReplicate(S); - XMMATRIX R; - R.r[0] = XMVectorDivide(r[0], vS); - R.r[1] = XMVectorDivide(r[1], vS); - R.r[2] = XMVectorDivide(r[2], vS); - R.r[3] = XMVectorDivide(r[3], vS); - return R; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - float32x4_t vS = vdupq_n_f32(S); - XMMATRIX R; - R.r[0] = vdivq_f32(r[0], vS); - R.r[1] = vdivq_f32(r[1], vS); - R.r[2] = vdivq_f32(r[2], vS); - R.r[3] = vdivq_f32(r[3], vS); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x2_t vS = vdup_n_f32(S); - float32x2_t R0 = vrecpe_f32(vS); - float32x2_t S0 = vrecps_f32(R0, vS); - R0 = vmul_f32(S0, R0); - S0 = vrecps_f32(R0, vS); - R0 = vmul_f32(S0, R0); - float32x4_t Reciprocal = vcombine_f32(R0, R0); - XMMATRIX R; - R.r[0] = vmulq_f32(r[0], Reciprocal); - R.r[1] = vmulq_f32(r[1], Reciprocal); - R.r[2] = vmulq_f32(r[2], Reciprocal); - R.r[3] = vmulq_f32(r[3], Reciprocal); -#endif - return R; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 vS = _mm_set_ps1(S); - XMMATRIX R; - R.r[0] = _mm_div_ps(r[0], vS); - R.r[1] = _mm_div_ps(r[1], vS); - R.r[2] = _mm_div_ps(r[2], vS); - R.r[3] = _mm_div_ps(r[3], vS); - return R; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMMATRIX XM_CALLCONV operator*(float S, FXMMATRIX M) noexcept { - XMMATRIX R; - R.r[0] = XMVectorScale(M.r[0], S); - R.r[1] = XMVectorScale(M.r[1], S); - R.r[2] = XMVectorScale(M.r[2], S); - R.r[3] = XMVectorScale(M.r[3], S); - return R; -} - -/**************************************************************************** - * - * XMFLOAT3X3 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMFLOAT3X3::XMFLOAT3X3( - const float* pArray) noexcept { - assert(pArray != nullptr); - for (size_t Row = 0; Row < 3; Row++) { - for (size_t Column = 0; Column < 3; Column++) { - m[Row][Column] = pArray[Row * 3 + Column]; - } - } -} - -/**************************************************************************** - * - * XMFLOAT4X3 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMFLOAT4X3::XMFLOAT4X3( - const float* pArray) noexcept { - assert(pArray != nullptr); - - m[0][0] = pArray[0]; - m[0][1] = pArray[1]; - m[0][2] = pArray[2]; - - m[1][0] = pArray[3]; - m[1][1] = pArray[4]; - m[1][2] = pArray[5]; - - m[2][0] = pArray[6]; - m[2][1] = pArray[7]; - m[2][2] = pArray[8]; - - m[3][0] = pArray[9]; - m[3][1] = pArray[10]; - m[3][2] = pArray[11]; -} - -/**************************************************************************** - * - * XMFLOAT3X4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMFLOAT3X4::XMFLOAT3X4( - const float* pArray) noexcept { - assert(pArray != nullptr); - - m[0][0] = pArray[0]; - m[0][1] = pArray[1]; - m[0][2] = pArray[2]; - m[0][3] = pArray[3]; - - m[1][0] = pArray[4]; - m[1][1] = pArray[5]; - m[1][2] = pArray[6]; - m[1][3] = pArray[7]; - - m[2][0] = pArray[8]; - m[2][1] = pArray[9]; - m[2][2] = pArray[10]; - m[2][3] = pArray[11]; -} - -/**************************************************************************** - * - * XMFLOAT4X4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMFLOAT4X4::XMFLOAT4X4( - const float* pArray) noexcept { - assert(pArray != nullptr); - - m[0][0] = pArray[0]; - m[0][1] = pArray[1]; - m[0][2] = pArray[2]; - m[0][3] = pArray[3]; - - m[1][0] = pArray[4]; - m[1][1] = pArray[5]; - m[1][2] = pArray[6]; - m[1][3] = pArray[7]; - - m[2][0] = pArray[8]; - m[2][1] = pArray[9]; - m[2][2] = pArray[10]; - m[2][3] = pArray[11]; - - m[3][0] = pArray[12]; - m[3][1] = pArray[13]; - m[3][2] = pArray[14]; - m[3][3] = pArray[15]; -} diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXMathMisc.inl b/targets/app/linux/Stubs/DirectXMath/DirectXMathMisc.inl deleted file mode 100644 index 1e2869428..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXMathMisc.inl +++ /dev/null @@ -1,2261 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXMathMisc.inl -- SIMD C++ Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -/**************************************************************************** - * - * Quaternion - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMQuaternionEqual(FXMVECTOR Q1, FXMVECTOR Q2) noexcept { - return XMVector4Equal(Q1, Q2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMQuaternionNotEqual(FXMVECTOR Q1, - FXMVECTOR Q2) noexcept { - return XMVector4NotEqual(Q1, Q2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMQuaternionIsNaN(FXMVECTOR Q) noexcept { - return XMVector4IsNaN(Q); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMQuaternionIsInfinite(FXMVECTOR Q) noexcept { - return XMVector4IsInfinite(Q); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMQuaternionIsIdentity(FXMVECTOR Q) noexcept { - return XMVector4Equal(Q, g_XMIdentityR3.v); -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionDot(FXMVECTOR Q1, - FXMVECTOR Q2) noexcept { - return XMVector4Dot(Q1, Q2); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionMultiply(FXMVECTOR Q1, - FXMVECTOR Q2) noexcept { - // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 - // followed by the rotation Q2) - - // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y), - // (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x), - // (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w), - // (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ] - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{(Q2.vector4_f32[3] * Q1.vector4_f32[0]) + - (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + - (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - - (Q2.vector4_f32[2] * Q1.vector4_f32[1]), - (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + - (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + - (Q2.vector4_f32[2] * Q1.vector4_f32[0]), - (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + - (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + - (Q2.vector4_f32[2] * Q1.vector4_f32[3]), - (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - - (Q2.vector4_f32[2] * Q1.vector4_f32[2])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 ControlWZYX = {{{1.0f, -1.0f, 1.0f, -1.0f}}}; - static const XMVECTORF32 ControlZWXY = {{{1.0f, 1.0f, -1.0f, -1.0f}}}; - static const XMVECTORF32 ControlYXWZ = {{{-1.0f, 1.0f, 1.0f, -1.0f}}}; - - float32x2_t Q2L = vget_low_f32(Q2); - float32x2_t Q2H = vget_high_f32(Q2); - - float32x4_t Q2X = vdupq_lane_f32(Q2L, 0); - float32x4_t Q2Y = vdupq_lane_f32(Q2L, 1); - float32x4_t Q2Z = vdupq_lane_f32(Q2H, 0); - XMVECTOR vResult = vmulq_lane_f32(Q1, Q2H, 1); - - // Mul by Q1WZYX - float32x4_t vTemp = vrev64q_f32(Q1); - vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); - Q2X = vmulq_f32(Q2X, vTemp); - vResult = vmlaq_f32(vResult, Q2X, ControlWZYX); - - // Mul by Q1ZWXY - vTemp = vreinterpretq_f32_u32(vrev64q_u32(vreinterpretq_u32_f32(vTemp))); - Q2Y = vmulq_f32(Q2Y, vTemp); - vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY); - - // Mul by Q1YXWZ - vTemp = vreinterpretq_f32_u32(vrev64q_u32(vreinterpretq_u32_f32(vTemp))); - vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); - Q2Z = vmulq_f32(Q2Z, vTemp); - vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ControlWZYX = {{{1.0f, -1.0f, 1.0f, -1.0f}}}; - static const XMVECTORF32 ControlZWXY = {{{1.0f, 1.0f, -1.0f, -1.0f}}}; - static const XMVECTORF32 ControlYXWZ = {{{-1.0f, 1.0f, 1.0f, -1.0f}}}; - // Copy to SSE registers and use as few as possible for x86 - XMVECTOR Q2X = Q2; - XMVECTOR Q2Y = Q2; - XMVECTOR Q2Z = Q2; - XMVECTOR vResult = Q2; - // Splat with one instruction - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 3, 3, 3)); - Q2X = XM_PERMUTE_PS(Q2X, _MM_SHUFFLE(0, 0, 0, 0)); - Q2Y = XM_PERMUTE_PS(Q2Y, _MM_SHUFFLE(1, 1, 1, 1)); - Q2Z = XM_PERMUTE_PS(Q2Z, _MM_SHUFFLE(2, 2, 2, 2)); - // Retire Q1 and perform Q1*Q2W - vResult = _mm_mul_ps(vResult, Q1); - XMVECTOR Q1Shuffle = Q1; - // Shuffle the copies of Q1 - Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); - // Mul by Q1WZYX - Q2X = _mm_mul_ps(Q2X, Q1Shuffle); - Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1)); - // Flip the signs on y and z - vResult = XM_FMADD_PS(Q2X, ControlWZYX, vResult); - // Mul by Q1ZWXY - Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle); - Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); - // Flip the signs on z and w - Q2Y = _mm_mul_ps(Q2Y, ControlZWXY); - // Mul by Q1YXWZ - Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle); - // Flip the signs on x and w - Q2Y = XM_FMADD_PS(Q2Z, ControlYXWZ, Q2Y); - vResult = _mm_add_ps(vResult, Q2Y); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionLengthSq(FXMVECTOR Q) noexcept { - return XMVector4LengthSq(Q); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength(FXMVECTOR Q) noexcept { - return XMVector4ReciprocalLength(Q); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionLength(FXMVECTOR Q) noexcept { - return XMVector4Length(Q); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst(FXMVECTOR Q) noexcept { - return XMVector4NormalizeEst(Q); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionNormalize(FXMVECTOR Q) noexcept { - return XMVector4Normalize(Q); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionConjugate(FXMVECTOR Q) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{-Q.vector4_f32[0], -Q.vector4_f32[1], - -Q.vector4_f32[2], Q.vector4_f32[3]}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 NegativeOne3 = {{{-1.0f, -1.0f, -1.0f, 1.0f}}}; - return vmulq_f32(Q, NegativeOne3.v); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 NegativeOne3 = {{{-1.0f, -1.0f, -1.0f, 1.0f}}}; - return _mm_mul_ps(Q, NegativeOne3); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionInverse(FXMVECTOR Q) noexcept { - XMVECTOR L = XMVector4LengthSq(Q); - XMVECTOR Conjugate = XMQuaternionConjugate(Q); - - XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v); - - XMVECTOR Result = XMVectorDivide(Conjugate, L); - - Result = XMVectorSelect(Result, g_XMZero, Control); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionLn(FXMVECTOR Q) noexcept { - static const XMVECTORF32 OneMinusEpsilon = { - {{1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}}}; - - XMVECTOR QW = XMVectorSplatW(Q); - XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v); - - XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v); - - XMVECTOR Theta = XMVectorACos(QW); - XMVECTOR SinTheta = XMVectorSin(Theta); - - XMVECTOR S = XMVectorDivide(Theta, SinTheta); - - XMVECTOR Result = XMVectorMultiply(Q0, S); - Result = XMVectorSelect(Q0, Result, ControlW); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionExp(FXMVECTOR Q) noexcept { - XMVECTOR Theta = XMVector3Length(Q); - - XMVECTOR SinTheta, CosTheta; - XMVectorSinCos(&SinTheta, &CosTheta, Theta); - - XMVECTOR S = XMVectorDivide(SinTheta, Theta); - - XMVECTOR Result = XMVectorMultiply(Q, S); - - const XMVECTOR Zero = XMVectorZero(); - XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v); - Result = XMVectorSelect(Result, Q, Control); - - Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionSlerp(FXMVECTOR Q0, FXMVECTOR Q1, - float t) noexcept { - XMVECTOR T = XMVectorReplicate(t); - return XMQuaternionSlerpV(Q0, Q1, T); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR T) noexcept { - assert((XMVectorGetY(T) == XMVectorGetX(T)) && - (XMVectorGetZ(T) == XMVectorGetX(T)) && - (XMVectorGetW(T) == XMVectorGetX(T))); - - // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / - // sin(Omega) - -#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - const XMVECTORF32 OneMinusEpsilon = { - {{1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}}}; - - XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); - - const XMVECTOR Zero = XMVectorZero(); - XMVECTOR Control = XMVectorLess(CosOmega, Zero); - XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); - - CosOmega = XMVectorMultiply(CosOmega, Sign); - - Control = XMVectorLess(CosOmega, OneMinusEpsilon); - - XMVECTOR SinOmega = - XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); - SinOmega = XMVectorSqrt(SinOmega); - - XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); - - XMVECTOR SignMask = XMVectorSplatSignMask(); - XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2); - SignMask = XMVectorShiftLeft(SignMask, Zero, 3); - V01 = XMVectorXorInt(V01, SignMask); - V01 = XMVectorAdd(g_XMIdentityR0.v, V01); - - XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega); - - XMVECTOR S0 = XMVectorMultiply(V01, Omega); - S0 = XMVectorSin(S0); - S0 = XMVectorMultiply(S0, InvSinOmega); - - S0 = XMVectorSelect(V01, S0, Control); - - XMVECTOR S1 = XMVectorSplatY(S0); - S0 = XMVectorSplatX(S0); - - S1 = XMVectorMultiply(S1, Sign); - - XMVECTOR Result = XMVectorMultiply(Q0, S0); - Result = XMVectorMultiplyAdd(Q1, S1, Result); - - return Result; - -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 OneMinusEpsilon = { - {{1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}}}; - static const XMVECTORU32 SignMask2 = { - {{0x80000000, 0x00000000, 0x00000000, 0x00000000}}}; - - XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); - - const XMVECTOR Zero = XMVectorZero(); - XMVECTOR Control = XMVectorLess(CosOmega, Zero); - XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); - - CosOmega = _mm_mul_ps(CosOmega, Sign); - - Control = XMVectorLess(CosOmega, OneMinusEpsilon); - - XMVECTOR SinOmega = _mm_mul_ps(CosOmega, CosOmega); - SinOmega = _mm_sub_ps(g_XMOne, SinOmega); - SinOmega = _mm_sqrt_ps(SinOmega); - - XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); - - XMVECTOR V01 = XM_PERMUTE_PS(T, _MM_SHUFFLE(2, 3, 0, 1)); - V01 = _mm_and_ps(V01, g_XMMaskXY); - V01 = _mm_xor_ps(V01, SignMask2); - V01 = _mm_add_ps(g_XMIdentityR0, V01); - - XMVECTOR S0 = _mm_mul_ps(V01, Omega); - S0 = XMVectorSin(S0); - S0 = _mm_div_ps(S0, SinOmega); - - S0 = XMVectorSelect(V01, S0, Control); - - XMVECTOR S1 = XMVectorSplatY(S0); - S0 = XMVectorSplatX(S0); - - S1 = _mm_mul_ps(S1, Sign); - XMVECTOR Result = _mm_mul_ps(Q0, S0); - S1 = _mm_mul_ps(S1, Q1); - Result = _mm_add_ps(Result, S1); - return Result; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionSquad(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR Q2, GXMVECTOR Q3, - float t) noexcept { - XMVECTOR T = XMVectorReplicate(t); - return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionSquadV(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR Q2, GXMVECTOR Q3, - HXMVECTOR T) noexcept { - assert((XMVectorGetY(T) == XMVectorGetX(T)) && - (XMVectorGetZ(T) == XMVectorGetX(T)) && - (XMVectorGetW(T) == XMVectorGetX(T))); - - XMVECTOR TP = T; - const XMVECTOR Two = XMVectorSplatConstant(2, 0); - - XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T); - XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T); - - TP = XMVectorNegativeMultiplySubtract(TP, TP, TP); - TP = XMVectorMultiply(TP, Two); - - XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP); - - return Result; -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMQuaternionSquadSetup(XMVECTOR* pA, XMVECTOR* pB, XMVECTOR* pC, FXMVECTOR Q0, - FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3) noexcept { - assert(pA); - assert(pB); - assert(pC); - - XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2)); - XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2)); - XMVECTOR SQ2 = XMVectorNegate(Q2); - - XMVECTOR Control1 = XMVectorLess(LS12, LD12); - SQ2 = XMVectorSelect(Q2, SQ2, Control1); - - XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1)); - XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1)); - XMVECTOR SQ0 = XMVectorNegate(Q0); - - XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3)); - XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3)); - XMVECTOR SQ3 = XMVectorNegate(Q3); - - XMVECTOR Control0 = XMVectorLess(LS01, LD01); - XMVECTOR Control2 = XMVectorLess(LS23, LD23); - - SQ0 = XMVectorSelect(Q0, SQ0, Control0); - SQ3 = XMVectorSelect(Q3, SQ3, Control2); - - XMVECTOR InvQ1 = XMQuaternionInverse(Q1); - XMVECTOR InvQ2 = XMQuaternionInverse(SQ2); - - XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0)); - XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2)); - XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1)); - XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3)); - - const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2); - - XMVECTOR ExpQ02 = - XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter); - XMVECTOR ExpQ13 = - XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter); - ExpQ02 = XMQuaternionExp(ExpQ02); - ExpQ13 = XMQuaternionExp(ExpQ13); - - *pA = XMQuaternionMultiply(Q1, ExpQ02); - *pB = XMQuaternionMultiply(SQ2, ExpQ13); - *pC = SQ2; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentric(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR Q2, float f, - float g) noexcept { - float s = f + g; - - XMVECTOR Result; - if ((s < 0.00001f) && (s > -0.00001f)) { - Result = Q0; - } else { - XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s); - XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s); - - Result = XMQuaternionSlerp(Q01, Q02, g / s); - } - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV(FXMVECTOR Q0, FXMVECTOR Q1, - FXMVECTOR Q2, GXMVECTOR F, - HXMVECTOR G) noexcept { - assert((XMVectorGetY(F) == XMVectorGetX(F)) && - (XMVectorGetZ(F) == XMVectorGetX(F)) && - (XMVectorGetW(F) == XMVectorGetX(F))); - assert((XMVectorGetY(G) == XMVectorGetX(G)) && - (XMVectorGetZ(G) == XMVectorGetX(G)) && - (XMVectorGetW(G) == XMVectorGetX(G))); - - const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16); - - XMVECTOR S = XMVectorAdd(F, G); - - XMVECTOR Result; - if (XMVector4InBounds(S, Epsilon)) { - Result = Q0; - } else { - XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S); - XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S); - XMVECTOR GS = XMVectorReciprocal(S); - GS = XMVectorMultiply(G, GS); - - Result = XMQuaternionSlerpV(Q01, Q02, GS); - } - - return Result; -} - -//------------------------------------------------------------------------------ -// Transformation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionIdentity() noexcept { - return g_XMIdentityR3.v; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMQuaternionRotationRollPitchYaw(float Pitch, float Yaw, float Roll) noexcept { -#if defined(_XM_NO_INTRINSICS_) - const float halfpitch = Pitch * 0.5f; - float cp = cosf(halfpitch); - float sp = sinf(halfpitch); - - const float halfyaw = Yaw * 0.5f; - float cy = cosf(halfyaw); - float sy = sinf(halfyaw); - - const float halfroll = Roll * 0.5f; - float cr = cosf(halfroll); - float sr = sinf(halfroll); - - XMVECTORF32 vResult = { - {{cr * sp * cy + sr * cp * sy, cr * cp * sy - sr * sp * cy, - sr * cp * cy - cr * sp * sy, cr * cp * cy + sr * sp * sy}}}; - return vResult; -#else - XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); - return XMQuaternionRotationRollPitchYawFromVector(Angles); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector( - FXMVECTOR Angles // - ) noexcept { -#if defined(_XM_NO_INTRINSICS_) - const float halfpitch = Angles.vector4_f32[0] * 0.5f; - float cp = cosf(halfpitch); - float sp = sinf(halfpitch); - - const float halfyaw = Angles.vector4_f32[1] * 0.5f; - float cy = cosf(halfyaw); - float sy = sinf(halfyaw); - - const float halfroll = Angles.vector4_f32[2] * 0.5f; - float cr = cosf(halfroll); - float sr = sinf(halfroll); - - XMVECTORF32 vResult = { - {{cr * sp * cy + sr * cp * sy, cr * cp * sy - sr * sp * cy, - sr * cp * cy - cr * sp * sy, cr * cp * cy + sr * sp * sy}}}; - return vResult; -#else - static const XMVECTORF32 Sign = {{{1.0f, -1.0f, -1.0f, 1.0f}}}; - - XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); - - XMVECTOR SinAngles, CosAngles; - XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); - - XMVECTOR P0 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR Y0 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR R0 = XMVectorPermute(SinAngles, CosAngles); - XMVECTOR P1 = XMVectorPermute(CosAngles, SinAngles); - XMVECTOR Y1 = XMVectorPermute(CosAngles, SinAngles); - XMVECTOR R1 = XMVectorPermute(CosAngles, SinAngles); - - XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v); - XMVECTOR Q0 = XMVectorMultiply(P0, Y0); - Q1 = XMVectorMultiply(Q1, Y1); - Q0 = XMVectorMultiply(Q0, R0); - XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0); - - return Q; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal(FXMVECTOR NormalAxis, - float Angle) noexcept { -#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); - - float SinV, CosV; - XMScalarSinCos(&SinV, &CosV, 0.5f * Angle); - - XMVECTOR Scale = XMVectorSet(SinV, SinV, SinV, CosV); - return XMVectorMultiply(N, Scale); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR N = _mm_and_ps(NormalAxis, g_XMMask3); - N = _mm_or_ps(N, g_XMIdentityR3); - XMVECTOR Scale = _mm_set_ps1(0.5f * Angle); - XMVECTOR vSine; - XMVECTOR vCosine; - XMVectorSinCos(&vSine, &vCosine, Scale); - Scale = _mm_and_ps(vSine, g_XMMask3); - vCosine = _mm_and_ps(vCosine, g_XMMaskW); - Scale = _mm_or_ps(Scale, vCosine); - N = _mm_mul_ps(N, Scale); - return N; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis(FXMVECTOR Axis, - float Angle) noexcept { - assert(!XMVector3Equal(Axis, XMVectorZero())); - assert(!XMVector3IsInfinite(Axis)); - - XMVECTOR Normal = XMVector3Normalize(Axis); - XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle); - return Q; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix(FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 q; - float r22 = M.m[2][2]; - if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2 - { - float dif10 = M.m[1][1] - M.m[0][0]; - float omr22 = 1.f - r22; - if (dif10 <= 0.f) // x^2 >= y^2 - { - float fourXSqr = omr22 - dif10; - float inv4x = 0.5f / sqrtf(fourXSqr); - q.f[0] = fourXSqr * inv4x; - q.f[1] = (M.m[0][1] + M.m[1][0]) * inv4x; - q.f[2] = (M.m[0][2] + M.m[2][0]) * inv4x; - q.f[3] = (M.m[1][2] - M.m[2][1]) * inv4x; - } else // y^2 >= x^2 - { - float fourYSqr = omr22 + dif10; - float inv4y = 0.5f / sqrtf(fourYSqr); - q.f[0] = (M.m[0][1] + M.m[1][0]) * inv4y; - q.f[1] = fourYSqr * inv4y; - q.f[2] = (M.m[1][2] + M.m[2][1]) * inv4y; - q.f[3] = (M.m[2][0] - M.m[0][2]) * inv4y; - } - } else // z^2 + w^2 >= x^2 + y^2 - { - float sum10 = M.m[1][1] + M.m[0][0]; - float opr22 = 1.f + r22; - if (sum10 <= 0.f) // z^2 >= w^2 - { - float fourZSqr = opr22 - sum10; - float inv4z = 0.5f / sqrtf(fourZSqr); - q.f[0] = (M.m[0][2] + M.m[2][0]) * inv4z; - q.f[1] = (M.m[1][2] + M.m[2][1]) * inv4z; - q.f[2] = fourZSqr * inv4z; - q.f[3] = (M.m[0][1] - M.m[1][0]) * inv4z; - } else // w^2 >= z^2 - { - float fourWSqr = opr22 + sum10; - float inv4w = 0.5f / sqrtf(fourWSqr); - q.f[0] = (M.m[1][2] - M.m[2][1]) * inv4w; - q.f[1] = (M.m[2][0] - M.m[0][2]) * inv4w; - q.f[2] = (M.m[0][1] - M.m[1][0]) * inv4w; - q.f[3] = fourWSqr * inv4w; - } - } - return q.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 XMPMMP = {{{+1.0f, -1.0f, -1.0f, +1.0f}}}; - static const XMVECTORF32 XMMPMP = {{{-1.0f, +1.0f, -1.0f, +1.0f}}}; - static const XMVECTORF32 XMMMPP = {{{-1.0f, -1.0f, +1.0f, +1.0f}}}; - static const XMVECTORU32 Select0110 = { - {{XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0}}}; - static const XMVECTORU32 Select0010 = { - {{XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0}}}; - - float32x4_t r0 = M.r[0]; - float32x4_t r1 = M.r[1]; - float32x4_t r2 = M.r[2]; - - float32x4_t r00 = vdupq_lane_f32(vget_low_f32(r0), 0); - float32x4_t r11 = vdupq_lane_f32(vget_low_f32(r1), 1); - float32x4_t r22 = vdupq_lane_f32(vget_high_f32(r2), 0); - - // x^2 >= y^2 equivalent to r11 - r00 <= 0 - float32x4_t r11mr00 = vsubq_f32(r11, r00); - uint32x4_t x2gey2 = vcleq_f32(r11mr00, g_XMZero); - - // z^2 >= w^2 equivalent to r11 + r00 <= 0 - float32x4_t r11pr00 = vaddq_f32(r11, r00); - uint32x4_t z2gew2 = vcleq_f32(r11pr00, g_XMZero); - - // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 - uint32x4_t x2py2gez2pw2 = vcleq_f32(r22, g_XMZero); - - // (4*x^2, 4*y^2, 4*z^2, 4*w^2) - float32x4_t t0 = vmulq_f32(XMPMMP, r00); - float32x4_t x2y2z2w2 = vmlaq_f32(t0, XMMPMP, r11); - x2y2z2w2 = vmlaq_f32(x2y2z2w2, XMMMPP, r22); - x2y2z2w2 = vaddq_f32(x2y2z2w2, g_XMOne); - - // (r01, r02, r12, r11) - t0 = vextq_f32(r0, r0, 1); - float32x4_t t1 = vextq_f32(r1, r1, 1); - t0 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_low_f32(t1))); - - // (r10, r20, r21, r10) - t1 = vextq_f32(r2, r2, 3); - float32x4_t r10 = vdupq_lane_f32(vget_low_f32(r1), 0); - t1 = vbslq_f32(Select0110, t1, r10); - - // (4*x*y, 4*x*z, 4*y*z, unused) - float32x4_t xyxzyz = vaddq_f32(t0, t1); - - // (r21, r20, r10, r10) - t0 = vcombine_f32(vrev64_f32(vget_low_f32(r2)), vget_low_f32(r10)); - - // (r12, r02, r01, r12) - float32x4_t t2 = vcombine_f32(vrev64_f32(vget_high_f32(r0)), - vrev64_f32(vget_low_f32(r0))); - float32x4_t t3 = vdupq_lane_f32(vget_high_f32(r1), 0); - t1 = vbslq_f32(Select0110, t2, t3); - - // (4*x*w, 4*y*w, 4*z*w, unused) - float32x4_t xwywzw = vsubq_f32(t0, t1); - xwywzw = vmulq_f32(XMMPMP, xwywzw); - - // (4*x*x, 4*x*y, 4*x*z, 4*x*w) - t0 = vextq_f32(xyxzyz, xyxzyz, 3); - t1 = vbslq_f32(Select0110, t0, x2y2z2w2); - t2 = vdupq_lane_f32(vget_low_f32(xwywzw), 0); - float32x4_t tensor0 = vbslq_f32(g_XMSelect1110, t1, t2); - - // (4*y*x, 4*y*y, 4*y*z, 4*y*w) - t0 = vbslq_f32(g_XMSelect1011, xyxzyz, x2y2z2w2); - t1 = vdupq_lane_f32(vget_low_f32(xwywzw), 1); - float32x4_t tensor1 = vbslq_f32(g_XMSelect1110, t0, t1); - - // (4*z*x, 4*z*y, 4*z*z, 4*z*w) - t0 = vextq_f32(xyxzyz, xyxzyz, 1); - t1 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_high_f32(xwywzw))); - float32x4_t tensor2 = vbslq_f32(Select0010, x2y2z2w2, t1); - - // (4*w*x, 4*w*y, 4*w*z, 4*w*w) - float32x4_t tensor3 = vbslq_f32(g_XMSelect1110, xwywzw, x2y2z2w2); - - // Select the row of the tensor-product matrix that has the largest - // magnitude. - t0 = vbslq_f32(x2gey2, tensor0, tensor1); - t1 = vbslq_f32(z2gew2, tensor2, tensor3); - t2 = vbslq_f32(x2py2gez2pw2, t0, t1); - - // Normalize the row. No division by zero is possible because the - // quaternion is unit-length (and the row is a nonzero multiple of - // the quaternion). - t0 = XMVector4Length(t2); - return XMVectorDivide(t2, t0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 XMPMMP = {{{+1.0f, -1.0f, -1.0f, +1.0f}}}; - static const XMVECTORF32 XMMPMP = {{{-1.0f, +1.0f, -1.0f, +1.0f}}}; - static const XMVECTORF32 XMMMPP = {{{-1.0f, -1.0f, +1.0f, +1.0f}}}; - - XMVECTOR r0 = M.r[0]; // (r00, r01, r02, 0) - XMVECTOR r1 = M.r[1]; // (r10, r11, r12, 0) - XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0) - - // (r00, r00, r00, r00) - XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0, 0, 0, 0)); - // (r11, r11, r11, r11) - XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1, 1, 1, 1)); - // (r22, r22, r22, r22) - XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2, 2, 2, 2)); - - // x^2 >= y^2 equivalent to r11 - r00 <= 0 - // (r11 - r00, r11 - r00, r11 - r00, r11 - r00) - XMVECTOR r11mr00 = _mm_sub_ps(r11, r00); - XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero); - - // z^2 >= w^2 equivalent to r11 + r00 <= 0 - // (r11 + r00, r11 + r00, r11 + r00, r11 + r00) - XMVECTOR r11pr00 = _mm_add_ps(r11, r00); - XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero); - - // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 - XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero); - - // (4*x^2, 4*y^2, 4*z^2, 4*w^2) - XMVECTOR t0 = XM_FMADD_PS(XMPMMP, r00, g_XMOne); - XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11); - XMVECTOR t2 = XM_FMADD_PS(XMMMPP, r22, t0); - XMVECTOR x2y2z2w2 = _mm_add_ps(t1, t2); - - // (r01, r02, r12, r11) - t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 2, 2, 1)); - // (r10, r10, r20, r21) - t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1, 0, 0, 0)); - // (r10, r20, r21, r10) - t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0)); - // (4*x*y, 4*x*z, 4*y*z, unused) - XMVECTOR xyxzyz = _mm_add_ps(t0, t1); - - // (r21, r20, r10, r10) - t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 1)); - // (r12, r12, r02, r01) - t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1, 2, 2, 2)); - // (r12, r02, r01, r12) - t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0)); - // (4*x*w, 4*y*w, 4*z*w, unused) - XMVECTOR xwywzw = _mm_sub_ps(t0, t1); - xwywzw = _mm_mul_ps(XMMPMP, xwywzw); - - // (4*x^2, 4*y^2, 4*x*y, unused) - t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0, 0, 1, 0)); - // (4*z^2, 4*w^2, 4*z*w, unused) - t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0, 2, 3, 2)); - // (4*x*z, 4*y*z, 4*x*w, 4*y*w) - t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1, 0, 2, 1)); - - // (4*x*x, 4*x*y, 4*x*z, 4*x*w) - XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2, 0, 2, 0)); - // (4*y*x, 4*y*y, 4*y*z, 4*y*w) - XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 1, 1, 2)); - // (4*z*x, 4*z*y, 4*z*z, 4*z*w) - XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2, 0, 1, 0)); - // (4*w*x, 4*w*y, 4*w*z, 4*w*w) - XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1, 2, 3, 2)); - - // Select the row of the tensor-product matrix that has the largest - // magnitude. - t0 = _mm_and_ps(x2gey2, tensor0); - t1 = _mm_andnot_ps(x2gey2, tensor1); - t0 = _mm_or_ps(t0, t1); - t1 = _mm_and_ps(z2gew2, tensor2); - t2 = _mm_andnot_ps(z2gew2, tensor3); - t1 = _mm_or_ps(t1, t2); - t0 = _mm_and_ps(x2py2gez2pw2, t0); - t1 = _mm_andnot_ps(x2py2gez2pw2, t1); - t2 = _mm_or_ps(t0, t1); - - // Normalize the row. No division by zero is possible because the - // quaternion is unit-length (and the row is a nonzero multiple of - // the quaternion). - t0 = XMVector4Length(t2); - return _mm_div_ps(t2, t0); -#endif -} - -//------------------------------------------------------------------------------ -// Conversion operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMQuaternionToAxisAngle(XMVECTOR* pAxis, float* pAngle, FXMVECTOR Q) noexcept { - assert(pAxis); - assert(pAngle); - - *pAxis = Q; - - *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q)); -} - -/**************************************************************************** - * - * Plane - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMPlaneEqual(FXMVECTOR P1, FXMVECTOR P2) noexcept { - return XMVector4Equal(P1, P2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMPlaneNearEqual(FXMVECTOR P1, FXMVECTOR P2, - FXMVECTOR Epsilon) noexcept { - XMVECTOR NP1 = XMPlaneNormalize(P1); - XMVECTOR NP2 = XMPlaneNormalize(P2); - return XMVector4NearEqual(NP1, NP2, Epsilon); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMPlaneNotEqual(FXMVECTOR P1, FXMVECTOR P2) noexcept { - return XMVector4NotEqual(P1, P2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMPlaneIsNaN(FXMVECTOR P) noexcept { - return XMVector4IsNaN(P); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMPlaneIsInfinite(FXMVECTOR P) noexcept { - return XMVector4IsInfinite(P); -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneDot(FXMVECTOR P, FXMVECTOR V) noexcept { - return XMVector4Dot(P, V); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneDotCoord(FXMVECTOR P, FXMVECTOR V) noexcept { - // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3] - - XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v); - XMVECTOR Result = XMVector4Dot(P, V3); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneDotNormal(FXMVECTOR P, - FXMVECTOR V) noexcept { - return XMVector3Dot(P, V); -} - -//------------------------------------------------------------------------------ -// XMPlaneNormalizeEst uses a reciprocal estimate and -// returns QNaN on zero and infinite vectors. - -inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst(FXMVECTOR P) noexcept { -#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - XMVECTOR Result = XMVector3ReciprocalLengthEst(P); - return XMVectorMultiply(P, Result); - -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(P, P, 0x7f); - XMVECTOR vResult = _mm_rsqrt_ps(vTemp); - return _mm_mul_ps(vResult, P); -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product - XMVECTOR vDot = _mm_mul_ps(P, P); - // x=Dot.y, y=Dot.z - XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); - // Result.x = x+y - vDot = _mm_add_ss(vDot, vTemp); - // x=Dot.z - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - // Result.x = (x+y)+z - vDot = _mm_add_ss(vDot, vTemp); - // Splat x - vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); - // Get the reciprocal - vDot = _mm_rsqrt_ps(vDot); - // Get the reciprocal - vDot = _mm_mul_ps(vDot, P); - return vDot; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneNormalize(FXMVECTOR P) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float fLengthSq = sqrtf((P.vector4_f32[0] * P.vector4_f32[0]) + - (P.vector4_f32[1] * P.vector4_f32[1]) + - (P.vector4_f32[2] * P.vector4_f32[2])); - // Prevent divide by zero - if (fLengthSq > 0) { - fLengthSq = 1.0f / fLengthSq; - } - XMVECTORF32 vResult = { - {{P.vector4_f32[0] * fLengthSq, P.vector4_f32[1] * fLengthSq, - P.vector4_f32[2] * fLengthSq, P.vector4_f32[3] * fLengthSq}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR vLength = XMVector3ReciprocalLength(P); - return XMVectorMultiply(P, vLength); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vLengthSq = _mm_dp_ps(P, P, 0x7f); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(P, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vLengthSq); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y and z only - XMVECTOR vLengthSq = _mm_mul_ps(P, P); - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1)); - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(P, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vLengthSq); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine( - FXMVECTOR P, FXMVECTOR LinePoint1, FXMVECTOR LinePoint2) noexcept { - XMVECTOR V1 = XMVector3Dot(P, LinePoint1); - XMVECTOR V2 = XMVector3Dot(P, LinePoint2); - XMVECTOR D = XMVectorSubtract(V1, V2); - - XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1); - VT = XMVectorDivide(VT, D); - - XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1); - Point = XMVectorMultiplyAdd(Point, VT, LinePoint1); - - const XMVECTOR Zero = XMVectorZero(); - XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v); - - return XMVectorSelect(Point, g_XMQNaN.v, Control); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMPlaneIntersectPlane(XMVECTOR* pLinePoint1, XMVECTOR* pLinePoint2, - FXMVECTOR P1, FXMVECTOR P2) noexcept { - assert(pLinePoint1); - assert(pLinePoint2); - - XMVECTOR V1 = XMVector3Cross(P2, P1); - - XMVECTOR LengthSq = XMVector3LengthSq(V1); - - XMVECTOR V2 = XMVector3Cross(P2, V1); - - XMVECTOR P1W = XMVectorSplatW(P1); - XMVECTOR Point = XMVectorMultiply(V2, P1W); - - XMVECTOR V3 = XMVector3Cross(V1, P1); - - XMVECTOR P2W = XMVectorSplatW(P2); - Point = XMVectorMultiplyAdd(V3, P2W, Point); - - XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq); - - XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1); - - XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v); - *pLinePoint1 = XMVectorSelect(LinePoint1, g_XMQNaN.v, Control); - *pLinePoint2 = XMVectorSelect(LinePoint2, g_XMQNaN.v, Control); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneTransform(FXMVECTOR P, - FXMMATRIX ITM) noexcept { - XMVECTOR W = XMVectorSplatW(P); - XMVECTOR Z = XMVectorSplatZ(P); - XMVECTOR Y = XMVectorSplatY(P); - XMVECTOR X = XMVectorSplatX(P); - - XMVECTOR Result = XMVectorMultiply(W, ITM.r[3]); - Result = XMVectorMultiplyAdd(Z, ITM.r[2], Result); - Result = XMVectorMultiplyAdd(Y, ITM.r[1], Result); - Result = XMVectorMultiplyAdd(X, ITM.r[0], Result); - return Result; -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMFLOAT4* XM_CALLCONV XMPlaneTransformStream( - XMFLOAT4* pOutputStream, size_t OutputStride, const XMFLOAT4* pInputStream, - size_t InputStride, size_t PlaneCount, FXMMATRIX ITM) noexcept { - return XMVector4TransformStream(pOutputStream, OutputStride, pInputStream, - InputStride, PlaneCount, ITM); -} - -//------------------------------------------------------------------------------ -// Conversion operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneFromPointNormal(FXMVECTOR Point, - FXMVECTOR Normal) noexcept { - XMVECTOR W = XMVector3Dot(Point, Normal); - W = XMVectorNegate(W); - return XMVectorSelect(W, Normal, g_XMSelect1110.v); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMPlaneFromPoints(FXMVECTOR Point1, - FXMVECTOR Point2, - FXMVECTOR Point3) noexcept { - XMVECTOR V21 = XMVectorSubtract(Point1, Point2); - XMVECTOR V31 = XMVectorSubtract(Point1, Point3); - - XMVECTOR N = XMVector3Cross(V21, V31); - N = XMVector3Normalize(N); - - XMVECTOR D = XMPlaneDotNormal(N, Point1); - D = XMVectorNegate(D); - - XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v); - - return Result; -} - -/**************************************************************************** - * - * Color - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept { - return XMVector4Equal(C1, C2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorNotEqual(FXMVECTOR C1, FXMVECTOR C2) noexcept { - return XMVector4NotEqual(C1, C2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorGreater(FXMVECTOR C1, FXMVECTOR C2) noexcept { - return XMVector4Greater(C1, C2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorGreaterOrEqual(FXMVECTOR C1, - FXMVECTOR C2) noexcept { - return XMVector4GreaterOrEqual(C1, C2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorLess(FXMVECTOR C1, FXMVECTOR C2) noexcept { - return XMVector4Less(C1, C2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorLessOrEqual(FXMVECTOR C1, - FXMVECTOR C2) noexcept { - return XMVector4LessOrEqual(C1, C2); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorIsNaN(FXMVECTOR C) noexcept { - return XMVector4IsNaN(C); -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMColorIsInfinite(FXMVECTOR C) noexcept { - return XMVector4IsInfinite(C); -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorNegative(FXMVECTOR vColor) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{1.0f - vColor.vector4_f32[0], 1.0f - vColor.vector4_f32[1], - 1.0f - vColor.vector4_f32[2], vColor.vector4_f32[3]}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vTemp = veorq_u32(vreinterpretq_u32_f32(vColor), g_XMNegate3); - return vaddq_f32(vreinterpretq_f32_u32(vTemp), g_XMOne3); -#elif defined(_XM_SSE_INTRINSICS_) - // Negate only x,y and z. - XMVECTOR vTemp = _mm_xor_ps(vColor, g_XMNegate3); - // Add 1,1,1,0 to -x,-y,-z,w - return _mm_add_ps(vTemp, g_XMOne3); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorModulate(FXMVECTOR C1, - FXMVECTOR C2) noexcept { - return XMVectorMultiply(C1, C2); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMColorAdjustSaturation(FXMVECTOR vColor, float fSaturation) noexcept { - // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]; - // Result = (C - Luminance) * Saturation + Luminance; - - const XMVECTORF32 gvLuminance = {{{0.2125f, 0.7154f, 0.0721f, 0.0f}}}; -#if defined(_XM_NO_INTRINSICS_) - float fLuminance = (vColor.vector4_f32[0] * gvLuminance.f[0]) + - (vColor.vector4_f32[1] * gvLuminance.f[1]) + - (vColor.vector4_f32[2] * gvLuminance.f[2]); - XMVECTOR vResult; - vResult.vector4_f32[0] = - ((vColor.vector4_f32[0] - fLuminance) * fSaturation) + fLuminance; - vResult.vector4_f32[1] = - ((vColor.vector4_f32[1] - fLuminance) * fSaturation) + fLuminance; - vResult.vector4_f32[2] = - ((vColor.vector4_f32[2] - fLuminance) * fSaturation) + fLuminance; - vResult.vector4_f32[3] = vColor.vector4_f32[3]; - return vResult; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance); - XMVECTOR vResult = vsubq_f32(vColor, vLuminance); - vResult = vmlaq_n_f32(vLuminance, vResult, fSaturation); - return vbslq_f32(g_XMSelect1110, vResult, vColor); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance); - // Splat fSaturation - XMVECTOR vSaturation = _mm_set_ps1(fSaturation); - // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance; - XMVECTOR vResult = _mm_sub_ps(vColor, vLuminance); - vResult = XM_FMADD_PS(vResult, vSaturation, vLuminance); - // Retain w from the source color - vLuminance = _mm_shuffle_ps( - vResult, vColor, - _MM_SHUFFLE(3, 2, 2, - 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w - vResult = _mm_shuffle_ps( - vResult, vLuminance, - _MM_SHUFFLE( - 3, 0, 1, - 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorAdjustContrast(FXMVECTOR vColor, - float fContrast) noexcept { - // Result = (vColor - 0.5f) * fContrast + 0.5f; - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{ - ((vColor.vector4_f32[0] - 0.5f) * fContrast) + 0.5f, - ((vColor.vector4_f32[1] - 0.5f) * fContrast) + 0.5f, - ((vColor.vector4_f32[2] - 0.5f) * fContrast) + 0.5f, - vColor.vector4_f32[3] // Leave W untouched - }}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v); - vResult = vmlaq_n_f32(g_XMOneHalf.v, vResult, fContrast); - return vbslq_f32(g_XMSelect1110, vResult, vColor); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale - XMVECTOR vResult = _mm_sub_ps( - vColor, g_XMOneHalf); // Subtract 0.5f from the source (Saving source) - vResult = XM_FMADD_PS(vResult, vScale, g_XMOneHalf); - // Retain w from the source color - vScale = _mm_shuffle_ps( - vResult, vColor, - _MM_SHUFFLE(3, 2, 2, - 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w - vResult = _mm_shuffle_ps( - vResult, vScale, - _MM_SHUFFLE( - 3, 0, 1, - 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorRGBToHSL(FXMVECTOR rgb) noexcept { - XMVECTOR r = XMVectorSplatX(rgb); - XMVECTOR g = XMVectorSplatY(rgb); - XMVECTOR b = XMVectorSplatZ(rgb); - - XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b)); - XMVECTOR max = XMVectorMax(r, XMVectorMax(g, b)); - - XMVECTOR l = XMVectorMultiply(XMVectorAdd(min, max), g_XMOneHalf); - - XMVECTOR d = XMVectorSubtract(max, min); - - XMVECTOR la = XMVectorSelect(rgb, l, g_XMSelect1110); - - if (XMVector3Less(d, g_XMEpsilon)) { - // Achromatic, assume H and S of 0 - return XMVectorSelect(la, g_XMZero, g_XMSelect1100); - } else { - XMVECTOR s, h; - - XMVECTOR d2 = XMVectorAdd(min, max); - - if (XMVector3Greater(l, g_XMOneHalf)) { - // d / (2-max-min) - s = XMVectorDivide(d, XMVectorSubtract(g_XMTwo, d2)); - } else { - // d / (max+min) - s = XMVectorDivide(d, d2); - } - - if (XMVector3Equal(r, max)) { - // Red is max - h = XMVectorDivide(XMVectorSubtract(g, b), d); - } else if (XMVector3Equal(g, max)) { - // Green is max - h = XMVectorDivide(XMVectorSubtract(b, r), d); - h = XMVectorAdd(h, g_XMTwo); - } else { - // Blue is max - h = XMVectorDivide(XMVectorSubtract(r, g), d); - h = XMVectorAdd(h, g_XMFour); - } - - h = XMVectorDivide(h, g_XMSix); - - if (XMVector3Less(h, g_XMZero)) h = XMVectorAdd(h, g_XMOne); - - XMVECTOR lha = XMVectorSelect(la, h, g_XMSelect1100); - return XMVectorSelect(s, lha, g_XMSelect1011); - } -} - -//------------------------------------------------------------------------------ - -namespace MathInternal { - -inline XMVECTOR XM_CALLCONV XMColorHue2Clr(FXMVECTOR p, FXMVECTOR q, - FXMVECTOR h) noexcept { - static const XMVECTORF32 oneSixth = { - {{1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f}}}; - static const XMVECTORF32 twoThirds = { - {{2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f}}}; - - XMVECTOR t = h; - - if (XMVector3Less(t, g_XMZero)) t = XMVectorAdd(t, g_XMOne); - - if (XMVector3Greater(t, g_XMOne)) t = XMVectorSubtract(t, g_XMOne); - - if (XMVector3Less(t, oneSixth)) { - // p + (q - p) * 6 * t - XMVECTOR t1 = XMVectorSubtract(q, p); - XMVECTOR t2 = XMVectorMultiply(g_XMSix, t); - return XMVectorMultiplyAdd(t1, t2, p); - } - - if (XMVector3Less(t, g_XMOneHalf)) return q; - - if (XMVector3Less(t, twoThirds)) { - // p + (q - p) * 6 * (2/3 - t) - XMVECTOR t1 = XMVectorSubtract(q, p); - XMVECTOR t2 = XMVectorMultiply(g_XMSix, XMVectorSubtract(twoThirds, t)); - return XMVectorMultiplyAdd(t1, t2, p); - } - - return p; -} - -} // namespace MathInternal - -inline XMVECTOR XM_CALLCONV XMColorHSLToRGB(FXMVECTOR hsl) noexcept { - static const XMVECTORF32 oneThird = { - {{1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f}}}; - - XMVECTOR s = XMVectorSplatY(hsl); - XMVECTOR l = XMVectorSplatZ(hsl); - - if (XMVector3NearEqual(s, g_XMZero, g_XMEpsilon)) { - // Achromatic - return XMVectorSelect(hsl, l, g_XMSelect1110); - } else { - XMVECTOR h = XMVectorSplatX(hsl); - - XMVECTOR q; - if (XMVector3Less(l, g_XMOneHalf)) { - q = XMVectorMultiply(l, XMVectorAdd(g_XMOne, s)); - } else { - q = XMVectorSubtract(XMVectorAdd(l, s), XMVectorMultiply(l, s)); - } - - XMVECTOR p = XMVectorSubtract(XMVectorMultiply(g_XMTwo, l), q); - - XMVECTOR r = DirectX::MathInternal::XMColorHue2Clr( - p, q, XMVectorAdd(h, oneThird)); - XMVECTOR g = DirectX::MathInternal::XMColorHue2Clr(p, q, h); - XMVECTOR b = DirectX::MathInternal::XMColorHue2Clr( - p, q, XMVectorSubtract(h, oneThird)); - - XMVECTOR rg = XMVectorSelect(g, r, g_XMSelect1000); - XMVECTOR ba = XMVectorSelect(hsl, b, g_XMSelect1110); - - return XMVectorSelect(ba, rg, g_XMSelect1100); - } -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorRGBToHSV(FXMVECTOR rgb) noexcept { - XMVECTOR r = XMVectorSplatX(rgb); - XMVECTOR g = XMVectorSplatY(rgb); - XMVECTOR b = XMVectorSplatZ(rgb); - - XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b)); - XMVECTOR v = XMVectorMax(r, XMVectorMax(g, b)); - - XMVECTOR d = XMVectorSubtract(v, min); - - XMVECTOR s = (XMVector3NearEqual(v, g_XMZero, g_XMEpsilon)) - ? g_XMZero - : XMVectorDivide(d, v); - - if (XMVector3Less(d, g_XMEpsilon)) { - // Achromatic, assume H of 0 - XMVECTOR hv = XMVectorSelect(v, g_XMZero, g_XMSelect1000); - XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110); - return XMVectorSelect(s, hva, g_XMSelect1011); - } else { - XMVECTOR h; - - if (XMVector3Equal(r, v)) { - // Red is max - h = XMVectorDivide(XMVectorSubtract(g, b), d); - - if (XMVector3Less(g, b)) h = XMVectorAdd(h, g_XMSix); - } else if (XMVector3Equal(g, v)) { - // Green is max - h = XMVectorDivide(XMVectorSubtract(b, r), d); - h = XMVectorAdd(h, g_XMTwo); - } else { - // Blue is max - h = XMVectorDivide(XMVectorSubtract(r, g), d); - h = XMVectorAdd(h, g_XMFour); - } - - h = XMVectorDivide(h, g_XMSix); - - XMVECTOR hv = XMVectorSelect(v, h, g_XMSelect1000); - XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110); - return XMVectorSelect(s, hva, g_XMSelect1011); - } -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorHSVToRGB(FXMVECTOR hsv) noexcept { - XMVECTOR h = XMVectorSplatX(hsv); - XMVECTOR s = XMVectorSplatY(hsv); - XMVECTOR v = XMVectorSplatZ(hsv); - - XMVECTOR h6 = XMVectorMultiply(h, g_XMSix); - - XMVECTOR i = XMVectorFloor(h6); - XMVECTOR f = XMVectorSubtract(h6, i); - - // p = v* (1-s) - XMVECTOR p = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, s)); - - // q = v*(1-f*s) - XMVECTOR q = - XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(f, s))); - - // t = v*(1 - (1-f)*s) - XMVECTOR t = XMVectorMultiply( - v, XMVectorSubtract(g_XMOne, - XMVectorMultiply(XMVectorSubtract(g_XMOne, f), s))); - - auto ii = static_cast(XMVectorGetX(XMVectorMod(i, g_XMSix))); - - XMVECTOR _rgb; - - switch (ii) { - case 0: // rgb = vtp - { - XMVECTOR vt = XMVectorSelect(t, v, g_XMSelect1000); - _rgb = XMVectorSelect(p, vt, g_XMSelect1100); - } break; - case 1: // rgb = qvp - { - XMVECTOR qv = XMVectorSelect(v, q, g_XMSelect1000); - _rgb = XMVectorSelect(p, qv, g_XMSelect1100); - } break; - case 2: // rgb = pvt - { - XMVECTOR pv = XMVectorSelect(v, p, g_XMSelect1000); - _rgb = XMVectorSelect(t, pv, g_XMSelect1100); - } break; - case 3: // rgb = pqv - { - XMVECTOR pq = XMVectorSelect(q, p, g_XMSelect1000); - _rgb = XMVectorSelect(v, pq, g_XMSelect1100); - } break; - case 4: // rgb = tpv - { - XMVECTOR tp = XMVectorSelect(p, t, g_XMSelect1000); - _rgb = XMVectorSelect(v, tp, g_XMSelect1100); - } break; - default: // rgb = vpq - { - XMVECTOR vp = XMVectorSelect(p, v, g_XMSelect1000); - _rgb = XMVectorSelect(q, vp, g_XMSelect1100); - } break; - } - - return XMVectorSelect(hsv, _rgb, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorRGBToYUV(FXMVECTOR rgb) noexcept { - static const XMVECTORF32 Scale0 = {{{0.299f, -0.147f, 0.615f, 0.0f}}}; - static const XMVECTORF32 Scale1 = {{{0.587f, -0.289f, -0.515f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{0.114f, 0.436f, -0.100f, 0.0f}}}; - - XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(rgb, M); - - return XMVectorSelect(rgb, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorYUVToRGB(FXMVECTOR yuv) noexcept { - static const XMVECTORF32 Scale1 = {{{0.0f, -0.395f, 2.032f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{1.140f, -0.581f, 0.0f, 0.0f}}}; - - XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(yuv, M); - - return XMVectorSelect(yuv, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD(FXMVECTOR rgb) noexcept { - static const XMVECTORF32 Scale0 = {{{0.2126f, -0.0997f, 0.6150f, 0.0f}}}; - static const XMVECTORF32 Scale1 = {{{0.7152f, -0.3354f, -0.5586f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{0.0722f, 0.4351f, -0.0564f, 0.0f}}}; - - XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(rgb, M); - - return XMVectorSelect(rgb, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD(FXMVECTOR yuv) noexcept { - static const XMVECTORF32 Scale1 = {{{0.0f, -0.2153f, 2.1324f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{1.2803f, -0.3806f, 0.0f, 0.0f}}}; - - XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(yuv, M); - - return XMVectorSelect(yuv, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_UHD(FXMVECTOR rgb) noexcept { - static const XMVECTORF32 Scale0 = {{{0.2627f, -0.1215f, 0.6150f, 0.0f}}}; - static const XMVECTORF32 Scale1 = {{{0.6780f, -0.3136f, -0.5655f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{0.0593f, 0.4351f, -0.0495f, 0.0f}}}; - - XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(rgb, M); - - return XMVectorSelect(rgb, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_UHD(FXMVECTOR yuv) noexcept { - static const XMVECTORF32 Scale1 = {{{0.0f, -0.1891f, 2.1620f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{1.1989f, -0.4645f, 0.0f, 0.0f}}}; - - XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(yuv, M); - - return XMVectorSelect(yuv, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorRGBToXYZ(FXMVECTOR rgb) noexcept { - static const XMVECTORF32 Scale0 = { - {{0.4887180f, 0.1762044f, 0.0000000f, 0.0f}}}; - static const XMVECTORF32 Scale1 = { - {{0.3106803f, 0.8129847f, 0.0102048f, 0.0f}}}; - static const XMVECTORF32 Scale2 = { - {{0.2006017f, 0.0108109f, 0.9897952f, 0.0f}}}; - static const XMVECTORF32 Scale = { - {{1.f / 0.17697f, 1.f / 0.17697f, 1.f / 0.17697f, 0.0f}}}; - - XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVectorMultiply(XMVector3Transform(rgb, M), Scale); - - return XMVectorSelect(rgb, clr, g_XMSelect1110); -} - -inline XMVECTOR XM_CALLCONV XMColorXYZToRGB(FXMVECTOR xyz) noexcept { - static const XMVECTORF32 Scale0 = { - {{2.3706743f, -0.5138850f, 0.0052982f, 0.0f}}}; - static const XMVECTORF32 Scale1 = { - {{-0.9000405f, 1.4253036f, -0.0146949f, 0.0f}}}; - static const XMVECTORF32 Scale2 = { - {{-0.4706338f, 0.0885814f, 1.0093968f, 0.0f}}}; - static const XMVECTORF32 Scale = {{{0.17697f, 0.17697f, 0.17697f, 0.0f}}}; - - XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(XMVectorMultiply(xyz, Scale), M); - - return XMVectorSelect(xyz, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorXYZToSRGB(FXMVECTOR xyz) noexcept { - static const XMVECTORF32 Scale0 = {{{3.2406f, -0.9689f, 0.0557f, 0.0f}}}; - static const XMVECTORF32 Scale1 = {{{-1.5372f, 1.8758f, -0.2040f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{-0.4986f, 0.0415f, 1.0570f, 0.0f}}}; - static const XMVECTORF32 Cutoff = { - {{0.0031308f, 0.0031308f, 0.0031308f, 0.0f}}}; - static const XMVECTORF32 Exp = { - {{1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.0f}}}; - - XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); - XMVECTOR lclr = XMVector3Transform(xyz, M); - - XMVECTOR sel = XMVectorGreater(lclr, Cutoff); - - // clr = 12.92 * lclr for lclr <= 0.0031308f - XMVECTOR smallC = XMVectorMultiply(lclr, g_XMsrgbScale); - - // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055) - XMVECTOR largeC = XMVectorSubtract( - XMVectorMultiply(g_XMsrgbA1, XMVectorPow(lclr, Exp)), g_XMsrgbA); - - XMVECTOR clr = XMVectorSelect(smallC, largeC, sel); - - return XMVectorSelect(xyz, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ(FXMVECTOR srgb) noexcept { - static const XMVECTORF32 Scale0 = {{{0.4124f, 0.2126f, 0.0193f, 0.0f}}}; - static const XMVECTORF32 Scale1 = {{{0.3576f, 0.7152f, 0.1192f, 0.0f}}}; - static const XMVECTORF32 Scale2 = {{{0.1805f, 0.0722f, 0.9505f, 0.0f}}}; - static const XMVECTORF32 Cutoff = {{{0.04045f, 0.04045f, 0.04045f, 0.0f}}}; - static const XMVECTORF32 Exp = {{{2.4f, 2.4f, 2.4f, 1.0f}}}; - - XMVECTOR sel = XMVectorGreater(srgb, Cutoff); - - // lclr = clr / 12.92 - XMVECTOR smallC = XMVectorDivide(srgb, g_XMsrgbScale); - - // lclr = pow( (clr + a) / (1+a), 2.4 ) - XMVECTOR largeC = XMVectorPow( - XMVectorDivide(XMVectorAdd(srgb, g_XMsrgbA), g_XMsrgbA1), Exp); - - XMVECTOR lclr = XMVectorSelect(smallC, largeC, sel); - - XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero); - XMVECTOR clr = XMVector3Transform(lclr, M); - - return XMVectorSelect(srgb, clr, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB(FXMVECTOR rgb) noexcept { - static const XMVECTORF32 Cutoff = { - {{0.0031308f, 0.0031308f, 0.0031308f, 1.f}}}; - static const XMVECTORF32 Linear = {{{12.92f, 12.92f, 12.92f, 1.f}}}; - static const XMVECTORF32 Scale = {{{1.055f, 1.055f, 1.055f, 1.f}}}; - static const XMVECTORF32 Bias = {{{0.055f, 0.055f, 0.055f, 0.f}}}; - static const XMVECTORF32 InvGamma = { - {{1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.f}}}; - - XMVECTOR V = XMVectorSaturate(rgb); - XMVECTOR V0 = XMVectorMultiply(V, Linear); - XMVECTOR V1 = XMVectorSubtract( - XMVectorMultiply(Scale, XMVectorPow(V, InvGamma)), Bias); - XMVECTOR select = XMVectorLess(V, Cutoff); - V = XMVectorSelect(V1, V0, select); - return XMVectorSelect(rgb, V, g_XMSelect1110); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB(FXMVECTOR srgb) noexcept { - static const XMVECTORF32 Cutoff = {{{0.04045f, 0.04045f, 0.04045f, 1.f}}}; - static const XMVECTORF32 ILinear = { - {{1.f / 12.92f, 1.f / 12.92f, 1.f / 12.92f, 1.f}}}; - static const XMVECTORF32 Scale = { - {{1.f / 1.055f, 1.f / 1.055f, 1.f / 1.055f, 1.f}}}; - static const XMVECTORF32 Bias = {{{0.055f, 0.055f, 0.055f, 0.f}}}; - static const XMVECTORF32 Gamma = {{{2.4f, 2.4f, 2.4f, 1.f}}}; - - XMVECTOR V = XMVectorSaturate(srgb); - XMVECTOR V0 = XMVectorMultiply(V, ILinear); - XMVECTOR V1 = - XMVectorPow(XMVectorMultiply(XMVectorAdd(V, Bias), Scale), Gamma); - XMVECTOR select = XMVectorGreater(V, Cutoff); - V = XMVectorSelect(V0, V1, select); - return XMVectorSelect(srgb, V, g_XMSelect1110); -} - -/**************************************************************************** - * - * Miscellaneous - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline bool XMVerifyCPUSupport() noexcept { -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - int CPUInfo[4] = {-1}; -#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid) - __cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); -#else - __cpuid(CPUInfo, 0); -#endif - -#ifdef __AVX2__ - if (CPUInfo[0] < 7) return false; -#else - if (CPUInfo[0] < 1) return false; -#endif - -#if (defined(__clang__) || defined(__GNUC__)) && defined(__cpuid) - __cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); -#else - __cpuid(CPUInfo, 1); -#endif - -#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_) - // The compiler can emit FMA3 instructions even without explicit intrinsics - // use - if ((CPUInfo[2] & 0x38081001) != 0x38081001) - return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support -#elif defined(_XM_FMA3_INTRINSICS_) && defined(_XM_F16C_INTRINSICS_) - if ((CPUInfo[2] & 0x38081001) != 0x38081001) - return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support -#elif defined(_XM_FMA3_INTRINSICS_) - if ((CPUInfo[2] & 0x18081001) != 0x18081001) - return false; // No AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support -#elif defined(_XM_F16C_INTRINSICS_) - if ((CPUInfo[2] & 0x38080001) != 0x38080001) - return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support -#elif defined(__AVX__) || defined(_XM_AVX_INTRINSICS_) - if ((CPUInfo[2] & 0x18080001) != 0x18080001) - return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support -#elif defined(_XM_SSE4_INTRINSICS_) - if ((CPUInfo[2] & 0x80001) != 0x80001) - return false; // No SSE3/SSE4.1 support -#elif defined(_XM_SSE3_INTRINSICS_) - if (!(CPUInfo[2] & 0x1)) return false; // No SSE3 support -#endif - - // The x64 processor model requires SSE2 support, but no harm in checking - if ((CPUInfo[3] & 0x6000000) != 0x6000000) - return false; // No SSE2/SSE support - -#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_) -#if defined(__clang__) || defined(__GNUC__) - __cpuid_count(7, 0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]); -#else - __cpuidex(CPUInfo, 7, 0); -#endif - if (!(CPUInfo[1] & 0x20)) return false; // No AVX2 support -#endif - - return true; -#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - // ARM-NEON support is required for the Windows on ARM platform - return true; -#else - // No intrinsics path always supported - return true; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMFresnelTerm(FXMVECTOR CosIncidentAngle, - FXMVECTOR RefractionIndex) noexcept { - assert(!XMVector4IsInfinite(CosIncidentAngle)); - - // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - - // c) + 1)^2 + 1) where c = CosIncidentAngle g = sqrt(c^2 + - // RefractionIndex^2 - 1) - -#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, - g_XMNegativeOne.v); - G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G); - G = XMVectorAbs(G); - G = XMVectorSqrt(G); - - XMVECTOR S = XMVectorAdd(G, CosIncidentAngle); - XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle); - - XMVECTOR V0 = XMVectorMultiply(D, D); - XMVECTOR V1 = XMVectorMultiply(S, S); - V1 = XMVectorReciprocal(V1); - V0 = XMVectorMultiply(g_XMOneHalf.v, V0); - V0 = XMVectorMultiply(V0, V1); - - XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v); - XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v); - V2 = XMVectorMultiply(V2, V2); - V3 = XMVectorMultiply(V3, V3); - V3 = XMVectorReciprocal(V3); - V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v); - - XMVECTOR Result = XMVectorMultiply(V0, V2); - - Result = XMVectorSaturate(Result); - - return Result; - -#elif defined(_XM_SSE_INTRINSICS_) - // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2)) - XMVECTOR G = _mm_mul_ps(RefractionIndex, RefractionIndex); - XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle, CosIncidentAngle); - G = _mm_sub_ps(G, g_XMOne); - vTemp = _mm_add_ps(vTemp, G); - // max((0-vTemp),vTemp) == abs(vTemp) - // The abs is needed to deal with refraction and cosine being zero - G = _mm_setzero_ps(); - G = _mm_sub_ps(G, vTemp); - G = _mm_max_ps(G, vTemp); - // Last operation, the sqrt() - G = _mm_sqrt_ps(G); - - // Calc G-C and G+C - XMVECTOR GAddC = _mm_add_ps(G, CosIncidentAngle); - XMVECTOR GSubC = _mm_sub_ps(G, CosIncidentAngle); - // Perform the term (0.5f *(g - c)^2) / (g + c)^2 - XMVECTOR vResult = _mm_mul_ps(GSubC, GSubC); - vTemp = _mm_mul_ps(GAddC, GAddC); - vResult = _mm_mul_ps(vResult, g_XMOneHalf); - vResult = _mm_div_ps(vResult, vTemp); - // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) - GAddC = _mm_mul_ps(GAddC, CosIncidentAngle); - GSubC = _mm_mul_ps(GSubC, CosIncidentAngle); - GAddC = _mm_sub_ps(GAddC, g_XMOne); - GSubC = _mm_add_ps(GSubC, g_XMOne); - GAddC = _mm_mul_ps(GAddC, GAddC); - GSubC = _mm_mul_ps(GSubC, GSubC); - GAddC = _mm_div_ps(GAddC, GSubC); - GAddC = _mm_add_ps(GAddC, g_XMOne); - // Multiply the two term parts - vResult = _mm_mul_ps(vResult, GAddC); - // Clamp to 0.0 - 1.0f - vResult = _mm_max_ps(vResult, g_XMZero); - vResult = _mm_min_ps(vResult, g_XMOne); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XMScalarNearEqual(float S1, float S2, float Epsilon) noexcept { - float Delta = S1 - S2; - return (fabsf(Delta) <= Epsilon); -} - -//------------------------------------------------------------------------------ -// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI -inline float XMScalarModAngle(float Angle) noexcept { - // Note: The modulo is performed with unsigned math only to work - // around a precision error on numbers that are close to PI - - // Normalize the range from 0.0f to XM_2PI - Angle = Angle + XM_PI; - // Perform the modulo, unsigned - float fTemp = fabsf(Angle); - fTemp = fTemp - - (XM_2PI * static_cast(static_cast(fTemp / XM_2PI))); - // Restore the number to the range of -XM_PI to XM_PI-epsilon - fTemp = fTemp - XM_PI; - // If the modulo'd value was negative, restore negation - if (Angle < 0.0f) { - fTemp = -fTemp; - } - return fTemp; -} - -//------------------------------------------------------------------------------ - -inline float XMScalarSin(float Value) noexcept { - // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. - float quotient = XM_1DIV2PI * Value; - if (Value >= 0.0f) { - quotient = static_cast(static_cast(quotient + 0.5f)); - } else { - quotient = static_cast(static_cast(quotient - 0.5f)); - } - float y = Value - XM_2PI * quotient; - - // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). - if (y > XM_PIDIV2) { - y = XM_PI - y; - } else if (y < -XM_PIDIV2) { - y = -XM_PI - y; - } - - // 11-degree minimax approximation - float y2 = y * y; - return (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * - y2 + - 0.0083333310f) * - y2 - - 0.16666667f) * - y2 + - 1.0f) * - y; -} - -//------------------------------------------------------------------------------ - -inline float XMScalarSinEst(float Value) noexcept { - // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. - float quotient = XM_1DIV2PI * Value; - if (Value >= 0.0f) { - quotient = static_cast(static_cast(quotient + 0.5f)); - } else { - quotient = static_cast(static_cast(quotient - 0.5f)); - } - float y = Value - XM_2PI * quotient; - - // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). - if (y > XM_PIDIV2) { - y = XM_PI - y; - } else if (y < -XM_PIDIV2) { - y = -XM_PI - y; - } - - // 7-degree minimax approximation - float y2 = y * y; - return (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + - 1.0f) * - y; -} - -//------------------------------------------------------------------------------ - -inline float XMScalarCos(float Value) noexcept { - // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. - float quotient = XM_1DIV2PI * Value; - if (Value >= 0.0f) { - quotient = static_cast(static_cast(quotient + 0.5f)); - } else { - quotient = static_cast(static_cast(quotient - 0.5f)); - } - float y = Value - XM_2PI * quotient; - - // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). - float sign; - if (y > XM_PIDIV2) { - y = XM_PI - y; - sign = -1.0f; - } else if (y < -XM_PIDIV2) { - y = -XM_PI - y; - sign = -1.0f; - } else { - sign = +1.0f; - } - - // 10-degree minimax approximation - float y2 = y * y; - float p = - ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + - 0.041666638f) * - y2 - - 0.5f) * - y2 + - 1.0f; - return sign * p; -} - -//------------------------------------------------------------------------------ - -inline float XMScalarCosEst(float Value) noexcept { - // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. - float quotient = XM_1DIV2PI * Value; - if (Value >= 0.0f) { - quotient = static_cast(static_cast(quotient + 0.5f)); - } else { - quotient = static_cast(static_cast(quotient - 0.5f)); - } - float y = Value - XM_2PI * quotient; - - // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). - float sign; - if (y > XM_PIDIV2) { - y = XM_PI - y; - sign = -1.0f; - } else if (y < -XM_PIDIV2) { - y = -XM_PI - y; - sign = -1.0f; - } else { - sign = +1.0f; - } - - // 6-degree minimax approximation - float y2 = y * y; - float p = - ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f; - return sign * p; -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline void XMScalarSinCos(float* pSin, float* pCos, - float Value) noexcept { - assert(pSin); - assert(pCos); - - // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. - float quotient = XM_1DIV2PI * Value; - if (Value >= 0.0f) { - quotient = static_cast(static_cast(quotient + 0.5f)); - } else { - quotient = static_cast(static_cast(quotient - 0.5f)); - } - float y = Value - XM_2PI * quotient; - - // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). - float sign; - if (y > XM_PIDIV2) { - y = XM_PI - y; - sign = -1.0f; - } else if (y < -XM_PIDIV2) { - y = -XM_PI - y; - sign = -1.0f; - } else { - sign = +1.0f; - } - - float y2 = y * y; - - // 11-degree minimax approximation - *pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * - y2 + - 0.0083333310f) * - y2 - - 0.16666667f) * - y2 + - 1.0f) * - y; - - // 10-degree minimax approximation - float p = - ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + - 0.041666638f) * - y2 - - 0.5f) * - y2 + - 1.0f; - *pCos = sign * p; -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline void XMScalarSinCosEst(float* pSin, float* pCos, - float Value) noexcept { - assert(pSin); - assert(pCos); - - // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. - float quotient = XM_1DIV2PI * Value; - if (Value >= 0.0f) { - quotient = static_cast(static_cast(quotient + 0.5f)); - } else { - quotient = static_cast(static_cast(quotient - 0.5f)); - } - float y = Value - XM_2PI * quotient; - - // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). - float sign; - if (y > XM_PIDIV2) { - y = XM_PI - y; - sign = -1.0f; - } else if (y < -XM_PIDIV2) { - y = -XM_PI - y; - sign = -1.0f; - } else { - sign = +1.0f; - } - - float y2 = y * y; - - // 7-degree minimax approximation - *pSin = (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + - 1.0f) * - y; - - // 6-degree minimax approximation - float p = - ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f; - *pCos = sign * p; -} - -//------------------------------------------------------------------------------ - -inline float XMScalarASin(float Value) noexcept { - // Clamp input to [-1,1]. - bool nonnegative = (Value >= 0.0f); - float x = fabsf(Value); - float omx = 1.0f - x; - if (omx < 0.0f) { - omx = 0.0f; - } - float root = sqrtf(omx); - - // 7-degree minimax approximation - float result = - ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + - 0.0308918810f) * - x - - 0.0501743046f) * - x + - 0.0889789874f) * - x - - 0.2145988016f) * - x + - 1.5707963050f; - result *= root; // acos(|x|) - - // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) - return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); -} - -//------------------------------------------------------------------------------ - -inline float XMScalarASinEst(float Value) noexcept { - // Clamp input to [-1,1]. - bool nonnegative = (Value >= 0.0f); - float x = fabsf(Value); - float omx = 1.0f - x; - if (omx < 0.0f) { - omx = 0.0f; - } - float root = sqrtf(omx); - - // 3-degree minimax approximation - float result = - ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f; - result *= root; // acos(|x|) - - // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) - return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); -} - -//------------------------------------------------------------------------------ - -inline float XMScalarACos(float Value) noexcept { - // Clamp input to [-1,1]. - bool nonnegative = (Value >= 0.0f); - float x = fabsf(Value); - float omx = 1.0f - x; - if (omx < 0.0f) { - omx = 0.0f; - } - float root = sqrtf(omx); - - // 7-degree minimax approximation - float result = - ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + - 0.0308918810f) * - x - - 0.0501743046f) * - x + - 0.0889789874f) * - x - - 0.2145988016f) * - x + - 1.5707963050f; - result *= root; - - // acos(x) = pi - acos(-x) when x < 0 - return (nonnegative ? result : XM_PI - result); -} - -//------------------------------------------------------------------------------ - -inline float XMScalarACosEst(float Value) noexcept { - // Clamp input to [-1,1]. - bool nonnegative = (Value >= 0.0f); - float x = fabsf(Value); - float omx = 1.0f - x; - if (omx < 0.0f) { - omx = 0.0f; - } - float root = sqrtf(omx); - - // 3-degree minimax approximation - float result = - ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f; - result *= root; - - // acos(x) = pi - acos(-x) when x < 0 - return (nonnegative ? result : XM_PI - result); -} diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXMathVector.inl b/targets/app/linux/Stubs/DirectXMath/DirectXMathVector.inl deleted file mode 100644 index be289e5a6..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXMathVector.inl +++ /dev/null @@ -1,14000 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXMathVector.inl -- SIMD C++ Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -#if defined(_XM_NO_INTRINSICS_) -#define XMISNAN(x) isnan(x) -#define XMISINF(x) isinf(x) -#endif - -#if defined(_XM_SSE_INTRINSICS_) - -#define XM3UNPACK3INTO4(l1, l2, l3) \ - XMVECTOR V3 = _mm_shuffle_ps(l2, l3, _MM_SHUFFLE(0, 0, 3, 2)); \ - XMVECTOR V2 = _mm_shuffle_ps(l2, l1, _MM_SHUFFLE(3, 3, 1, 0)); \ - V2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 0, 2)); \ - XMVECTOR V4 = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(L3), 32 / 8)) - -#define XM3PACK4INTO3(v2x) \ - v2x = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 2, 1)); \ - V2 = _mm_shuffle_ps(V2, V1, _MM_SHUFFLE(2, 2, 0, 0)); \ - V1 = _mm_shuffle_ps(V1, V2, _MM_SHUFFLE(0, 2, 1, 0)); \ - V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(0, 0, 2, 2)); \ - V3 = _mm_shuffle_ps(V3, V4, _MM_SHUFFLE(2, 1, 2, 0)) - -#endif - -/**************************************************************************** - * - * General Vector - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Assignment operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ -// Return a vector with all elements equaling zero -inline XMVECTOR XM_CALLCONV XMVectorZero() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{0.0f, 0.0f, 0.0f, 0.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vdupq_n_f32(0); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_setzero_ps(); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with four floating point values -inline XMVECTOR XM_CALLCONV XMVectorSet(float x, float y, float z, - float w) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{x, y, z, w}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t V0 = vcreate_f32( - static_cast(*reinterpret_cast(&x)) | - (static_cast(*reinterpret_cast(&y)) << 32)); - float32x2_t V1 = vcreate_f32( - static_cast(*reinterpret_cast(&z)) | - (static_cast(*reinterpret_cast(&w)) << 32)); - return vcombine_f32(V0, V1); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_set_ps(w, z, y, x); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with four integer values -inline XMVECTOR XM_CALLCONV XMVectorSetInt(uint32_t x, uint32_t y, uint32_t z, - uint32_t w) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult = {{{x, y, z, w}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t V0 = vcreate_u32(static_cast(x) | - (static_cast(y) << 32)); - uint32x2_t V1 = vcreate_u32(static_cast(z) | - (static_cast(w) << 32)); - return vreinterpretq_f32_u32(vcombine_u32(V0, V1)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_set_epi32(static_cast(w), static_cast(z), - static_cast(y), static_cast(x)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with a replicated floating point value -inline XMVECTOR XM_CALLCONV XMVectorReplicate(float Value) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = Value; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vdupq_n_f32(Value); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_set_ps1(Value); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with a replicated floating point value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorReplicatePtr(const float* pValue) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float Value = pValue[0]; - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = Value; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vld1q_dup_f32(pValue); -#elif defined(_XM_AVX_INTRINSICS_) - return _mm_broadcast_ss(pValue); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_load_ps1(pValue); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with a replicated integer value -inline XMVECTOR XM_CALLCONV XMVectorReplicateInt(uint32_t Value) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult; - vResult.u[0] = vResult.u[1] = vResult.u[2] = vResult.u[3] = Value; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vdupq_n_u32(Value)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_set1_epi32(static_cast(Value)); - return _mm_castsi128_ps(vTemp); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with a replicated integer value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorReplicateIntPtr(const uint32_t* pValue) noexcept { -#if defined(_XM_NO_INTRINSICS_) - uint32_t Value = pValue[0]; - XMVECTORU32 vResult; - vResult.u[0] = vResult.u[1] = vResult.u[2] = vResult.u[3] = Value; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vld1q_dup_u32(pValue)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_load_ps1(reinterpret_cast(pValue)); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with all bits set (true mask) -inline XMVECTOR XM_CALLCONV XMVectorTrueInt() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult = { - {{0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_s32(vdupq_n_s32(-1)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_set1_epi32(-1); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ -// Initialize a vector with all bits clear (false mask) -inline XMVECTOR XM_CALLCONV XMVectorFalseInt() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{0.0f, 0.0f, 0.0f, 0.0f}}}; - return vResult; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vdupq_n_u32(0)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_setzero_ps(); -#endif -} - -//------------------------------------------------------------------------------ -// Replicate the x component of the vector -inline XMVECTOR XM_CALLCONV XMVectorSplatX(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = - V.vector4_f32[0]; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vdupq_lane_f32(vget_low_f32(V), 0); -#elif defined(_XM_AVX2_INTRINSICS_) && defined(_XM_FAVOR_INTEL_) - return _mm_broadcastss_ps(V); -#elif defined(_XM_SSE_INTRINSICS_) - return XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); -#endif -} - -//------------------------------------------------------------------------------ -// Replicate the y component of the vector -inline XMVECTOR XM_CALLCONV XMVectorSplatY(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = - V.vector4_f32[1]; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vdupq_lane_f32(vget_low_f32(V), 1); -#elif defined(_XM_SSE_INTRINSICS_) - return XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); -#endif -} - -//------------------------------------------------------------------------------ -// Replicate the z component of the vector -inline XMVECTOR XM_CALLCONV XMVectorSplatZ(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = - V.vector4_f32[2]; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vdupq_lane_f32(vget_high_f32(V), 0); -#elif defined(_XM_SSE_INTRINSICS_) - return XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); -#endif -} - -//------------------------------------------------------------------------------ -// Replicate the w component of the vector -inline XMVECTOR XM_CALLCONV XMVectorSplatW(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = - V.vector4_f32[3]; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vdupq_lane_f32(vget_high_f32(V), 1); -#elif defined(_XM_SSE_INTRINSICS_) - return XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); -#endif -} - -//------------------------------------------------------------------------------ -// Return a vector of 1.0f,1.0f,1.0f,1.0f -inline XMVECTOR XM_CALLCONV XMVectorSplatOne() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = 1.0f; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vdupq_n_f32(1.0f); -#elif defined(_XM_SSE_INTRINSICS_) - return g_XMOne; -#endif -} - -//------------------------------------------------------------------------------ -// Return a vector of INF,INF,INF,INF -inline XMVECTOR XM_CALLCONV XMVectorSplatInfinity() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult; - vResult.u[0] = vResult.u[1] = vResult.u[2] = vResult.u[3] = 0x7F800000; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vdupq_n_u32(0x7F800000)); -#elif defined(_XM_SSE_INTRINSICS_) - return g_XMInfinity; -#endif -} - -//------------------------------------------------------------------------------ -// Return a vector of Q_NAN,Q_NAN,Q_NAN,Q_NAN -inline XMVECTOR XM_CALLCONV XMVectorSplatQNaN() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult; - vResult.u[0] = vResult.u[1] = vResult.u[2] = vResult.u[3] = 0x7FC00000; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vdupq_n_u32(0x7FC00000)); -#elif defined(_XM_SSE_INTRINSICS_) - return g_XMQNaN; -#endif -} - -//------------------------------------------------------------------------------ -// Return a vector -// of 1.192092896e-7f,1.192092896e-7f,1.192092896e-7f,1.192092896e-7f -inline XMVECTOR XM_CALLCONV XMVectorSplatEpsilon() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult; - vResult.u[0] = vResult.u[1] = vResult.u[2] = vResult.u[3] = 0x34000000; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vdupq_n_u32(0x34000000)); -#elif defined(_XM_SSE_INTRINSICS_) - return g_XMEpsilon; -#endif -} - -//------------------------------------------------------------------------------ -// Return a vector of -0.0f (0x80000000),-0.0f,-0.0f,-0.0f -inline XMVECTOR XM_CALLCONV XMVectorSplatSignMask() noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 vResult; - vResult.u[0] = vResult.u[1] = vResult.u[2] = vResult.u[3] = 0x80000000U; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vdupq_n_u32(0x80000000U)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_set1_epi32(static_cast(0x80000000)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ -// Return a floating point value via an index. This is not a recommended -// function to use due to performance loss. -inline float XM_CALLCONV XMVectorGetByIndex(FXMVECTOR V, size_t i) noexcept { - assert(i < 4); - _Analysis_assume_(i < 4); -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_f32[i]; -#else - XMVECTORF32 U; - U.v = V; - return U.f[i]; -#endif -} - -//------------------------------------------------------------------------------ -// Return the X component in an FPU register. -inline float XM_CALLCONV XMVectorGetX(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_f32[0]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_f32(V, 0); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_cvtss_f32(V); -#endif -} - -// Return the Y component in an FPU register. -inline float XM_CALLCONV XMVectorGetY(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_f32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_f32(V, 1); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - return _mm_cvtss_f32(vTemp); -#endif -} - -// Return the Z component in an FPU register. -inline float XM_CALLCONV XMVectorGetZ(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_f32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_f32(V, 2); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - return _mm_cvtss_f32(vTemp); -#endif -} - -// Return the W component in an FPU register. -inline float XM_CALLCONV XMVectorGetW(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_f32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_f32(V, 3); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - return _mm_cvtss_f32(vTemp); -#endif -} - -//------------------------------------------------------------------------------ - -// Store a component indexed by i into a 32 bit float location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetByIndexPtr(float* f, FXMVECTOR V, size_t i) noexcept { - assert(f != nullptr); - assert(i < 4); - _Analysis_assume_(i < 4); -#if defined(_XM_NO_INTRINSICS_) - *f = V.vector4_f32[i]; -#else - XMVECTORF32 U; - U.v = V; - *f = U.f[i]; -#endif -} - -//------------------------------------------------------------------------------ - -// Store the X component into a 32 bit float location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetXPtr(float* x, FXMVECTOR V) noexcept { - assert(x != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *x = V.vector4_f32[0]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_f32(x, V, 0); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_ss(x, V); -#endif -} - -// Store the Y component into a 32 bit float location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetYPtr(float* y, FXMVECTOR V) noexcept { - assert(y != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *y = V.vector4_f32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_f32(y, V, 1); -#elif defined(_XM_SSE4_INTRINSICS_) - *(reinterpret_cast(y)) = _mm_extract_ps(V, 1); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - _mm_store_ss(y, vResult); -#endif -} - -// Store the Z component into a 32 bit float location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetZPtr(float* z, FXMVECTOR V) noexcept { - assert(z != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *z = V.vector4_f32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_f32(z, V, 2); -#elif defined(_XM_SSE4_INTRINSICS_) - *(reinterpret_cast(z)) = _mm_extract_ps(V, 2); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - _mm_store_ss(z, vResult); -#endif -} - -// Store the W component into a 32 bit float location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetWPtr(float* w, FXMVECTOR V) noexcept { - assert(w != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *w = V.vector4_f32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_f32(w, V, 3); -#elif defined(_XM_SSE4_INTRINSICS_) - *(reinterpret_cast(w)) = _mm_extract_ps(V, 3); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - _mm_store_ss(w, vResult); -#endif -} - -//------------------------------------------------------------------------------ - -// Return an integer value via an index. This is not a recommended -// function to use due to performance loss. -inline uint32_t XM_CALLCONV XMVectorGetIntByIndex(FXMVECTOR V, - size_t i) noexcept { - assert(i < 4); - _Analysis_assume_(i < 4); -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_u32[i]; -#else - XMVECTORU32 U; - U.v = V; - return U.u[i]; -#endif -} - -//------------------------------------------------------------------------------ - -// Return the X component in an integer register. -inline uint32_t XM_CALLCONV XMVectorGetIntX(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_u32[0]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_u32(vreinterpretq_u32_f32(V), 0); -#elif defined(_XM_SSE_INTRINSICS_) - return static_cast(_mm_cvtsi128_si32(_mm_castps_si128(V))); -#endif -} - -// Return the Y component in an integer register. -inline uint32_t XM_CALLCONV XMVectorGetIntY(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_u32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_u32(vreinterpretq_u32_f32(V), 1); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i V1 = _mm_castps_si128(V); - return static_cast(_mm_extract_epi32(V1, 1)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vResulti = - _mm_shuffle_epi32(_mm_castps_si128(V), _MM_SHUFFLE(1, 1, 1, 1)); - return static_cast(_mm_cvtsi128_si32(vResulti)); -#endif -} - -// Return the Z component in an integer register. -inline uint32_t XM_CALLCONV XMVectorGetIntZ(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_u32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_u32(vreinterpretq_u32_f32(V), 2); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i V1 = _mm_castps_si128(V); - return static_cast(_mm_extract_epi32(V1, 2)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vResulti = - _mm_shuffle_epi32(_mm_castps_si128(V), _MM_SHUFFLE(2, 2, 2, 2)); - return static_cast(_mm_cvtsi128_si32(vResulti)); -#endif -} - -// Return the W component in an integer register. -inline uint32_t XM_CALLCONV XMVectorGetIntW(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return V.vector4_u32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vgetq_lane_u32(vreinterpretq_u32_f32(V), 3); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i V1 = _mm_castps_si128(V); - return static_cast(_mm_extract_epi32(V1, 3)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vResulti = - _mm_shuffle_epi32(_mm_castps_si128(V), _MM_SHUFFLE(3, 3, 3, 3)); - return static_cast(_mm_cvtsi128_si32(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ - -// Store a component indexed by i into a 32 bit integer location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetIntByIndexPtr(uint32_t* x, FXMVECTOR V, size_t i) noexcept { - assert(x != nullptr); - assert(i < 4); - _Analysis_assume_(i < 4); -#if defined(_XM_NO_INTRINSICS_) - *x = V.vector4_u32[i]; -#else - XMVECTORU32 U; - U.v = V; - *x = U.u[i]; -#endif -} - -//------------------------------------------------------------------------------ - -// Store the X component into a 32 bit integer location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetIntXPtr(uint32_t* x, FXMVECTOR V) noexcept { - assert(x != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *x = V.vector4_u32[0]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_u32(x, *reinterpret_cast(&V), 0); -#elif defined(_XM_SSE_INTRINSICS_) - _mm_store_ss(reinterpret_cast(x), V); -#endif -} - -// Store the Y component into a 32 bit integer location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetIntYPtr(uint32_t* y, FXMVECTOR V) noexcept { - assert(y != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *y = V.vector4_u32[1]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_u32(y, *reinterpret_cast(&V), 1); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i V1 = _mm_castps_si128(V); - *y = static_cast(_mm_extract_epi32(V1, 1)); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - _mm_store_ss(reinterpret_cast(y), vResult); -#endif -} - -// Store the Z component into a 32 bit integer locaCantion in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetIntZPtr(uint32_t* z, FXMVECTOR V) noexcept { - assert(z != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *z = V.vector4_u32[2]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_u32(z, *reinterpret_cast(&V), 2); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i V1 = _mm_castps_si128(V); - *z = static_cast(_mm_extract_epi32(V1, 2)); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - _mm_store_ss(reinterpret_cast(z), vResult); -#endif -} - -// Store the W component into a 32 bit integer location in memory. -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorGetIntWPtr(uint32_t* w, FXMVECTOR V) noexcept { - assert(w != nullptr); -#if defined(_XM_NO_INTRINSICS_) - *w = V.vector4_u32[3]; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - vst1q_lane_u32(w, *reinterpret_cast(&V), 3); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i V1 = _mm_castps_si128(V); - *w = static_cast(_mm_extract_epi32(V1, 3)); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - _mm_store_ss(reinterpret_cast(w), vResult); -#endif -} - -//------------------------------------------------------------------------------ - -// Set a single indexed floating point component -inline XMVECTOR XM_CALLCONV XMVectorSetByIndex(FXMVECTOR V, float f, - size_t i) noexcept { - assert(i < 4); - _Analysis_assume_(i < 4); - XMVECTORF32 U; - U.v = V; - U.f[i] = f; - return U.v; -} - -//------------------------------------------------------------------------------ - -// Sets the X component of a vector to a passed floating point value -inline XMVECTOR XM_CALLCONV XMVectorSetX(FXMVECTOR V, float x) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{x, V.vector4_f32[1], V.vector4_f32[2], V.vector4_f32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vsetq_lane_f32(x, V, 0); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_set_ss(x); - vResult = _mm_move_ss(V, vResult); - return vResult; -#endif -} - -// Sets the Y component of a vector to a passed floating point value -inline XMVECTOR XM_CALLCONV XMVectorSetY(FXMVECTOR V, float y) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{V.vector4_f32[0], y, V.vector4_f32[2], V.vector4_f32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vsetq_lane_f32(y, V, 1); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vResult = _mm_set_ss(y); - vResult = _mm_insert_ps(V, vResult, 0x10); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Swap y and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); - // Convert input to vector - XMVECTOR vTemp = _mm_set_ss(y); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap y and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); - return vResult; -#endif -} -// Sets the Z component of a vector to a passed floating point value -inline XMVECTOR XM_CALLCONV XMVectorSetZ(FXMVECTOR V, float z) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{V.vector4_f32[0], V.vector4_f32[1], z, V.vector4_f32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vsetq_lane_f32(z, V, 2); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vResult = _mm_set_ss(z); - vResult = _mm_insert_ps(V, vResult, 0x20); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Swap z and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); - // Convert input to vector - XMVECTOR vTemp = _mm_set_ss(z); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap z and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); - return vResult; -#endif -} - -// Sets the W component of a vector to a passed floating point value -inline XMVECTOR XM_CALLCONV XMVectorSetW(FXMVECTOR V, float w) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{V.vector4_f32[0], V.vector4_f32[1], V.vector4_f32[2], w}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vsetq_lane_f32(w, V, 3); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vResult = _mm_set_ss(w); - vResult = _mm_insert_ps(V, vResult, 0x30); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Swap w and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); - // Convert input to vector - XMVECTOR vTemp = _mm_set_ss(w); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap w and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -// Sets a component of a vector to a floating point value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetByIndexPtr(FXMVECTOR V, const float* f, size_t i) noexcept { - assert(f != nullptr); - assert(i < 4); - _Analysis_assume_(i < 4); - XMVECTORF32 U; - U.v = V; - U.f[i] = *f; - return U.v; -} - -//------------------------------------------------------------------------------ - -// Sets the X component of a vector to a floating point value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetXPtr(FXMVECTOR V, const float* x) noexcept { - assert(x != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{*x, V.vector4_f32[1], V.vector4_f32[2], V.vector4_f32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vld1q_lane_f32(x, V, 0); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_load_ss(x); - vResult = _mm_move_ss(V, vResult); - return vResult; -#endif -} - -// Sets the Y component of a vector to a floating point value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetYPtr(FXMVECTOR V, const float* y) noexcept { - assert(y != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{V.vector4_f32[0], *y, V.vector4_f32[2], V.vector4_f32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vld1q_lane_f32(y, V, 1); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap y and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); - // Convert input to vector - XMVECTOR vTemp = _mm_load_ss(y); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap y and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); - return vResult; -#endif -} - -// Sets the Z component of a vector to a floating point value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetZPtr(FXMVECTOR V, const float* z) noexcept { - assert(z != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{V.vector4_f32[0], V.vector4_f32[1], *z, V.vector4_f32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vld1q_lane_f32(z, V, 2); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap z and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); - // Convert input to vector - XMVECTOR vTemp = _mm_load_ss(z); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap z and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); - return vResult; -#endif -} - -// Sets the W component of a vector to a floating point value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetWPtr(FXMVECTOR V, const float* w) noexcept { - assert(w != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 U = { - {{V.vector4_f32[0], V.vector4_f32[1], V.vector4_f32[2], *w}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vld1q_lane_f32(w, V, 3); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap w and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); - // Convert input to vector - XMVECTOR vTemp = _mm_load_ss(w); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap w and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -// Sets a component of a vector to an integer passed by value -inline XMVECTOR XM_CALLCONV XMVectorSetIntByIndex(FXMVECTOR V, uint32_t x, - size_t i) noexcept { - assert(i < 4); - _Analysis_assume_(i < 4); - XMVECTORU32 tmp; - tmp.v = V; - tmp.u[i] = x; - return tmp; -} - -//------------------------------------------------------------------------------ - -// Sets the X component of a vector to an integer passed by value -inline XMVECTOR XM_CALLCONV XMVectorSetIntX(FXMVECTOR V, uint32_t x) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{x, V.vector4_u32[1], V.vector4_u32[2], V.vector4_u32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vsetq_lane_u32(x, vreinterpretq_u32_f32(V), 0)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cvtsi32_si128(static_cast(x)); - XMVECTOR vResult = _mm_move_ss(V, _mm_castsi128_ps(vTemp)); - return vResult; -#endif -} - -// Sets the Y component of a vector to an integer passed by value -inline XMVECTOR XM_CALLCONV XMVectorSetIntY(FXMVECTOR V, uint32_t y) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{V.vector4_u32[0], y, V.vector4_u32[2], V.vector4_u32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vsetq_lane_u32(y, vreinterpretq_u32_f32(V), 1)); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i vResult = _mm_castps_si128(V); - vResult = _mm_insert_epi32(vResult, static_cast(y), 1); - return _mm_castsi128_ps(vResult); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap y and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); - // Convert input to vector - __m128i vTemp = _mm_cvtsi32_si128(static_cast(y)); - // Replace the x component - vResult = _mm_move_ss(vResult, _mm_castsi128_ps(vTemp)); - // Swap y and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); - return vResult; -#endif -} - -// Sets the Z component of a vector to an integer passed by value -inline XMVECTOR XM_CALLCONV XMVectorSetIntZ(FXMVECTOR V, uint32_t z) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{V.vector4_u32[0], V.vector4_u32[1], z, V.vector4_u32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vsetq_lane_u32(z, vreinterpretq_u32_f32(V), 2)); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i vResult = _mm_castps_si128(V); - vResult = _mm_insert_epi32(vResult, static_cast(z), 2); - return _mm_castsi128_ps(vResult); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap z and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); - // Convert input to vector - __m128i vTemp = _mm_cvtsi32_si128(static_cast(z)); - // Replace the x component - vResult = _mm_move_ss(vResult, _mm_castsi128_ps(vTemp)); - // Swap z and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); - return vResult; -#endif -} - -// Sets the W component of a vector to an integer passed by value -inline XMVECTOR XM_CALLCONV XMVectorSetIntW(FXMVECTOR V, uint32_t w) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{V.vector4_u32[0], V.vector4_u32[1], V.vector4_u32[2], w}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vsetq_lane_u32(w, vreinterpretq_u32_f32(V), 3)); -#elif defined(_XM_SSE4_INTRINSICS_) - __m128i vResult = _mm_castps_si128(V); - vResult = _mm_insert_epi32(vResult, static_cast(w), 3); - return _mm_castsi128_ps(vResult); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap w and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); - // Convert input to vector - __m128i vTemp = _mm_cvtsi32_si128(static_cast(w)); - // Replace the x component - vResult = _mm_move_ss(vResult, _mm_castsi128_ps(vTemp)); - // Swap w and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -// Sets a component of a vector to an integer value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetIntByIndexPtr(FXMVECTOR V, const uint32_t* x, size_t i) noexcept { - assert(x != nullptr); - assert(i < 4); - _Analysis_assume_(i < 4); - XMVECTORU32 tmp; - tmp.v = V; - tmp.u[i] = *x; - return tmp; -} - -//------------------------------------------------------------------------------ - -// Sets the X component of a vector to an integer value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetIntXPtr(FXMVECTOR V, const uint32_t* x) noexcept { - assert(x != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{*x, V.vector4_u32[1], V.vector4_u32[2], V.vector4_u32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vld1q_lane_u32(x, *reinterpret_cast(&V), 0)); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(x)); - XMVECTOR vResult = _mm_move_ss(V, vTemp); - return vResult; -#endif -} - -// Sets the Y component of a vector to an integer value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetIntYPtr(FXMVECTOR V, const uint32_t* y) noexcept { - assert(y != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{V.vector4_u32[0], *y, V.vector4_u32[2], V.vector4_u32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vld1q_lane_u32(y, *reinterpret_cast(&V), 1)); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap y and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); - // Convert input to vector - XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(y)); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap y and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 2, 0, 1)); - return vResult; -#endif -} - -// Sets the Z component of a vector to an integer value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetIntZPtr(FXMVECTOR V, const uint32_t* z) noexcept { - assert(z != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{V.vector4_u32[0], V.vector4_u32[1], *z, V.vector4_u32[3]}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vld1q_lane_u32(z, *reinterpret_cast(&V), 2)); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap z and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 0, 1, 2)); - // Convert input to vector - XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(z)); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap z and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); - return vResult; -#endif -} - -// Sets the W component of a vector to an integer value passed by pointer -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorSetIntWPtr(FXMVECTOR V, const uint32_t* w) noexcept { - assert(w != nullptr); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORU32 U = { - {{V.vector4_u32[0], V.vector4_u32[1], V.vector4_u32[2], *w}}}; - return U.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vld1q_lane_u32(w, *reinterpret_cast(&V), 3)); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap w and x - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 2, 1, 3)); - // Convert input to vector - XMVECTOR vTemp = _mm_load_ss(reinterpret_cast(w)); - // Replace the x component - vResult = _mm_move_ss(vResult, vTemp); - // Swap w and x again - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 2, 1, 3)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSwizzle(FXMVECTOR V, uint32_t E0, - uint32_t E1, uint32_t E2, - uint32_t E3) noexcept { - assert((E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4)); - _Analysis_assume_((E0 < 4) && (E1 < 4) && (E2 < 4) && (E3 < 4)); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{V.vector4_f32[E0], V.vector4_f32[E1], - V.vector4_f32[E2], V.vector4_f32[E3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const uint32_t ControlElement[4] = { - 0x03020100, // XM_SWIZZLE_X - 0x07060504, // XM_SWIZZLE_Y - 0x0B0A0908, // XM_SWIZZLE_Z - 0x0F0E0D0C, // XM_SWIZZLE_W - }; - - uint8x8x2_t tbl; - tbl.val[0] = vreinterpret_u8_f32(vget_low_f32(V)); - tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V)); - - uint32x2_t idx = - vcreate_u32(static_cast(ControlElement[E0]) | - (static_cast(ControlElement[E1]) << 32)); - const uint8x8_t rL = vtbl2_u8(tbl, vreinterpret_u8_u32(idx)); - - idx = vcreate_u32(static_cast(ControlElement[E2]) | - (static_cast(ControlElement[E3]) << 32)); - const uint8x8_t rH = vtbl2_u8(tbl, vreinterpret_u8_u32(idx)); - - return vcombine_f32(vreinterpret_f32_u8(rL), vreinterpret_f32_u8(rH)); -#elif defined(_XM_AVX_INTRINSICS_) - unsigned int elem[4] = {E0, E1, E2, E3}; - __m128i vControl = - _mm_loadu_si128(reinterpret_cast(&elem[0])); - return _mm_permutevar_ps(V, vControl); -#else - auto aPtr = reinterpret_cast(&V); - - XMVECTOR Result; - auto pWork = reinterpret_cast(&Result); - - pWork[0] = aPtr[E0]; - pWork[1] = aPtr[E1]; - pWork[2] = aPtr[E2]; - pWork[3] = aPtr[E3]; - - return Result; -#endif -} - -//------------------------------------------------------------------------------ -inline XMVECTOR XM_CALLCONV XMVectorPermute(FXMVECTOR V1, FXMVECTOR V2, - uint32_t PermuteX, - uint32_t PermuteY, - uint32_t PermuteZ, - uint32_t PermuteW) noexcept { - assert(PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && PermuteW <= 7); - _Analysis_assume_(PermuteX <= 7 && PermuteY <= 7 && PermuteZ <= 7 && - PermuteW <= 7); - -#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - static const uint32_t ControlElement[8] = { - 0x03020100, // XM_PERMUTE_0X - 0x07060504, // XM_PERMUTE_0Y - 0x0B0A0908, // XM_PERMUTE_0Z - 0x0F0E0D0C, // XM_PERMUTE_0W - 0x13121110, // XM_PERMUTE_1X - 0x17161514, // XM_PERMUTE_1Y - 0x1B1A1918, // XM_PERMUTE_1Z - 0x1F1E1D1C, // XM_PERMUTE_1W - }; - - uint8x8x4_t tbl; - tbl.val[0] = vreinterpret_u8_f32(vget_low_f32(V1)); - tbl.val[1] = vreinterpret_u8_f32(vget_high_f32(V1)); - tbl.val[2] = vreinterpret_u8_f32(vget_low_f32(V2)); - tbl.val[3] = vreinterpret_u8_f32(vget_high_f32(V2)); - - uint32x2_t idx = - vcreate_u32(static_cast(ControlElement[PermuteX]) | - (static_cast(ControlElement[PermuteY]) << 32)); - const uint8x8_t rL = vtbl4_u8(tbl, vreinterpret_u8_u32(idx)); - - idx = vcreate_u32(static_cast(ControlElement[PermuteZ]) | - (static_cast(ControlElement[PermuteW]) << 32)); - const uint8x8_t rH = vtbl4_u8(tbl, vreinterpret_u8_u32(idx)); - - return vcombine_f32(vreinterpret_f32_u8(rL), vreinterpret_f32_u8(rH)); -#elif defined(_XM_AVX_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - static const XMVECTORU32 three = {{{3, 3, 3, 3}}}; - - XM_ALIGNED_DATA(16) - unsigned int elem[4] = {PermuteX, PermuteY, PermuteZ, PermuteW}; - __m128i vControl = - _mm_load_si128(reinterpret_cast(&elem[0])); - - __m128i vSelect = _mm_cmpgt_epi32(vControl, three); - vControl = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(vControl), three)); - - __m128 shuffled1 = _mm_permutevar_ps(V1, vControl); - __m128 shuffled2 = _mm_permutevar_ps(V2, vControl); - - __m128 masked1 = _mm_andnot_ps(_mm_castsi128_ps(vSelect), shuffled1); - __m128 masked2 = _mm_and_ps(_mm_castsi128_ps(vSelect), shuffled2); - - return _mm_or_ps(masked1, masked2); -#else - - const uint32_t* aPtr[2]; - aPtr[0] = reinterpret_cast(&V1); - aPtr[1] = reinterpret_cast(&V2); - - XMVECTOR Result; - auto pWork = reinterpret_cast(&Result); - - const uint32_t i0 = PermuteX & 3; - const uint32_t vi0 = PermuteX >> 2; - pWork[0] = aPtr[vi0][i0]; - - const uint32_t i1 = PermuteY & 3; - const uint32_t vi1 = PermuteY >> 2; - pWork[1] = aPtr[vi1][i1]; - - const uint32_t i2 = PermuteZ & 3; - const uint32_t vi2 = PermuteZ >> 2; - pWork[2] = aPtr[vi2][i2]; - - const uint32_t i3 = PermuteW & 3; - const uint32_t vi3 = PermuteW >> 2; - pWork[3] = aPtr[vi3][i3]; - - return Result; -#endif -} - -//------------------------------------------------------------------------------ -// Define a control vector to be used in XMVectorSelect -// operations. The four integers specified in XMVectorSelectControl -// serve as indices to select between components in two vectors. -// The first index controls selection for the first component of -// the vectors involved in a select operation, the second index -// controls selection for the second component etc. A value of -// zero for an index causes the corresponding component from the first -// vector to be selected whereas a one causes the component from the -// second vector to be selected instead. - -inline XMVECTOR XM_CALLCONV -XMVectorSelectControl(uint32_t VectorIndex0, uint32_t VectorIndex1, - uint32_t VectorIndex2, uint32_t VectorIndex3) noexcept { -#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - // x=Index0,y=Index1,z=Index2,w=Index3 - __m128i vTemp = _mm_set_epi32( - static_cast(VectorIndex3), static_cast(VectorIndex2), - static_cast(VectorIndex1), static_cast(VectorIndex0)); - // Any non-zero entries become 0xFFFFFFFF else 0 - vTemp = _mm_cmpgt_epi32(vTemp, g_XMZero); - return _mm_castsi128_ps(vTemp); -#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - int32x2_t V0 = vcreate_s32(static_cast(VectorIndex0) | - (static_cast(VectorIndex1) << 32)); - int32x2_t V1 = vcreate_s32(static_cast(VectorIndex2) | - (static_cast(VectorIndex3) << 32)); - int32x4_t vTemp = vcombine_s32(V0, V1); - // Any non-zero entries become 0xFFFFFFFF else 0 - return vreinterpretq_f32_u32(vcgtq_s32(vTemp, g_XMZero)); -#else - XMVECTOR ControlVector; - const uint32_t ControlElement[] = {XM_SELECT_0, XM_SELECT_1}; - - assert(VectorIndex0 < 2); - assert(VectorIndex1 < 2); - assert(VectorIndex2 < 2); - assert(VectorIndex3 < 2); - _Analysis_assume_(VectorIndex0 < 2); - _Analysis_assume_(VectorIndex1 < 2); - _Analysis_assume_(VectorIndex2 < 2); - _Analysis_assume_(VectorIndex3 < 2); - - ControlVector.vector4_u32[0] = ControlElement[VectorIndex0]; - ControlVector.vector4_u32[1] = ControlElement[VectorIndex1]; - ControlVector.vector4_u32[2] = ControlElement[VectorIndex2]; - ControlVector.vector4_u32[3] = ControlElement[VectorIndex3]; - - return ControlVector; - -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSelect(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Control) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{ - (V1.vector4_u32[0] & ~Control.vector4_u32[0]) | - (V2.vector4_u32[0] & Control.vector4_u32[0]), - (V1.vector4_u32[1] & ~Control.vector4_u32[1]) | - (V2.vector4_u32[1] & Control.vector4_u32[1]), - (V1.vector4_u32[2] & ~Control.vector4_u32[2]) | - (V2.vector4_u32[2] & Control.vector4_u32[2]), - (V1.vector4_u32[3] & ~Control.vector4_u32[3]) | - (V2.vector4_u32[3] & Control.vector4_u32[3]), - }}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vbslq_f32(vreinterpretq_u32_f32(Control), V2, V1); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp1 = _mm_andnot_ps(Control, V1); - XMVECTOR vTemp2 = _mm_and_ps(V2, Control); - return _mm_or_ps(vTemp1, vTemp2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorMergeXY(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{ - V1.vector4_u32[0], - V2.vector4_u32[0], - V1.vector4_u32[1], - V2.vector4_u32[1], - }}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vzipq_f32(V1, V2).val[0]; -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_unpacklo_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorMergeZW(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{V1.vector4_u32[2], V2.vector4_u32[2], - V1.vector4_u32[3], V2.vector4_u32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vzipq_f32(V1, V2).val[1]; -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_unpackhi_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorShiftLeft(FXMVECTOR V1, FXMVECTOR V2, - uint32_t Elements) noexcept { - assert(Elements < 4); - _Analysis_assume_(Elements < 4); - return XMVectorPermute(V1, V2, Elements, ((Elements) + 1), ((Elements) + 2), - ((Elements) + 3)); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorRotateLeft(FXMVECTOR V, - uint32_t Elements) noexcept { - assert(Elements < 4); - _Analysis_assume_(Elements < 4); - return XMVectorSwizzle(V, Elements & 3, (Elements + 1) & 3, - (Elements + 2) & 3, (Elements + 3) & 3); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorRotateRight(FXMVECTOR V, - uint32_t Elements) noexcept { - assert(Elements < 4); - _Analysis_assume_(Elements < 4); - return XMVectorSwizzle(V, (4 - (Elements)) & 3, (5 - (Elements)) & 3, - (6 - (Elements)) & 3, (7 - (Elements)) & 3); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorInsert(FXMVECTOR VD, FXMVECTOR VS, - uint32_t VSLeftRotateElements, - uint32_t Select0, uint32_t Select1, - uint32_t Select2, - uint32_t Select3) noexcept { - XMVECTOR Control = XMVectorSelectControl(Select0 & 1, Select1 & 1, - Select2 & 1, Select3 & 1); - return XMVectorSelect(VD, XMVectorRotateLeft(VS, VSLeftRotateElements), - Control); -} - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = {{{ - (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFF : 0, - }}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vceqq_f32(V1, V2)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_cmpeq_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorEqualR(uint32_t* pCR, FXMVECTOR V1, FXMVECTOR V2) noexcept { - assert(pCR != nullptr); -#if defined(_XM_NO_INTRINSICS_) - uint32_t ux = (V1.vector4_f32[0] == V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0; - uint32_t uy = (V1.vector4_f32[1] == V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0; - uint32_t uz = (V1.vector4_f32[2] == V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0; - uint32_t uw = (V1.vector4_f32[3] == V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0; - uint32_t CR = 0; - if (ux & uy & uz & uw) { - // All elements are greater - CR = XM_CRMASK_CR6TRUE; - } else if (!(ux | uy | uz | uw)) { - // All elements are not greater - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - - XMVECTORU32 Control = {{{ux, uy, uz, uw}}}; - return Control; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vceqq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vreinterpret_u8_u32(vget_low_u32(vResult)), - vreinterpret_u8_u32(vget_high_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - // All elements are equal - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - // All elements are not equal - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return vreinterpretq_f32_u32(vResult); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - uint32_t CR = 0; - int iTest = _mm_movemask_ps(vTemp); - if (iTest == 0xf) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - // All elements are not greater - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -// Treat the components of the vectors as unsigned integers and -// compare individual bits between the two. This is useful for -// comparing control vectors and result vectors returned from -// other comparison operations. - -inline XMVECTOR XM_CALLCONV XMVectorEqualInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = {{{ - (V1.vector4_u32[0] == V2.vector4_u32[0]) ? 0xFFFFFFFF : 0, - (V1.vector4_u32[1] == V2.vector4_u32[1]) ? 0xFFFFFFFF : 0, - (V1.vector4_u32[2] == V2.vector4_u32[2]) ? 0xFFFFFFFF : 0, - (V1.vector4_u32[3] == V2.vector4_u32[3]) ? 0xFFFFFFFF : 0, - }}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vceqq_s32(vreinterpretq_s32_f32(V1), vreinterpretq_s32_f32(V2))); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorEqualIntR(uint32_t* pCR, FXMVECTOR V1, FXMVECTOR V2) noexcept { - assert(pCR != nullptr); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Control = XMVectorEqualInt(V1, V2); - - *pCR = 0; - if (XMVector4EqualInt(Control, XMVectorTrueInt())) { - // All elements are equal - *pCR |= XM_CRMASK_CR6TRUE; - } else if (XMVector4EqualInt(Control, XMVectorFalseInt())) { - // All elements are not equal - *pCR |= XM_CRMASK_CR6FALSE; - } - return Control; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - // All elements are equal - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - // All elements are not equal - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return vreinterpretq_f32_u32(vResult); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - int iTemp = _mm_movemask_ps(_mm_castsi128_ps(V)); - uint32_t CR = 0; - if (iTemp == 0x0F) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTemp) { - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorNearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - float fDeltax = V1.vector4_f32[0] - V2.vector4_f32[0]; - float fDeltay = V1.vector4_f32[1] - V2.vector4_f32[1]; - float fDeltaz = V1.vector4_f32[2] - V2.vector4_f32[2]; - float fDeltaw = V1.vector4_f32[3] - V2.vector4_f32[3]; - - fDeltax = fabsf(fDeltax); - fDeltay = fabsf(fDeltay); - fDeltaz = fabsf(fDeltaz); - fDeltaw = fabsf(fDeltaw); - - XMVECTORU32 Control = {{{ - (fDeltax <= Epsilon.vector4_f32[0]) ? 0xFFFFFFFFU : 0, - (fDeltay <= Epsilon.vector4_f32[1]) ? 0xFFFFFFFFU : 0, - (fDeltaz <= Epsilon.vector4_f32[2]) ? 0xFFFFFFFFU : 0, - (fDeltaw <= Epsilon.vector4_f32[3]) ? 0xFFFFFFFFU : 0, - }}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vDelta = vsubq_f32(V1, V2); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - return vacleq_f32(vDelta, Epsilon); -#else - return vreinterpretq_f32_u32(vcleq_f32(vabsq_f32(vDelta), Epsilon)); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - // Get the difference - XMVECTOR vDelta = _mm_sub_ps(V1, V2); - // Get the absolute value of the difference - XMVECTOR vTemp = _mm_setzero_ps(); - vTemp = _mm_sub_ps(vTemp, vDelta); - vTemp = _mm_max_ps(vTemp, vDelta); - vTemp = _mm_cmple_ps(vTemp, Epsilon); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorNotEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = {{{ - (V1.vector4_f32[0] != V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[1] != V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[2] != V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[3] != V2.vector4_f32[3]) ? 0xFFFFFFFF : 0, - }}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(V1, V2))); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_cmpneq_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorNotEqualInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = { - {{(V1.vector4_u32[0] != V2.vector4_u32[0]) ? 0xFFFFFFFFU : 0, - (V1.vector4_u32[1] != V2.vector4_u32[1]) ? 0xFFFFFFFFU : 0, - (V1.vector4_u32[2] != V2.vector4_u32[2]) ? 0xFFFFFFFFU : 0, - (V1.vector4_u32[3] != V2.vector4_u32[3]) ? 0xFFFFFFFFU : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vmvnq_u32( - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)))); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return _mm_xor_ps(_mm_castsi128_ps(V), g_XMNegOneMask); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorGreater(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = { - {{(V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFF : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vcgtq_f32(V1, V2)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_cmpgt_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorGreaterR(uint32_t* pCR, FXMVECTOR V1, FXMVECTOR V2) noexcept { - assert(pCR != nullptr); -#if defined(_XM_NO_INTRINSICS_) - - uint32_t ux = (V1.vector4_f32[0] > V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0; - uint32_t uy = (V1.vector4_f32[1] > V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0; - uint32_t uz = (V1.vector4_f32[2] > V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0; - uint32_t uw = (V1.vector4_f32[3] > V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0; - uint32_t CR = 0; - if (ux & uy & uz & uw) { - // All elements are greater - CR = XM_CRMASK_CR6TRUE; - } else if (!(ux | uy | uz | uw)) { - // All elements are not greater - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - - XMVECTORU32 Control = {{{ux, uy, uz, uw}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgtq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - // All elements are greater - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - // All elements are not greater - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return vreinterpretq_f32_u32(vResult); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); - uint32_t CR = 0; - int iTest = _mm_movemask_ps(vTemp); - if (iTest == 0xf) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - // All elements are not greater - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorGreaterOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = { - {{(V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vcgeq_f32(V1, V2)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_cmpge_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorGreaterOrEqualR(uint32_t* pCR, FXMVECTOR V1, FXMVECTOR V2) noexcept { - assert(pCR != nullptr); -#if defined(_XM_NO_INTRINSICS_) - - uint32_t ux = (V1.vector4_f32[0] >= V2.vector4_f32[0]) ? 0xFFFFFFFFU : 0; - uint32_t uy = (V1.vector4_f32[1] >= V2.vector4_f32[1]) ? 0xFFFFFFFFU : 0; - uint32_t uz = (V1.vector4_f32[2] >= V2.vector4_f32[2]) ? 0xFFFFFFFFU : 0; - uint32_t uw = (V1.vector4_f32[3] >= V2.vector4_f32[3]) ? 0xFFFFFFFFU : 0; - uint32_t CR = 0; - if (ux & uy & uz & uw) { - // All elements are greater - CR = XM_CRMASK_CR6TRUE; - } else if (!(ux | uy | uz | uw)) { - // All elements are not greater - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - - XMVECTORU32 Control = {{{ux, uy, uz, uw}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgeq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - // All elements are greater or equal - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - // All elements are not greater or equal - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return vreinterpretq_f32_u32(vResult); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); - uint32_t CR = 0; - int iTest = _mm_movemask_ps(vTemp); - if (iTest == 0xf) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - // All elements are not greater - CR = XM_CRMASK_CR6FALSE; - } - *pCR = CR; - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLess(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = { - {{(V1.vector4_f32[0] < V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[1] < V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[2] < V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[3] < V2.vector4_f32[3]) ? 0xFFFFFFFF : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vcltq_f32(V1, V2)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_cmplt_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLessOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = { - {{(V1.vector4_f32[0] <= V2.vector4_f32[0]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[1] <= V2.vector4_f32[1]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[2] <= V2.vector4_f32[2]) ? 0xFFFFFFFF : 0, - (V1.vector4_f32[3] <= V2.vector4_f32[3]) ? 0xFFFFFFFF : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32(vcleq_f32(V1, V2)); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_cmple_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorInBounds(FXMVECTOR V, - FXMVECTOR Bounds) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = {{{(V.vector4_f32[0] <= Bounds.vector4_f32[0] && - V.vector4_f32[0] >= -Bounds.vector4_f32[0]) - ? 0xFFFFFFFF - : 0, - (V.vector4_f32[1] <= Bounds.vector4_f32[1] && - V.vector4_f32[1] >= -Bounds.vector4_f32[1]) - ? 0xFFFFFFFF - : 0, - (V.vector4_f32[2] <= Bounds.vector4_f32[2] && - V.vector4_f32[2] >= -Bounds.vector4_f32[2]) - ? 0xFFFFFFFF - : 0, - (V.vector4_f32[3] <= Bounds.vector4_f32[3] && - V.vector4_f32[3] >= -Bounds.vector4_f32[3]) - ? 0xFFFFFFFF - : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Test if less than or equal - uint32x4_t vTemp1 = vcleq_f32(V, Bounds); - // Negate the bounds - uint32x4_t vTemp2 = vreinterpretq_u32_f32(vnegq_f32(Bounds)); - // Test if greater or equal (Reversed) - vTemp2 = vcleq_f32(vreinterpretq_f32_u32(vTemp2), V); - // Blend answers - vTemp1 = vandq_u32(vTemp1, vTemp2); - return vreinterpretq_f32_u32(vTemp1); -#elif defined(_XM_SSE_INTRINSICS_) - // Test if less than or equal - XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); - // Negate the bounds - XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); - // Test if greater or equal (Reversed) - vTemp2 = _mm_cmple_ps(vTemp2, V); - // Blend answers - vTemp1 = _mm_and_ps(vTemp1, vTemp2); - return vTemp1; -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMVectorInBoundsR(uint32_t* pCR, FXMVECTOR V, FXMVECTOR Bounds) noexcept { - assert(pCR != nullptr); -#if defined(_XM_NO_INTRINSICS_) - - uint32_t ux = (V.vector4_f32[0] <= Bounds.vector4_f32[0] && - V.vector4_f32[0] >= -Bounds.vector4_f32[0]) - ? 0xFFFFFFFFU - : 0; - uint32_t uy = (V.vector4_f32[1] <= Bounds.vector4_f32[1] && - V.vector4_f32[1] >= -Bounds.vector4_f32[1]) - ? 0xFFFFFFFFU - : 0; - uint32_t uz = (V.vector4_f32[2] <= Bounds.vector4_f32[2] && - V.vector4_f32[2] >= -Bounds.vector4_f32[2]) - ? 0xFFFFFFFFU - : 0; - uint32_t uw = (V.vector4_f32[3] <= Bounds.vector4_f32[3] && - V.vector4_f32[3] >= -Bounds.vector4_f32[3]) - ? 0xFFFFFFFFU - : 0; - - uint32_t CR = 0; - if (ux & uy & uz & uw) { - // All elements are in bounds - CR = XM_CRMASK_CR6BOUNDS; - } - *pCR = CR; - - XMVECTORU32 Control = {{{ux, uy, uz, uw}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Test if less than or equal - uint32x4_t vTemp1 = vcleq_f32(V, Bounds); - // Negate the bounds - uint32x4_t vTemp2 = vreinterpretq_u32_f32(vnegq_f32(Bounds)); - // Test if greater or equal (Reversed) - vTemp2 = vcleq_f32(vreinterpretq_f32_u32(vTemp2), V); - // Blend answers - vTemp1 = vandq_u32(vTemp1, vTemp2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTemp1)), - vget_high_u8(vreinterpretq_u8_u32(vTemp1))); - uint16x4x2_t vTemp3 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp3.val[1]), 1); - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - // All elements are in bounds - CR = XM_CRMASK_CR6BOUNDS; - } - *pCR = CR; - return vreinterpretq_f32_u32(vTemp1); -#elif defined(_XM_SSE_INTRINSICS_) - // Test if less than or equal - XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); - // Negate the bounds - XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); - // Test if greater or equal (Reversed) - vTemp2 = _mm_cmple_ps(vTemp2, V); - // Blend answers - vTemp1 = _mm_and_ps(vTemp1, vTemp2); - - uint32_t CR = 0; - if (_mm_movemask_ps(vTemp1) == 0xf) { - // All elements are in bounds - CR = XM_CRMASK_CR6BOUNDS; - } - *pCR = CR; - return vTemp1; -#endif -} - -//------------------------------------------------------------------------------ - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(push) -#pragma float_control(precise, on) -#endif - -inline XMVECTOR XM_CALLCONV XMVectorIsNaN(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = {{{XMISNAN(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0, - XMISNAN(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0, - XMISNAN(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0, - XMISNAN(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - XMVECTORU32 vResult = {{{isnan(vgetq_lane_f32(V, 0)) ? 0xFFFFFFFFU : 0, - isnan(vgetq_lane_f32(V, 1)) ? 0xFFFFFFFFU : 0, - isnan(vgetq_lane_f32(V, 2)) ? 0xFFFFFFFFU : 0, - isnan(vgetq_lane_f32(V, 3)) ? 0xFFFFFFFFU : 0}}}; - return vResult.v; -#else - // Test against itself. NaN is always not equal - uint32x4_t vTempNan = vceqq_f32(V, V); - // Flip results - return vreinterpretq_f32_u32(vmvnq_u32(vTempNan)); -#endif -#elif defined(_XM_SSE_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - XM_ALIGNED_DATA(16) float tmp[4]; - _mm_store_ps(tmp, V); - XMVECTORU32 vResult = { - {{isnan(tmp[0]) ? 0xFFFFFFFFU : 0, isnan(tmp[1]) ? 0xFFFFFFFFU : 0, - isnan(tmp[2]) ? 0xFFFFFFFFU : 0, isnan(tmp[3]) ? 0xFFFFFFFFU : 0}}}; - return vResult.v; -#else - // Test against itself. NaN is always not equal - return _mm_cmpneq_ps(V, V); -#endif -#endif -} - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(pop) -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorIsInfinite(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Control = {{{XMISINF(V.vector4_f32[0]) ? 0xFFFFFFFFU : 0, - XMISINF(V.vector4_f32[1]) ? 0xFFFFFFFFU : 0, - XMISINF(V.vector4_f32[2]) ? 0xFFFFFFFFU : 0, - XMISINF(V.vector4_f32[3]) ? 0xFFFFFFFFU : 0}}}; - return Control.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Mask off the sign bit - uint32x4_t vTemp = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - // Compare to infinity - vTemp = vceqq_f32(vreinterpretq_f32_u32(vTemp), g_XMInfinity); - // If any are infinity, the signs are true. - return vreinterpretq_f32_u32(vTemp); -#elif defined(_XM_SSE_INTRINSICS_) - // Mask off the sign bit - __m128 vTemp = _mm_and_ps(V, g_XMAbsMask); - // Compare to infinity - vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity); - // If any are infinity, the signs are true. - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -// Rounding and clamping operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorMin(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = { - {{(V1.vector4_f32[0] < V2.vector4_f32[0]) ? V1.vector4_f32[0] - : V2.vector4_f32[0], - (V1.vector4_f32[1] < V2.vector4_f32[1]) ? V1.vector4_f32[1] - : V2.vector4_f32[1], - (V1.vector4_f32[2] < V2.vector4_f32[2]) ? V1.vector4_f32[2] - : V2.vector4_f32[2], - (V1.vector4_f32[3] < V2.vector4_f32[3]) ? V1.vector4_f32[3] - : V2.vector4_f32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vminq_f32(V1, V2); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_min_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorMax(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = { - {{(V1.vector4_f32[0] > V2.vector4_f32[0]) ? V1.vector4_f32[0] - : V2.vector4_f32[0], - (V1.vector4_f32[1] > V2.vector4_f32[1]) ? V1.vector4_f32[1] - : V2.vector4_f32[1], - (V1.vector4_f32[2] > V2.vector4_f32[2]) ? V1.vector4_f32[2] - : V2.vector4_f32[2], - (V1.vector4_f32[3] > V2.vector4_f32[3]) ? V1.vector4_f32[3] - : V2.vector4_f32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vmaxq_f32(V1, V2); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_max_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -namespace MathInternal { -// Round to nearest (even) a.k.a. banker's rounding -inline float round_to_nearest(float x) noexcept { - float i = floorf(x); - x -= i; - if (x < 0.5f) return i; - if (x > 0.5f) return i + 1.f; - - float int_part; - (void)modff(i / 2.f, &int_part); - if ((2.f * int_part) == i) { - return i; - } - - return i + 1.f; -} -} // namespace MathInternal - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(push) -#pragma float_control(precise, on) -#endif - -inline XMVECTOR XM_CALLCONV XMVectorRound(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{MathInternal::round_to_nearest(V.vector4_f32[0]), - MathInternal::round_to_nearest(V.vector4_f32[1]), - MathInternal::round_to_nearest(V.vector4_f32[2]), - MathInternal::round_to_nearest(V.vector4_f32[3])}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - return vrndnq_f32(V); -#else - uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(V), g_XMNegativeZero); - float32x4_t sMagic = vreinterpretq_f32_u32(vorrq_u32(g_XMNoFraction, sign)); - float32x4_t R1 = vaddq_f32(V, sMagic); - R1 = vsubq_f32(R1, sMagic); - float32x4_t R2 = vabsq_f32(V); - uint32x4_t mask = vcleq_f32(R2, g_XMNoFraction); - return vbslq_f32(mask, R1, V); -#endif -#elif defined(_XM_SSE4_INTRINSICS_) - return _mm_round_ps(V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); -#elif defined(_XM_SSE_INTRINSICS_) - __m128 sign = _mm_and_ps(V, g_XMNegativeZero); - __m128 sMagic = _mm_or_ps(g_XMNoFraction, sign); - __m128 R1 = _mm_add_ps(V, sMagic); - R1 = _mm_sub_ps(R1, sMagic); - __m128 R2 = _mm_and_ps(V, g_XMAbsMask); - __m128 mask = _mm_cmple_ps(R2, g_XMNoFraction); - R2 = _mm_andnot_ps(mask, V); - R1 = _mm_and_ps(R1, mask); - XMVECTOR vResult = _mm_xor_ps(R1, R2); - return vResult; -#endif -} - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(pop) -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorTruncate(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR Result; - uint32_t i; - - // Avoid C4701 - Result.vector4_f32[0] = 0.0f; - - for (i = 0; i < 4; i++) { - if (XMISNAN(V.vector4_f32[i])) { - Result.vector4_u32[i] = 0x7FC00000; - } else if (fabsf(V.vector4_f32[i]) < 8388608.0f) { - Result.vector4_f32[i] = - static_cast(static_cast(V.vector4_f32[i])); - } else { - Result.vector4_f32[i] = V.vector4_f32[i]; - } - } - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - return vrndq_f32(V); -#else - float32x4_t vTest = vabsq_f32(V); - vTest = vreinterpretq_f32_u32(vcltq_f32(vTest, g_XMNoFraction)); - - int32x4_t vInt = vcvtq_s32_f32(V); - float32x4_t vResult = vcvtq_f32_s32(vInt); - - // All numbers less than 8388608 will use the round to int - // All others, use the ORIGINAL value - return vbslq_f32(vreinterpretq_u32_f32(vTest), vResult, V); -#endif -#elif defined(_XM_SSE4_INTRINSICS_) - return _mm_round_ps(V, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); -#elif defined(_XM_SSE_INTRINSICS_) - // To handle NAN, INF and numbers greater than 8388608, use masking - // Get the abs value - __m128i vTest = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - // Test for greater than 8388608 (All floats with NO fractionals, NAN and - // INF - vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction); - // Convert to int and back to float for rounding with truncation - __m128i vInt = _mm_cvttps_epi32(V); - // Convert back to floats - XMVECTOR vResult = _mm_cvtepi32_ps(vInt); - // All numbers less than 8388608 will use the round to int - vResult = _mm_and_ps(vResult, _mm_castsi128_ps(vTest)); - // All others, use the ORIGINAL value - vTest = _mm_andnot_si128(vTest, _mm_castps_si128(V)); - vResult = _mm_or_ps(vResult, _mm_castsi128_ps(vTest)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorFloor(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = { - {{floorf(V.vector4_f32[0]), floorf(V.vector4_f32[1]), - floorf(V.vector4_f32[2]), floorf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - return vrndmq_f32(V); -#else - float32x4_t vTest = vabsq_f32(V); - vTest = vreinterpretq_f32_u32(vcltq_f32(vTest, g_XMNoFraction)); - // Truncate - int32x4_t vInt = vcvtq_s32_f32(V); - float32x4_t vResult = vcvtq_f32_s32(vInt); - uint32x4_t vLargerMask = vcgtq_f32(vResult, V); - // 0 -> 0, 0xffffffff -> -1.0f - float32x4_t vLarger = vcvtq_f32_s32(vreinterpretq_s32_u32(vLargerMask)); - vResult = vaddq_f32(vResult, vLarger); - // All numbers less than 8388608 will use the round to int - // All others, use the ORIGINAL value - return vbslq_f32(vreinterpretq_u32_f32(vTest), vResult, V); -#endif -#elif defined(_XM_SSE4_INTRINSICS_) - return _mm_floor_ps(V); -#elif defined(_XM_SSE_INTRINSICS_) - // To handle NAN, INF and numbers greater than 8388608, use masking - __m128i vTest = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction); - // Truncate - __m128i vInt = _mm_cvttps_epi32(V); - XMVECTOR vResult = _mm_cvtepi32_ps(vInt); - __m128 vLarger = _mm_cmpgt_ps(vResult, V); - // 0 -> 0, 0xffffffff -> -1.0f - vLarger = _mm_cvtepi32_ps(_mm_castps_si128(vLarger)); - vResult = _mm_add_ps(vResult, vLarger); - // All numbers less than 8388608 will use the round to int - vResult = _mm_and_ps(vResult, _mm_castsi128_ps(vTest)); - // All others, use the ORIGINAL value - vTest = _mm_andnot_si128(vTest, _mm_castps_si128(V)); - vResult = _mm_or_ps(vResult, _mm_castsi128_ps(vTest)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorCeiling(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{ceilf(V.vector4_f32[0]), ceilf(V.vector4_f32[1]), - ceilf(V.vector4_f32[2]), ceilf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - return vrndpq_f32(V); -#else - float32x4_t vTest = vabsq_f32(V); - vTest = vreinterpretq_f32_u32(vcltq_f32(vTest, g_XMNoFraction)); - // Truncate - int32x4_t vInt = vcvtq_s32_f32(V); - float32x4_t vResult = vcvtq_f32_s32(vInt); - uint32x4_t vSmallerMask = vcltq_f32(vResult, V); - // 0 -> 0, 0xffffffff -> -1.0f - float32x4_t vSmaller = vcvtq_f32_s32(vreinterpretq_s32_u32(vSmallerMask)); - vResult = vsubq_f32(vResult, vSmaller); - // All numbers less than 8388608 will use the round to int - // All others, use the ORIGINAL value - return vbslq_f32(vreinterpretq_u32_f32(vTest), vResult, V); -#endif -#elif defined(_XM_SSE4_INTRINSICS_) - return _mm_ceil_ps(V); -#elif defined(_XM_SSE_INTRINSICS_) - // To handle NAN, INF and numbers greater than 8388608, use masking - __m128i vTest = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - vTest = _mm_cmplt_epi32(vTest, g_XMNoFraction); - // Truncate - __m128i vInt = _mm_cvttps_epi32(V); - XMVECTOR vResult = _mm_cvtepi32_ps(vInt); - __m128 vSmaller = _mm_cmplt_ps(vResult, V); - // 0 -> 0, 0xffffffff -> -1.0f - vSmaller = _mm_cvtepi32_ps(_mm_castps_si128(vSmaller)); - vResult = _mm_sub_ps(vResult, vSmaller); - // All numbers less than 8388608 will use the round to int - vResult = _mm_and_ps(vResult, _mm_castsi128_ps(vTest)); - // All others, use the ORIGINAL value - vTest = _mm_andnot_si128(vTest, _mm_castps_si128(V)); - vResult = _mm_or_ps(vResult, _mm_castsi128_ps(vTest)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorClamp(FXMVECTOR V, FXMVECTOR Min, - FXMVECTOR Max) noexcept { - assert(XMVector4LessOrEqual(Min, Max)); - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVectorMax(Min, V); - Result = XMVectorMin(Max, Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vResult = vmaxq_f32(Min, V); - vResult = vminq_f32(Max, vResult); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult; - vResult = _mm_max_ps(Min, V); - vResult = _mm_min_ps(Max, vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSaturate(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - const XMVECTOR Zero = XMVectorZero(); - - return XMVectorClamp(V, Zero, g_XMOne.v); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Set <0 to 0 - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); - // Set>1 to 1 - return vminq_f32(vResult, vdupq_n_f32(1.0f)); -#elif defined(_XM_SSE_INTRINSICS_) - // Set <0 to 0 - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - // Set>1 to 1 - return _mm_min_ps(vResult, g_XMOne); -#endif -} - -//------------------------------------------------------------------------------ -// Bitwise logical operations -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorAndInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{V1.vector4_u32[0] & V2.vector4_u32[0], - V1.vector4_u32[1] & V2.vector4_u32[1], - V1.vector4_u32[2] & V2.vector4_u32[2], - V1.vector4_u32[3] & V2.vector4_u32[3]}}}; - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_and_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorAndCInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{V1.vector4_u32[0] & ~V2.vector4_u32[0], - V1.vector4_u32[1] & ~V2.vector4_u32[1], - V1.vector4_u32[2] & ~V2.vector4_u32[2], - V1.vector4_u32[3] & ~V2.vector4_u32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vbicq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_andnot_si128(_mm_castps_si128(V2), _mm_castps_si128(V1)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorOrInt(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{V1.vector4_u32[0] | V2.vector4_u32[0], - V1.vector4_u32[1] | V2.vector4_u32[1], - V1.vector4_u32[2] | V2.vector4_u32[2], - V1.vector4_u32[3] | V2.vector4_u32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - vorrq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_or_si128(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorNorInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{~(V1.vector4_u32[0] | V2.vector4_u32[0]), - ~(V1.vector4_u32[1] | V2.vector4_u32[1]), - ~(V1.vector4_u32[2] | V2.vector4_u32[2]), - ~(V1.vector4_u32[3] | V2.vector4_u32[3])}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t Result = - vorrq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - return vreinterpretq_f32_u32(vbicq_u32(g_XMNegOneMask, Result)); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i Result; - Result = _mm_or_si128(_mm_castps_si128(V1), _mm_castps_si128(V2)); - Result = _mm_andnot_si128(Result, g_XMNegOneMask); - return _mm_castsi128_ps(Result); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorXorInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORU32 Result = {{{V1.vector4_u32[0] ^ V2.vector4_u32[0], - V1.vector4_u32[1] ^ V2.vector4_u32[1], - V1.vector4_u32[2] ^ V2.vector4_u32[2], - V1.vector4_u32[3] ^ V2.vector4_u32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vreinterpretq_f32_u32( - veorq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2))); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i V = _mm_xor_si128(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return _mm_castsi128_ps(V); -#endif -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorNegate(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{-V.vector4_f32[0], -V.vector4_f32[1], - -V.vector4_f32[2], -V.vector4_f32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vnegq_f32(V); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR Z; - - Z = _mm_setzero_ps(); - - return _mm_sub_ps(Z, V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorAdd(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{V1.vector4_f32[0] + V2.vector4_f32[0], - V1.vector4_f32[1] + V2.vector4_f32[1], - V1.vector4_f32[2] + V2.vector4_f32[2], - V1.vector4_f32[3] + V2.vector4_f32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vaddq_f32(V1, V2); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_add_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSum(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result; - Result.f[0] = Result.f[1] = Result.f[2] = Result.f[3] = - V.vector4_f32[0] + V.vector4_f32[1] + V.vector4_f32[2] + - V.vector4_f32[3]; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - float32x4_t vTemp = vpaddq_f32(V, V); - return vpaddq_f32(vTemp, vTemp); -#else - float32x2_t v1 = vget_low_f32(V); - float32x2_t v2 = vget_high_f32(V); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - return vcombine_f32(v1, v1); -#endif -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vTemp = _mm_hadd_ps(V, V); - return _mm_hadd_ps(vTemp, vTemp); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 3, 0, 1)); - XMVECTOR vTemp2 = _mm_add_ps(V, vTemp); - vTemp = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(1, 0, 3, 2)); - return _mm_add_ps(vTemp, vTemp2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorAddAngles(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - const XMVECTOR Zero = XMVectorZero(); - - // Add the given angles together. If the range of V1 is such - // that -Pi <= V1 < Pi and the range of V2 is such that - // -2Pi <= V2 <= 2Pi, then the range of the resulting angle - // will be -Pi <= Result < Pi. - XMVECTOR Result = XMVectorAdd(V1, V2); - - XMVECTOR Mask = XMVectorLess(Result, g_XMNegativePi.v); - XMVECTOR Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask); - - Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v); - Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask); - - Result = XMVectorAdd(Result, Offset); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Adjust the angles - float32x4_t vResult = vaddq_f32(V1, V2); - // Less than Pi? - uint32x4_t vOffset = vcltq_f32(vResult, g_XMNegativePi); - vOffset = vandq_u32(vOffset, g_XMTwoPi); - // Add 2Pi to all entries less than -Pi - vResult = vaddq_f32(vResult, vreinterpretq_f32_u32(vOffset)); - // Greater than or equal to Pi? - vOffset = vcgeq_f32(vResult, g_XMPi); - vOffset = vandq_u32(vOffset, g_XMTwoPi); - // Sub 2Pi to all entries greater than Pi - vResult = vsubq_f32(vResult, vreinterpretq_f32_u32(vOffset)); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Adjust the angles - XMVECTOR vResult = _mm_add_ps(V1, V2); - // Less than Pi? - XMVECTOR vOffset = _mm_cmplt_ps(vResult, g_XMNegativePi); - vOffset = _mm_and_ps(vOffset, g_XMTwoPi); - // Add 2Pi to all entries less than -Pi - vResult = _mm_add_ps(vResult, vOffset); - // Greater than or equal to Pi? - vOffset = _mm_cmpge_ps(vResult, g_XMPi); - vOffset = _mm_and_ps(vOffset, g_XMTwoPi); - // Sub 2Pi to all entries greater than Pi - vResult = _mm_sub_ps(vResult, vOffset); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSubtract(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{V1.vector4_f32[0] - V2.vector4_f32[0], - V1.vector4_f32[1] - V2.vector4_f32[1], - V1.vector4_f32[2] - V2.vector4_f32[2], - V1.vector4_f32[3] - V2.vector4_f32[3]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vsubq_f32(V1, V2); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_sub_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSubtractAngles(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - const XMVECTOR Zero = XMVectorZero(); - - // Subtract the given angles. If the range of V1 is such - // that -Pi <= V1 < Pi and the range of V2 is such that - // -2Pi <= V2 <= 2Pi, then the range of the resulting angle - // will be -Pi <= Result < Pi. - XMVECTOR Result = XMVectorSubtract(V1, V2); - - XMVECTOR Mask = XMVectorLess(Result, g_XMNegativePi.v); - XMVECTOR Offset = XMVectorSelect(Zero, g_XMTwoPi.v, Mask); - - Mask = XMVectorGreaterOrEqual(Result, g_XMPi.v); - Offset = XMVectorSelect(Offset, g_XMNegativeTwoPi.v, Mask); - - Result = XMVectorAdd(Result, Offset); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Adjust the angles - XMVECTOR vResult = vsubq_f32(V1, V2); - // Less than Pi? - uint32x4_t vOffset = vcltq_f32(vResult, g_XMNegativePi); - vOffset = vandq_u32(vOffset, g_XMTwoPi); - // Add 2Pi to all entries less than -Pi - vResult = vaddq_f32(vResult, vreinterpretq_f32_u32(vOffset)); - // Greater than or equal to Pi? - vOffset = vcgeq_f32(vResult, g_XMPi); - vOffset = vandq_u32(vOffset, g_XMTwoPi); - // Sub 2Pi to all entries greater than Pi - vResult = vsubq_f32(vResult, vreinterpretq_f32_u32(vOffset)); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Adjust the angles - XMVECTOR vResult = _mm_sub_ps(V1, V2); - // Less than Pi? - XMVECTOR vOffset = _mm_cmplt_ps(vResult, g_XMNegativePi); - vOffset = _mm_and_ps(vOffset, g_XMTwoPi); - // Add 2Pi to all entries less than -Pi - vResult = _mm_add_ps(vResult, vOffset); - // Greater than or equal to Pi? - vOffset = _mm_cmpge_ps(vResult, g_XMPi); - vOffset = _mm_and_ps(vOffset, g_XMTwoPi); - // Sub 2Pi to all entries greater than Pi - vResult = _mm_sub_ps(vResult, vOffset); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorMultiply(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{V1.vector4_f32[0] * V2.vector4_f32[0], - V1.vector4_f32[1] * V2.vector4_f32[1], - V1.vector4_f32[2] * V2.vector4_f32[2], - V1.vector4_f32[3] * V2.vector4_f32[3]}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vmulq_f32(V1, V2); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_mul_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR V3) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = { - {{V1.vector4_f32[0] * V2.vector4_f32[0] + V3.vector4_f32[0], - V1.vector4_f32[1] * V2.vector4_f32[1] + V3.vector4_f32[1], - V1.vector4_f32[2] * V2.vector4_f32[2] + V3.vector4_f32[2], - V1.vector4_f32[3] * V2.vector4_f32[3] + V3.vector4_f32[3]}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - return vfmaq_f32(V3, V1, V2); -#else - return vmlaq_f32(V3, V1, V2); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - return XM_FMADD_PS(V1, V2, V3); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorDivide(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{V1.vector4_f32[0] / V2.vector4_f32[0], - V1.vector4_f32[1] / V2.vector4_f32[1], - V1.vector4_f32[2] / V2.vector4_f32[2], - V1.vector4_f32[3] / V2.vector4_f32[3]}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - return vdivq_f32(V1, V2); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x4_t Reciprocal = vrecpeq_f32(V2); - float32x4_t S = vrecpsq_f32(Reciprocal, V2); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, V2); - Reciprocal = vmulq_f32(S, Reciprocal); - return vmulq_f32(V1, Reciprocal); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_div_ps(V1, V2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract( - FXMVECTOR V1, FXMVECTOR V2, FXMVECTOR V3) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = { - {{V3.vector4_f32[0] - (V1.vector4_f32[0] * V2.vector4_f32[0]), - V3.vector4_f32[1] - (V1.vector4_f32[1] * V2.vector4_f32[1]), - V3.vector4_f32[2] - (V1.vector4_f32[2] * V2.vector4_f32[2]), - V3.vector4_f32[3] - (V1.vector4_f32[3] * V2.vector4_f32[3])}}}; - return Result; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - return vfmsq_f32(V3, V1, V2); -#else - return vmlsq_f32(V3, V1, V2); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - return XM_FNMADD_PS(V1, V2, V3); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorScale(FXMVECTOR V, - float ScaleFactor) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = { - {{V.vector4_f32[0] * ScaleFactor, V.vector4_f32[1] * ScaleFactor, - V.vector4_f32[2] * ScaleFactor, V.vector4_f32[3] * ScaleFactor}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vmulq_n_f32(V, ScaleFactor); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_set_ps1(ScaleFactor); - return _mm_mul_ps(vResult, V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorReciprocalEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{1.f / V.vector4_f32[0], 1.f / V.vector4_f32[1], - 1.f / V.vector4_f32[2], 1.f / V.vector4_f32[3]}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vrecpeq_f32(V); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_rcp_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorReciprocal(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{1.f / V.vector4_f32[0], 1.f / V.vector4_f32[1], - 1.f / V.vector4_f32[2], 1.f / V.vector4_f32[3]}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - float32x4_t one = vdupq_n_f32(1.0f); - return vdivq_f32(one, V); -#else - // 2 iterations of Newton-Raphson refinement - float32x4_t Reciprocal = vrecpeq_f32(V); - float32x4_t S = vrecpsq_f32(Reciprocal, V); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, V); - return vmulq_f32(S, Reciprocal); -#endif -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_div_ps(g_XMOne, V); -#endif -} - -//------------------------------------------------------------------------------ -// Return an estimated square root -inline XMVECTOR XM_CALLCONV XMVectorSqrtEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{sqrtf(V.vector4_f32[0]), sqrtf(V.vector4_f32[1]), - sqrtf(V.vector4_f32[2]), sqrtf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // 1 iteration of Newton-Raphson refinment of sqrt - float32x4_t S0 = vrsqrteq_f32(V); - float32x4_t P0 = vmulq_f32(V, S0); - float32x4_t R0 = vrsqrtsq_f32(P0, S0); - float32x4_t S1 = vmulq_f32(S0, R0); - - XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v); - XMVECTOR VEqualsZero = XMVectorEqual(V, vdupq_n_f32(0)); - XMVECTOR Result = vmulq_f32(V, S1); - XMVECTOR Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); - return XMVectorSelect(V, Result, Select); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_sqrt_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSqrt(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{sqrtf(V.vector4_f32[0]), sqrtf(V.vector4_f32[1]), - sqrtf(V.vector4_f32[2]), sqrtf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // 3 iterations of Newton-Raphson refinment of sqrt - float32x4_t S0 = vrsqrteq_f32(V); - float32x4_t P0 = vmulq_f32(V, S0); - float32x4_t R0 = vrsqrtsq_f32(P0, S0); - float32x4_t S1 = vmulq_f32(S0, R0); - float32x4_t P1 = vmulq_f32(V, S1); - float32x4_t R1 = vrsqrtsq_f32(P1, S1); - float32x4_t S2 = vmulq_f32(S1, R1); - float32x4_t P2 = vmulq_f32(V, S2); - float32x4_t R2 = vrsqrtsq_f32(P2, S2); - float32x4_t S3 = vmulq_f32(S2, R2); - - XMVECTOR VEqualsInfinity = XMVectorEqualInt(V, g_XMInfinity.v); - XMVECTOR VEqualsZero = XMVectorEqual(V, vdupq_n_f32(0)); - XMVECTOR Result = vmulq_f32(V, S3); - XMVECTOR Select = XMVectorEqualInt(VEqualsInfinity, VEqualsZero); - return XMVectorSelect(V, Result, Select); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_sqrt_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorReciprocalSqrtEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = { - {{1.f / sqrtf(V.vector4_f32[0]), 1.f / sqrtf(V.vector4_f32[1]), - 1.f / sqrtf(V.vector4_f32[2]), 1.f / sqrtf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vrsqrteq_f32(V); -#elif defined(_XM_SSE_INTRINSICS_) - return _mm_rsqrt_ps(V); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorReciprocalSqrt(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = { - {{1.f / sqrtf(V.vector4_f32[0]), 1.f / sqrtf(V.vector4_f32[1]), - 1.f / sqrtf(V.vector4_f32[2]), 1.f / sqrtf(V.vector4_f32[3])}}}; - return Result; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x4_t S0 = vrsqrteq_f32(V); - - float32x4_t P0 = vmulq_f32(V, S0); - float32x4_t R0 = vrsqrtsq_f32(P0, S0); - - float32x4_t S1 = vmulq_f32(S0, R0); - float32x4_t P1 = vmulq_f32(V, S1); - float32x4_t R1 = vrsqrtsq_f32(P1, S1); - - return vmulq_f32(S1, R1); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_sqrt_ps(V); - vResult = _mm_div_ps(g_XMOne, vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorExp2(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{exp2f(V.vector4_f32[0]), exp2f(V.vector4_f32[1]), - exp2f(V.vector4_f32[2]), exp2f(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t itrunc = vcvtq_s32_f32(V); - float32x4_t ftrunc = vcvtq_f32_s32(itrunc); - float32x4_t y = vsubq_f32(V, ftrunc); - - float32x4_t poly = vmlaq_f32(g_XMExpEst6, g_XMExpEst7, y); - poly = vmlaq_f32(g_XMExpEst5, poly, y); - poly = vmlaq_f32(g_XMExpEst4, poly, y); - poly = vmlaq_f32(g_XMExpEst3, poly, y); - poly = vmlaq_f32(g_XMExpEst2, poly, y); - poly = vmlaq_f32(g_XMExpEst1, poly, y); - poly = vmlaq_f32(g_XMOne, poly, y); - - int32x4_t biased = vaddq_s32(itrunc, g_XMExponentBias); - biased = vshlq_n_s32(biased, 23); - float32x4_t result0 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly); - - biased = vaddq_s32(itrunc, g_XM253); - biased = vshlq_n_s32(biased, 23); - float32x4_t result1 = XMVectorDivide(vreinterpretq_f32_s32(biased), poly); - result1 = vmulq_f32(g_XMMinNormal.v, result1); - - // Use selection to handle the cases - // if (V is NaN) -> QNaN; - // else if (V sign bit set) - // if (V > -150) - // if (V.exponent < -126) -> result1 - // else -> result0 - // else -> +0 - // else - // if (V < 128) -> result0 - // else -> +inf - - uint32x4_t comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBin128); - float32x4_t result2 = vbslq_f32(comp, result0, g_XMInfinity); - - comp = vcltq_s32(itrunc, g_XMSubnormalExponent); - float32x4_t result3 = vbslq_f32(comp, result1, result0); - - comp = vcltq_s32(vreinterpretq_s32_f32(V), g_XMBinNeg150); - float32x4_t result4 = vbslq_f32(comp, result3, g_XMZero); - - int32x4_t sign = vandq_s32(vreinterpretq_s32_f32(V), g_XMNegativeZero); - comp = vceqq_s32(sign, g_XMNegativeZero); - float32x4_t result5 = vbslq_f32(comp, result4, result2); - - int32x4_t t0 = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); - int32x4_t t1 = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); - t0 = vreinterpretq_s32_u32(vceqq_s32(t0, g_XMZero)); - t1 = vreinterpretq_s32_u32(vceqq_s32(t1, g_XMInfinity)); - int32x4_t isNaN = vbicq_s32(t1, t0); - - float32x4_t vResult = - vbslq_f32(vreinterpretq_u32_s32(isNaN), g_XMQNaN, result5); - return vResult; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_exp2_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128i itrunc = _mm_cvttps_epi32(V); - __m128 ftrunc = _mm_cvtepi32_ps(itrunc); - __m128 y = _mm_sub_ps(V, ftrunc); - - __m128 poly = XM_FMADD_PS(g_XMExpEst7, y, g_XMExpEst6); - poly = XM_FMADD_PS(poly, y, g_XMExpEst5); - poly = XM_FMADD_PS(poly, y, g_XMExpEst4); - poly = XM_FMADD_PS(poly, y, g_XMExpEst3); - poly = XM_FMADD_PS(poly, y, g_XMExpEst2); - poly = XM_FMADD_PS(poly, y, g_XMExpEst1); - poly = XM_FMADD_PS(poly, y, g_XMOne); - - __m128i biased = _mm_add_epi32(itrunc, g_XMExponentBias); - biased = _mm_slli_epi32(biased, 23); - __m128 result0 = _mm_div_ps(_mm_castsi128_ps(biased), poly); - - biased = _mm_add_epi32(itrunc, g_XM253); - biased = _mm_slli_epi32(biased, 23); - __m128 result1 = _mm_div_ps(_mm_castsi128_ps(biased), poly); - result1 = _mm_mul_ps(g_XMMinNormal.v, result1); - - // Use selection to handle the cases - // if (V is NaN) -> QNaN; - // else if (V sign bit set) - // if (V > -150) - // if (V.exponent < -126) -> result1 - // else -> result0 - // else -> +0 - // else - // if (V < 128) -> result0 - // else -> +inf - - __m128i comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBin128); - __m128i select0 = _mm_and_si128(comp, _mm_castps_si128(result0)); - __m128i select1 = _mm_andnot_si128(comp, g_XMInfinity); - __m128i result2 = _mm_or_si128(select0, select1); - - comp = _mm_cmplt_epi32(itrunc, g_XMSubnormalExponent); - select1 = _mm_and_si128(comp, _mm_castps_si128(result1)); - select0 = _mm_andnot_si128(comp, _mm_castps_si128(result0)); - __m128i result3 = _mm_or_si128(select0, select1); - - comp = _mm_cmplt_epi32(_mm_castps_si128(V), g_XMBinNeg150); - select0 = _mm_and_si128(comp, result3); - select1 = _mm_andnot_si128(comp, g_XMZero); - __m128i result4 = _mm_or_si128(select0, select1); - - __m128i sign = _mm_and_si128(_mm_castps_si128(V), g_XMNegativeZero); - comp = _mm_cmpeq_epi32(sign, g_XMNegativeZero); - select0 = _mm_and_si128(comp, result4); - select1 = _mm_andnot_si128(comp, result2); - __m128i result5 = _mm_or_si128(select0, select1); - - __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); - __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); - t0 = _mm_cmpeq_epi32(t0, g_XMZero); - t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); - __m128i isNaN = _mm_andnot_si128(t0, t1); - - select0 = _mm_and_si128(isNaN, g_XMQNaN); - select1 = _mm_andnot_si128(isNaN, result5); - __m128i vResult = _mm_or_si128(select0, select1); - - return _mm_castsi128_ps(vResult); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorExp10(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = { - {{powf(10.0f, V.vector4_f32[0]), powf(10.0f, V.vector4_f32[1]), - powf(10.0f, V.vector4_f32[2]), powf(10.0f, V.vector4_f32[3])}}}; - return Result.v; - -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_exp10_ps(V); - return Result; -#else - // exp10(V) = exp2(vin*log2(10)) - XMVECTOR Vten = XMVectorMultiply(g_XMLg10, V); - return XMVectorExp2(Vten); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorExpE(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{expf(V.vector4_f32[0]), expf(V.vector4_f32[1]), - expf(V.vector4_f32[2]), expf(V.vector4_f32[3])}}}; - return Result.v; - -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_exp_ps(V); - return Result; -#else - // expE(V) = exp2(vin*log2(e)) - XMVECTOR Ve = XMVectorMultiply(g_XMLgE, V); - return XMVectorExp2(Ve); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorExp(FXMVECTOR V) noexcept { - return XMVectorExp2(V); -} - -//------------------------------------------------------------------------------ - -#if defined(_XM_SSE_INTRINSICS_) - -namespace MathInternal { -inline __m128i multi_sll_epi32(__m128i value, __m128i count) noexcept { - __m128i v = _mm_shuffle_epi32(value, _MM_SHUFFLE(0, 0, 0, 0)); - __m128i c = _mm_shuffle_epi32(count, _MM_SHUFFLE(0, 0, 0, 0)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r0 = _mm_sll_epi32(v, c); - - v = _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)); - c = _mm_shuffle_epi32(count, _MM_SHUFFLE(1, 1, 1, 1)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r1 = _mm_sll_epi32(v, c); - - v = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 2, 2, 2)); - c = _mm_shuffle_epi32(count, _MM_SHUFFLE(2, 2, 2, 2)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r2 = _mm_sll_epi32(v, c); - - v = _mm_shuffle_epi32(value, _MM_SHUFFLE(3, 3, 3, 3)); - c = _mm_shuffle_epi32(count, _MM_SHUFFLE(3, 3, 3, 3)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r3 = _mm_sll_epi32(v, c); - - // (r0,r0,r1,r1) - __m128 r01 = _mm_shuffle_ps(_mm_castsi128_ps(r0), _mm_castsi128_ps(r1), - _MM_SHUFFLE(0, 0, 0, 0)); - // (r2,r2,r3,r3) - __m128 r23 = _mm_shuffle_ps(_mm_castsi128_ps(r2), _mm_castsi128_ps(r3), - _MM_SHUFFLE(0, 0, 0, 0)); - // (r0,r1,r2,r3) - __m128 result = _mm_shuffle_ps(r01, r23, _MM_SHUFFLE(2, 0, 2, 0)); - return _mm_castps_si128(result); -} - -inline __m128i multi_srl_epi32(__m128i value, __m128i count) noexcept { - __m128i v = _mm_shuffle_epi32(value, _MM_SHUFFLE(0, 0, 0, 0)); - __m128i c = _mm_shuffle_epi32(count, _MM_SHUFFLE(0, 0, 0, 0)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r0 = _mm_srl_epi32(v, c); - - v = _mm_shuffle_epi32(value, _MM_SHUFFLE(1, 1, 1, 1)); - c = _mm_shuffle_epi32(count, _MM_SHUFFLE(1, 1, 1, 1)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r1 = _mm_srl_epi32(v, c); - - v = _mm_shuffle_epi32(value, _MM_SHUFFLE(2, 2, 2, 2)); - c = _mm_shuffle_epi32(count, _MM_SHUFFLE(2, 2, 2, 2)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r2 = _mm_srl_epi32(v, c); - - v = _mm_shuffle_epi32(value, _MM_SHUFFLE(3, 3, 3, 3)); - c = _mm_shuffle_epi32(count, _MM_SHUFFLE(3, 3, 3, 3)); - c = _mm_and_si128(c, g_XMMaskX); - __m128i r3 = _mm_srl_epi32(v, c); - - // (r0,r0,r1,r1) - __m128 r01 = _mm_shuffle_ps(_mm_castsi128_ps(r0), _mm_castsi128_ps(r1), - _MM_SHUFFLE(0, 0, 0, 0)); - // (r2,r2,r3,r3) - __m128 r23 = _mm_shuffle_ps(_mm_castsi128_ps(r2), _mm_castsi128_ps(r3), - _MM_SHUFFLE(0, 0, 0, 0)); - // (r0,r1,r2,r3) - __m128 result = _mm_shuffle_ps(r01, r23, _MM_SHUFFLE(2, 0, 2, 0)); - return _mm_castps_si128(result); -} - -inline __m128i GetLeadingBit(const __m128i value) noexcept { - static const XMVECTORI32 g_XM0000FFFF = { - {{0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF}}}; - static const XMVECTORI32 g_XM000000FF = { - {{0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF}}}; - static const XMVECTORI32 g_XM0000000F = { - {{0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F}}}; - static const XMVECTORI32 g_XM00000003 = { - {{0x00000003, 0x00000003, 0x00000003, 0x00000003}}}; - - __m128i v = value, r, c, b, s; - - c = _mm_cmpgt_epi32(v, g_XM0000FFFF); // c = (v > 0xFFFF) - b = _mm_srli_epi32(c, 31); // b = (c ? 1 : 0) - r = _mm_slli_epi32(b, 4); // r = (b << 4) - v = multi_srl_epi32(v, r); // v = (v >> r) - - c = _mm_cmpgt_epi32(v, g_XM000000FF); // c = (v > 0xFF) - b = _mm_srli_epi32(c, 31); // b = (c ? 1 : 0) - s = _mm_slli_epi32(b, 3); // s = (b << 3) - v = multi_srl_epi32(v, s); // v = (v >> s) - r = _mm_or_si128(r, s); // r = (r | s) - - c = _mm_cmpgt_epi32(v, g_XM0000000F); // c = (v > 0xF) - b = _mm_srli_epi32(c, 31); // b = (c ? 1 : 0) - s = _mm_slli_epi32(b, 2); // s = (b << 2) - v = multi_srl_epi32(v, s); // v = (v >> s) - r = _mm_or_si128(r, s); // r = (r | s) - - c = _mm_cmpgt_epi32(v, g_XM00000003); // c = (v > 0x3) - b = _mm_srli_epi32(c, 31); // b = (c ? 1 : 0) - s = _mm_slli_epi32(b, 1); // s = (b << 1) - v = multi_srl_epi32(v, s); // v = (v >> s) - r = _mm_or_si128(r, s); // r = (r | s) - - s = _mm_srli_epi32(v, 1); - r = _mm_or_si128(r, s); - return r; -} -} // namespace MathInternal - -#endif // _XM_SSE_INTRINSICS_ - -#if defined(_XM_ARM_NEON_INTRINSICS_) - -namespace MathInternal { -inline int32x4_t GetLeadingBit(const int32x4_t value) noexcept { - static const XMVECTORI32 g_XM0000FFFF = { - {{0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF}}}; - static const XMVECTORI32 g_XM000000FF = { - {{0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF}}}; - static const XMVECTORI32 g_XM0000000F = { - {{0x0000000F, 0x0000000F, 0x0000000F, 0x0000000F}}}; - static const XMVECTORI32 g_XM00000003 = { - {{0x00000003, 0x00000003, 0x00000003, 0x00000003}}}; - - uint32x4_t c = vcgtq_s32(value, g_XM0000FFFF); // c = (v > 0xFFFF) - int32x4_t b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ? 1 : 0) - int32x4_t r = vshlq_n_s32(b, 4); // r = (b << 4) - r = vnegq_s32(r); - int32x4_t v = vshlq_s32(value, r); // v = (v >> r) - - c = vcgtq_s32(v, g_XM000000FF); // c = (v > 0xFF) - b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ? 1 : 0) - int32x4_t s = vshlq_n_s32(b, 3); // s = (b << 3) - s = vnegq_s32(s); - v = vshlq_s32(v, s); // v = (v >> s) - r = vorrq_s32(r, s); // r = (r | s) - - c = vcgtq_s32(v, g_XM0000000F); // c = (v > 0xF) - b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ? 1 : 0) - s = vshlq_n_s32(b, 2); // s = (b << 2) - s = vnegq_s32(s); - v = vshlq_s32(v, s); // v = (v >> s) - r = vorrq_s32(r, s); // r = (r | s) - - c = vcgtq_s32(v, g_XM00000003); // c = (v > 0x3) - b = vshrq_n_s32(vreinterpretq_s32_u32(c), 31); // b = (c ? 1 : 0) - s = vshlq_n_s32(b, 1); // s = (b << 1) - s = vnegq_s32(s); - v = vshlq_s32(v, s); // v = (v >> s) - r = vorrq_s32(r, s); // r = (r | s) - - s = vshrq_n_s32(v, 1); - r = vorrq_s32(r, s); - return r; -} - -} // namespace MathInternal - -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLog2(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{log2f(V.vector4_f32[0]), log2f(V.vector4_f32[1]), - log2f(V.vector4_f32[2]), log2f(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t rawBiased = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); - int32x4_t trailing = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); - uint32x4_t isExponentZero = - vceqq_s32(vreinterpretq_s32_f32(g_XMZero), rawBiased); - - // Compute exponent and significand for normals. - int32x4_t biased = vshrq_n_s32(rawBiased, 23); - int32x4_t exponentNor = vsubq_s32(biased, g_XMExponentBias); - int32x4_t trailingNor = trailing; - - // Compute exponent and significand for subnormals. - int32x4_t leading = MathInternal::GetLeadingBit(trailing); - int32x4_t shift = vsubq_s32(g_XMNumTrailing, leading); - int32x4_t exponentSub = vsubq_s32(g_XMSubnormalExponent, shift); - int32x4_t trailingSub = vshlq_s32(trailing, shift); - trailingSub = vandq_s32(trailingSub, g_XMQNaNTest); - int32x4_t e = vbslq_s32(isExponentZero, exponentSub, exponentNor); - int32x4_t t = vbslq_s32(isExponentZero, trailingSub, trailingNor); - - // Compute the approximation. - int32x4_t tmp = vorrq_s32(vreinterpretq_s32_f32(g_XMOne), t); - float32x4_t y = vsubq_f32(vreinterpretq_f32_s32(tmp), g_XMOne); - - float32x4_t log2 = vmlaq_f32(g_XMLogEst6, g_XMLogEst7, y); - log2 = vmlaq_f32(g_XMLogEst5, log2, y); - log2 = vmlaq_f32(g_XMLogEst4, log2, y); - log2 = vmlaq_f32(g_XMLogEst3, log2, y); - log2 = vmlaq_f32(g_XMLogEst2, log2, y); - log2 = vmlaq_f32(g_XMLogEst1, log2, y); - log2 = vmlaq_f32(g_XMLogEst0, log2, y); - log2 = vmlaq_f32(vcvtq_f32_s32(e), log2, y); - - // if (x is NaN) -> QNaN - // else if (V is positive) - // if (V is infinite) -> +inf - // else -> log2(V) - // else - // if (V is zero) -> -inf - // else -> -QNaN - - uint32x4_t isInfinite = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - isInfinite = vceqq_u32(isInfinite, g_XMInfinity); - - uint32x4_t isGreaterZero = vcgtq_f32(V, g_XMZero); - uint32x4_t isNotFinite = vcgtq_f32(V, g_XMInfinity); - uint32x4_t isPositive = vbicq_u32(isGreaterZero, isNotFinite); - - uint32x4_t isZero = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - isZero = vceqq_u32(isZero, g_XMZero); - - uint32x4_t t0 = vandq_u32(vreinterpretq_u32_f32(V), g_XMQNaNTest); - uint32x4_t t1 = vandq_u32(vreinterpretq_u32_f32(V), g_XMInfinity); - t0 = vceqq_u32(t0, g_XMZero); - t1 = vceqq_u32(t1, g_XMInfinity); - uint32x4_t isNaN = vbicq_u32(t1, t0); - - float32x4_t result = vbslq_f32(isInfinite, g_XMInfinity, log2); - float32x4_t tmp2 = vbslq_f32(isZero, g_XMNegInfinity, g_XMNegQNaN); - result = vbslq_f32(isPositive, result, tmp2); - result = vbslq_f32(isNaN, g_XMQNaN, result); - return result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_log2_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); - __m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); - __m128i isExponentZero = _mm_cmpeq_epi32(g_XMZero, rawBiased); - - // Compute exponent and significand for normals. - __m128i biased = _mm_srli_epi32(rawBiased, 23); - __m128i exponentNor = _mm_sub_epi32(biased, g_XMExponentBias); - __m128i trailingNor = trailing; - - // Compute exponent and significand for subnormals. - __m128i leading = MathInternal::GetLeadingBit(trailing); - __m128i shift = _mm_sub_epi32(g_XMNumTrailing, leading); - __m128i exponentSub = _mm_sub_epi32(g_XMSubnormalExponent, shift); - __m128i trailingSub = MathInternal::multi_sll_epi32(trailing, shift); - trailingSub = _mm_and_si128(trailingSub, g_XMQNaNTest); - - __m128i select0 = _mm_and_si128(isExponentZero, exponentSub); - __m128i select1 = _mm_andnot_si128(isExponentZero, exponentNor); - __m128i e = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isExponentZero, trailingSub); - select1 = _mm_andnot_si128(isExponentZero, trailingNor); - __m128i t = _mm_or_si128(select0, select1); - - // Compute the approximation. - __m128i tmp = _mm_or_si128(g_XMOne, t); - __m128 y = _mm_sub_ps(_mm_castsi128_ps(tmp), g_XMOne); - - __m128 log2 = XM_FMADD_PS(g_XMLogEst7, y, g_XMLogEst6); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst5); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst4); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst3); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst2); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst1); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst0); - log2 = XM_FMADD_PS(log2, y, _mm_cvtepi32_ps(e)); - - // if (x is NaN) -> QNaN - // else if (V is positive) - // if (V is infinite) -> +inf - // else -> log2(V) - // else - // if (V is zero) -> -inf - // else -> -QNaN - - __m128i isInfinite = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - isInfinite = _mm_cmpeq_epi32(isInfinite, g_XMInfinity); - - __m128i isGreaterZero = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMZero); - __m128i isNotFinite = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMInfinity); - __m128i isPositive = _mm_andnot_si128(isNotFinite, isGreaterZero); - - __m128i isZero = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - isZero = _mm_cmpeq_epi32(isZero, g_XMZero); - - __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); - __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); - t0 = _mm_cmpeq_epi32(t0, g_XMZero); - t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); - __m128i isNaN = _mm_andnot_si128(t0, t1); - - select0 = _mm_and_si128(isInfinite, g_XMInfinity); - select1 = _mm_andnot_si128(isInfinite, _mm_castps_si128(log2)); - __m128i result = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isZero, g_XMNegInfinity); - select1 = _mm_andnot_si128(isZero, g_XMNegQNaN); - tmp = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isPositive, result); - select1 = _mm_andnot_si128(isPositive, tmp); - result = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isNaN, g_XMQNaN); - select1 = _mm_andnot_si128(isNaN, result); - result = _mm_or_si128(select0, select1); - - return _mm_castsi128_ps(result); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLog10(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = { - {{log10f(V.vector4_f32[0]), log10f(V.vector4_f32[1]), - log10f(V.vector4_f32[2]), log10f(V.vector4_f32[3])}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t rawBiased = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); - int32x4_t trailing = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); - uint32x4_t isExponentZero = vceqq_s32(g_XMZero, rawBiased); - - // Compute exponent and significand for normals. - int32x4_t biased = vshrq_n_s32(rawBiased, 23); - int32x4_t exponentNor = vsubq_s32(biased, g_XMExponentBias); - int32x4_t trailingNor = trailing; - - // Compute exponent and significand for subnormals. - int32x4_t leading = MathInternal::GetLeadingBit(trailing); - int32x4_t shift = vsubq_s32(g_XMNumTrailing, leading); - int32x4_t exponentSub = vsubq_s32(g_XMSubnormalExponent, shift); - int32x4_t trailingSub = vshlq_s32(trailing, shift); - trailingSub = vandq_s32(trailingSub, g_XMQNaNTest); - int32x4_t e = vbslq_s32(isExponentZero, exponentSub, exponentNor); - int32x4_t t = vbslq_s32(isExponentZero, trailingSub, trailingNor); - - // Compute the approximation. - int32x4_t tmp = vorrq_s32(g_XMOne, t); - float32x4_t y = vsubq_f32(vreinterpretq_f32_s32(tmp), g_XMOne); - - float32x4_t log2 = vmlaq_f32(g_XMLogEst6, g_XMLogEst7, y); - log2 = vmlaq_f32(g_XMLogEst5, log2, y); - log2 = vmlaq_f32(g_XMLogEst4, log2, y); - log2 = vmlaq_f32(g_XMLogEst3, log2, y); - log2 = vmlaq_f32(g_XMLogEst2, log2, y); - log2 = vmlaq_f32(g_XMLogEst1, log2, y); - log2 = vmlaq_f32(g_XMLogEst0, log2, y); - log2 = vmlaq_f32(vcvtq_f32_s32(e), log2, y); - - log2 = vmulq_f32(g_XMInvLg10, log2); - - // if (x is NaN) -> QNaN - // else if (V is positive) - // if (V is infinite) -> +inf - // else -> log2(V) - // else - // if (V is zero) -> -inf - // else -> -QNaN - - uint32x4_t isInfinite = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - isInfinite = vceqq_u32(isInfinite, g_XMInfinity); - - uint32x4_t isGreaterZero = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMZero); - uint32x4_t isNotFinite = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); - uint32x4_t isPositive = vbicq_u32(isGreaterZero, isNotFinite); - - uint32x4_t isZero = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - isZero = vceqq_u32(isZero, g_XMZero); - - uint32x4_t t0 = vandq_u32(vreinterpretq_u32_f32(V), g_XMQNaNTest); - uint32x4_t t1 = vandq_u32(vreinterpretq_u32_f32(V), g_XMInfinity); - t0 = vceqq_u32(t0, g_XMZero); - t1 = vceqq_u32(t1, g_XMInfinity); - uint32x4_t isNaN = vbicq_u32(t1, t0); - - float32x4_t result = vbslq_f32(isInfinite, g_XMInfinity, log2); - float32x4_t tmp2 = vbslq_f32(isZero, g_XMNegInfinity, g_XMNegQNaN); - result = vbslq_f32(isPositive, result, tmp2); - result = vbslq_f32(isNaN, g_XMQNaN, result); - return result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_log10_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); - __m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); - __m128i isExponentZero = _mm_cmpeq_epi32(g_XMZero, rawBiased); - - // Compute exponent and significand for normals. - __m128i biased = _mm_srli_epi32(rawBiased, 23); - __m128i exponentNor = _mm_sub_epi32(biased, g_XMExponentBias); - __m128i trailingNor = trailing; - - // Compute exponent and significand for subnormals. - __m128i leading = MathInternal::GetLeadingBit(trailing); - __m128i shift = _mm_sub_epi32(g_XMNumTrailing, leading); - __m128i exponentSub = _mm_sub_epi32(g_XMSubnormalExponent, shift); - __m128i trailingSub = MathInternal::multi_sll_epi32(trailing, shift); - trailingSub = _mm_and_si128(trailingSub, g_XMQNaNTest); - - __m128i select0 = _mm_and_si128(isExponentZero, exponentSub); - __m128i select1 = _mm_andnot_si128(isExponentZero, exponentNor); - __m128i e = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isExponentZero, trailingSub); - select1 = _mm_andnot_si128(isExponentZero, trailingNor); - __m128i t = _mm_or_si128(select0, select1); - - // Compute the approximation. - __m128i tmp = _mm_or_si128(g_XMOne, t); - __m128 y = _mm_sub_ps(_mm_castsi128_ps(tmp), g_XMOne); - - __m128 log2 = XM_FMADD_PS(g_XMLogEst7, y, g_XMLogEst6); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst5); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst4); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst3); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst2); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst1); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst0); - log2 = XM_FMADD_PS(log2, y, _mm_cvtepi32_ps(e)); - - log2 = _mm_mul_ps(g_XMInvLg10, log2); - - // if (x is NaN) -> QNaN - // else if (V is positive) - // if (V is infinite) -> +inf - // else -> log2(V) - // else - // if (V is zero) -> -inf - // else -> -QNaN - - __m128i isInfinite = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - isInfinite = _mm_cmpeq_epi32(isInfinite, g_XMInfinity); - - __m128i isGreaterZero = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMZero); - __m128i isNotFinite = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMInfinity); - __m128i isPositive = _mm_andnot_si128(isNotFinite, isGreaterZero); - - __m128i isZero = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - isZero = _mm_cmpeq_epi32(isZero, g_XMZero); - - __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); - __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); - t0 = _mm_cmpeq_epi32(t0, g_XMZero); - t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); - __m128i isNaN = _mm_andnot_si128(t0, t1); - - select0 = _mm_and_si128(isInfinite, g_XMInfinity); - select1 = _mm_andnot_si128(isInfinite, _mm_castps_si128(log2)); - __m128i result = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isZero, g_XMNegInfinity); - select1 = _mm_andnot_si128(isZero, g_XMNegQNaN); - tmp = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isPositive, result); - select1 = _mm_andnot_si128(isPositive, tmp); - result = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isNaN, g_XMQNaN); - select1 = _mm_andnot_si128(isNaN, result); - result = _mm_or_si128(select0, select1); - - return _mm_castsi128_ps(result); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLogE(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{logf(V.vector4_f32[0]), logf(V.vector4_f32[1]), - logf(V.vector4_f32[2]), logf(V.vector4_f32[3])}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int32x4_t rawBiased = vandq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); - int32x4_t trailing = vandq_s32(vreinterpretq_s32_f32(V), g_XMQNaNTest); - uint32x4_t isExponentZero = vceqq_s32(g_XMZero, rawBiased); - - // Compute exponent and significand for normals. - int32x4_t biased = vshrq_n_s32(rawBiased, 23); - int32x4_t exponentNor = vsubq_s32(biased, g_XMExponentBias); - int32x4_t trailingNor = trailing; - - // Compute exponent and significand for subnormals. - int32x4_t leading = MathInternal::GetLeadingBit(trailing); - int32x4_t shift = vsubq_s32(g_XMNumTrailing, leading); - int32x4_t exponentSub = vsubq_s32(g_XMSubnormalExponent, shift); - int32x4_t trailingSub = vshlq_s32(trailing, shift); - trailingSub = vandq_s32(trailingSub, g_XMQNaNTest); - int32x4_t e = vbslq_s32(isExponentZero, exponentSub, exponentNor); - int32x4_t t = vbslq_s32(isExponentZero, trailingSub, trailingNor); - - // Compute the approximation. - int32x4_t tmp = vorrq_s32(g_XMOne, t); - float32x4_t y = vsubq_f32(vreinterpretq_f32_s32(tmp), g_XMOne); - - float32x4_t log2 = vmlaq_f32(g_XMLogEst6, g_XMLogEst7, y); - log2 = vmlaq_f32(g_XMLogEst5, log2, y); - log2 = vmlaq_f32(g_XMLogEst4, log2, y); - log2 = vmlaq_f32(g_XMLogEst3, log2, y); - log2 = vmlaq_f32(g_XMLogEst2, log2, y); - log2 = vmlaq_f32(g_XMLogEst1, log2, y); - log2 = vmlaq_f32(g_XMLogEst0, log2, y); - log2 = vmlaq_f32(vcvtq_f32_s32(e), log2, y); - - log2 = vmulq_f32(g_XMInvLgE, log2); - - // if (x is NaN) -> QNaN - // else if (V is positive) - // if (V is infinite) -> +inf - // else -> log2(V) - // else - // if (V is zero) -> -inf - // else -> -QNaN - - uint32x4_t isInfinite = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - isInfinite = vceqq_u32(isInfinite, g_XMInfinity); - - uint32x4_t isGreaterZero = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMZero); - uint32x4_t isNotFinite = vcgtq_s32(vreinterpretq_s32_f32(V), g_XMInfinity); - uint32x4_t isPositive = vbicq_u32(isGreaterZero, isNotFinite); - - uint32x4_t isZero = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - isZero = vceqq_u32(isZero, g_XMZero); - - uint32x4_t t0 = vandq_u32(vreinterpretq_u32_f32(V), g_XMQNaNTest); - uint32x4_t t1 = vandq_u32(vreinterpretq_u32_f32(V), g_XMInfinity); - t0 = vceqq_u32(t0, g_XMZero); - t1 = vceqq_u32(t1, g_XMInfinity); - uint32x4_t isNaN = vbicq_u32(t1, t0); - - float32x4_t result = vbslq_f32(isInfinite, g_XMInfinity, log2); - float32x4_t tmp2 = vbslq_f32(isZero, g_XMNegInfinity, g_XMNegQNaN); - result = vbslq_f32(isPositive, result, tmp2); - result = vbslq_f32(isNaN, g_XMQNaN, result); - return result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_log_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128i rawBiased = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); - __m128i trailing = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); - __m128i isExponentZero = _mm_cmpeq_epi32(g_XMZero, rawBiased); - - // Compute exponent and significand for normals. - __m128i biased = _mm_srli_epi32(rawBiased, 23); - __m128i exponentNor = _mm_sub_epi32(biased, g_XMExponentBias); - __m128i trailingNor = trailing; - - // Compute exponent and significand for subnormals. - __m128i leading = MathInternal::GetLeadingBit(trailing); - __m128i shift = _mm_sub_epi32(g_XMNumTrailing, leading); - __m128i exponentSub = _mm_sub_epi32(g_XMSubnormalExponent, shift); - __m128i trailingSub = MathInternal::multi_sll_epi32(trailing, shift); - trailingSub = _mm_and_si128(trailingSub, g_XMQNaNTest); - - __m128i select0 = _mm_and_si128(isExponentZero, exponentSub); - __m128i select1 = _mm_andnot_si128(isExponentZero, exponentNor); - __m128i e = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isExponentZero, trailingSub); - select1 = _mm_andnot_si128(isExponentZero, trailingNor); - __m128i t = _mm_or_si128(select0, select1); - - // Compute the approximation. - __m128i tmp = _mm_or_si128(g_XMOne, t); - __m128 y = _mm_sub_ps(_mm_castsi128_ps(tmp), g_XMOne); - - __m128 log2 = XM_FMADD_PS(g_XMLogEst7, y, g_XMLogEst6); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst5); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst4); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst3); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst2); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst1); - log2 = XM_FMADD_PS(log2, y, g_XMLogEst0); - log2 = XM_FMADD_PS(log2, y, _mm_cvtepi32_ps(e)); - - log2 = _mm_mul_ps(g_XMInvLgE, log2); - - // if (x is NaN) -> QNaN - // else if (V is positive) - // if (V is infinite) -> +inf - // else -> log2(V) - // else - // if (V is zero) -> -inf - // else -> -QNaN - - __m128i isInfinite = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - isInfinite = _mm_cmpeq_epi32(isInfinite, g_XMInfinity); - - __m128i isGreaterZero = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMZero); - __m128i isNotFinite = _mm_cmpgt_epi32(_mm_castps_si128(V), g_XMInfinity); - __m128i isPositive = _mm_andnot_si128(isNotFinite, isGreaterZero); - - __m128i isZero = _mm_and_si128(_mm_castps_si128(V), g_XMAbsMask); - isZero = _mm_cmpeq_epi32(isZero, g_XMZero); - - __m128i t0 = _mm_and_si128(_mm_castps_si128(V), g_XMQNaNTest); - __m128i t1 = _mm_and_si128(_mm_castps_si128(V), g_XMInfinity); - t0 = _mm_cmpeq_epi32(t0, g_XMZero); - t1 = _mm_cmpeq_epi32(t1, g_XMInfinity); - __m128i isNaN = _mm_andnot_si128(t0, t1); - - select0 = _mm_and_si128(isInfinite, g_XMInfinity); - select1 = _mm_andnot_si128(isInfinite, _mm_castps_si128(log2)); - __m128i result = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isZero, g_XMNegInfinity); - select1 = _mm_andnot_si128(isZero, g_XMNegQNaN); - tmp = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isPositive, result); - select1 = _mm_andnot_si128(isPositive, tmp); - result = _mm_or_si128(select0, select1); - - select0 = _mm_and_si128(isNaN, g_XMQNaN); - select1 = _mm_andnot_si128(isNaN, result); - result = _mm_or_si128(select0, select1); - - return _mm_castsi128_ps(result); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLog(FXMVECTOR V) noexcept { - return XMVectorLog2(V); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorPow(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{powf(V1.vector4_f32[0], V2.vector4_f32[0]), - powf(V1.vector4_f32[1], V2.vector4_f32[1]), - powf(V1.vector4_f32[2], V2.vector4_f32[2]), - powf(V1.vector4_f32[3], V2.vector4_f32[3])}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTORF32 vResult = { - {{powf(vgetq_lane_f32(V1, 0), vgetq_lane_f32(V2, 0)), - powf(vgetq_lane_f32(V1, 1), vgetq_lane_f32(V2, 1)), - powf(vgetq_lane_f32(V1, 2), vgetq_lane_f32(V2, 2)), - powf(vgetq_lane_f32(V1, 3), vgetq_lane_f32(V2, 3))}}}; - return vResult.v; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_pow_ps(V1, V2); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - XM_ALIGNED_DATA(16) float a[4]; - XM_ALIGNED_DATA(16) float b[4]; - _mm_store_ps(a, V1); - _mm_store_ps(b, V2); - XMVECTOR vResult = _mm_setr_ps(powf(a[0], b[0]), powf(a[1], b[1]), - powf(a[2], b[2]), powf(a[3], b[3])); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorAbs(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{fabsf(V.vector4_f32[0]), fabsf(V.vector4_f32[1]), - fabsf(V.vector4_f32[2]), fabsf(V.vector4_f32[3])}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - return vabsq_f32(V); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_setzero_ps(); - vResult = _mm_sub_ps(vResult, V); - vResult = _mm_max_ps(vResult, V); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorMod(FXMVECTOR V1, FXMVECTOR V2) noexcept { - // V1 % V2 = V1 - V2 * truncate(V1 / V2) - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Quotient = XMVectorDivide(V1, V2); - Quotient = XMVectorTruncate(Quotient); - XMVECTOR Result = XMVectorNegativeMultiplySubtract(V2, Quotient, V1); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR vResult = XMVectorDivide(V1, V2); - vResult = XMVectorTruncate(vResult); - return vmlsq_f32(V1, vResult, V2); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_div_ps(V1, V2); - vResult = XMVectorTruncate(vResult); - return XM_FNMADD_PS(vResult, V2, V1); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorModAngles(FXMVECTOR Angles) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR V; - XMVECTOR Result; - - // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI - V = XMVectorMultiply(Angles, g_XMReciprocalTwoPi.v); - V = XMVectorRound(V); - Result = XMVectorNegativeMultiplySubtract(g_XMTwoPi.v, V, Angles); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI - XMVECTOR vResult = vmulq_f32(Angles, g_XMReciprocalTwoPi); - // Use the inline function due to complexity for rounding - vResult = XMVectorRound(vResult); - return vmlsq_f32(Angles, vResult, g_XMTwoPi); -#elif defined(_XM_SSE_INTRINSICS_) - // Modulo the range of the given angles such that -XM_PI <= Angles < XM_PI - XMVECTOR vResult = _mm_mul_ps(Angles, g_XMReciprocalTwoPi); - // Use the inline function due to complexity for rounding - vResult = XMVectorRound(vResult); - return XM_FNMADD_PS(vResult, g_XMTwoPi, Angles); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSin(FXMVECTOR V) noexcept { - // 11-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{sinf(V.vector4_f32[0]), sinf(V.vector4_f32[1]), - sinf(V.vector4_f32[2]), sinf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with sin(y) = sin(x). - uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); - uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - float32x4_t absx = vabsq_f32(x); - float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); - uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); - x = vbslq_f32(comp, x, rflx); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation - const XMVECTOR SC1 = g_XMSinCoefficients1; - const XMVECTOR SC0 = g_XMSinCoefficients0; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SC0), 1); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(SC1), 0); - - vConstants = vdupq_lane_f32(vget_high_f32(SC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(SC0), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(SC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - Result = vmulq_f32(Result, x); - return Result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_sin_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with sin(y) = sin(x). - __m128 sign = _mm_and_ps(x, g_XMNegativeZero); - __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - __m128 absx = _mm_andnot_ps(sign, x); // |x| - __m128 rflx = _mm_sub_ps(c, x); - __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); - __m128 select0 = _mm_and_ps(comp, x); - __m128 select1 = _mm_andnot_ps(comp, rflx); - x = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation - const XMVECTOR SC1 = g_XMSinCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(SC1, _MM_SHUFFLE(0, 0, 0, 0)); - const XMVECTOR SC0 = g_XMSinCoefficients0; - __m128 vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(2, 2, 2, 2)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(0, 0, 0, 0)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, x); - return Result; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorCos(FXMVECTOR V) noexcept { - // 10-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{cosf(V.vector4_f32[0]), cosf(V.vector4_f32[1]), - cosf(V.vector4_f32[2]), cosf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Map V to x in [-pi,pi]. - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). - uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); - uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - float32x4_t absx = vabsq_f32(x); - float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); - uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); - x = vbslq_f32(comp, x, rflx); - float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation - const XMVECTOR CC1 = g_XMCosCoefficients1; - const XMVECTOR CC0 = g_XMCosCoefficients0; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(CC0), 1); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(CC1), 0); - - vConstants = vdupq_lane_f32(vget_high_f32(CC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(CC0), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(CC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - Result = vmulq_f32(Result, fsign); - return Result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_cos_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - // Map V to x in [-pi,pi]. - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). - XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); - __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - __m128 absx = _mm_andnot_ps(sign, x); // |x| - __m128 rflx = _mm_sub_ps(c, x); - __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); - __m128 select0 = _mm_and_ps(comp, x); - __m128 select1 = _mm_andnot_ps(comp, rflx); - x = _mm_or_ps(select0, select1); - select0 = _mm_and_ps(comp, g_XMOne); - select1 = _mm_andnot_ps(comp, g_XMNegativeOne); - sign = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation - const XMVECTOR CC1 = g_XMCosCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(CC1, _MM_SHUFFLE(0, 0, 0, 0)); - const XMVECTOR CC0 = g_XMCosCoefficients0; - __m128 vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(2, 2, 2, 2)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(0, 0, 0, 0)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, sign); - return Result; -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorSinCos(XMVECTOR* pSin, XMVECTOR* pCos, FXMVECTOR V) noexcept { - assert(pSin != nullptr); - assert(pCos != nullptr); - - // 11/10-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Sin = {{{sinf(V.vector4_f32[0]), sinf(V.vector4_f32[1]), - sinf(V.vector4_f32[2]), sinf(V.vector4_f32[3])}}}; - - XMVECTORF32 Cos = {{{cosf(V.vector4_f32[0]), cosf(V.vector4_f32[1]), - cosf(V.vector4_f32[2]), cosf(V.vector4_f32[3])}}}; - - *pSin = Sin.v; - *pCos = Cos.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). - uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); - uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - float32x4_t absx = vabsq_f32(x); - float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); - uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); - x = vbslq_f32(comp, x, rflx); - float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation for sine - const XMVECTOR SC1 = g_XMSinCoefficients1; - const XMVECTOR SC0 = g_XMSinCoefficients0; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SC0), 1); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(SC1), 0); - - vConstants = vdupq_lane_f32(vget_high_f32(SC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(SC0), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(SC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - *pSin = vmulq_f32(Result, x); - - // Compute polynomial approximation for cosine - const XMVECTOR CC1 = g_XMCosCoefficients1; - const XMVECTOR CC0 = g_XMCosCoefficients0; - vConstants = vdupq_lane_f32(vget_high_f32(CC0), 1); - Result = vmlaq_lane_f32(vConstants, x2, vget_low_f32(CC1), 0); - - vConstants = vdupq_lane_f32(vget_high_f32(CC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(CC0), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(CC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - *pCos = vmulq_f32(Result, fsign); -#elif defined(_XM_SVML_INTRINSICS_) - *pSin = _mm_sincos_ps(pCos, V); -#elif defined(_XM_SSE_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x). - XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); - __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - __m128 absx = _mm_andnot_ps(sign, x); // |x| - __m128 rflx = _mm_sub_ps(c, x); - __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); - __m128 select0 = _mm_and_ps(comp, x); - __m128 select1 = _mm_andnot_ps(comp, rflx); - x = _mm_or_ps(select0, select1); - select0 = _mm_and_ps(comp, g_XMOne); - select1 = _mm_andnot_ps(comp, g_XMNegativeOne); - sign = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation of sine - const XMVECTOR SC1 = g_XMSinCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(SC1, _MM_SHUFFLE(0, 0, 0, 0)); - const XMVECTOR SC0 = g_XMSinCoefficients0; - __m128 vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(2, 2, 2, 2)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SC0, _MM_SHUFFLE(0, 0, 0, 0)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, x); - *pSin = Result; - - // Compute polynomial approximation of cosine - const XMVECTOR CC1 = g_XMCosCoefficients1; - vConstantsB = XM_PERMUTE_PS(CC1, _MM_SHUFFLE(0, 0, 0, 0)); - const XMVECTOR CC0 = g_XMCosCoefficients0; - vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(3, 3, 3, 3)); - Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(2, 2, 2, 2)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CC0, _MM_SHUFFLE(0, 0, 0, 0)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, sign); - *pCos = Result; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorTan(FXMVECTOR V) noexcept { - // Cody and Waite algorithm to compute tangent. - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{tanf(V.vector4_f32[0]), tanf(V.vector4_f32[1]), - tanf(V.vector4_f32[2]), tanf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_tan_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - static const XMVECTORF32 TanCoefficients0 = { - {{1.0f, -4.667168334e-1f, 2.566383229e-2f, -3.118153191e-4f}}}; - static const XMVECTORF32 TanCoefficients1 = { - {{4.981943399e-7f, -1.333835001e-1f, 3.424887824e-3f, - -1.786170734e-5f}}}; - static const XMVECTORF32 TanConstants = { - {{1.570796371f, 6.077100628e-11f, 0.000244140625f, - 0.63661977228f /*2 / Pi*/}}}; - static const XMVECTORU32 Mask = {{{0x1, 0x1, 0x1, 0x1}}}; - - XMVECTOR TwoDivPi = XMVectorSplatW(TanConstants.v); - - XMVECTOR Zero = XMVectorZero(); - - XMVECTOR C0 = XMVectorSplatX(TanConstants.v); - XMVECTOR C1 = XMVectorSplatY(TanConstants.v); - XMVECTOR Epsilon = XMVectorSplatZ(TanConstants.v); - - XMVECTOR VA = XMVectorMultiply(V, TwoDivPi); - - VA = XMVectorRound(VA); - - XMVECTOR VC = XMVectorNegativeMultiplySubtract(VA, C0, V); - - XMVECTOR VB = XMVectorAbs(VA); - - VC = XMVectorNegativeMultiplySubtract(VA, C1, VC); - -#if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - VB = vreinterpretq_f32_u32(vcvtq_u32_f32(VB)); -#elif defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - reinterpret_cast<__m128i*>(&VB)[0] = _mm_cvttps_epi32(VB); -#else - for (size_t i = 0; i < 4; i++) { - VB.vector4_u32[i] = static_cast(VB.vector4_f32[i]); - } -#endif - - XMVECTOR VC2 = XMVectorMultiply(VC, VC); - - XMVECTOR T7 = XMVectorSplatW(TanCoefficients1.v); - XMVECTOR T6 = XMVectorSplatZ(TanCoefficients1.v); - XMVECTOR T4 = XMVectorSplatX(TanCoefficients1.v); - XMVECTOR T3 = XMVectorSplatW(TanCoefficients0.v); - XMVECTOR T5 = XMVectorSplatY(TanCoefficients1.v); - XMVECTOR T2 = XMVectorSplatZ(TanCoefficients0.v); - XMVECTOR T1 = XMVectorSplatY(TanCoefficients0.v); - XMVECTOR T0 = XMVectorSplatX(TanCoefficients0.v); - - XMVECTOR VBIsEven = XMVectorAndInt(VB, Mask.v); - VBIsEven = XMVectorEqualInt(VBIsEven, Zero); - - XMVECTOR N = XMVectorMultiplyAdd(VC2, T7, T6); - XMVECTOR D = XMVectorMultiplyAdd(VC2, T4, T3); - N = XMVectorMultiplyAdd(VC2, N, T5); - D = XMVectorMultiplyAdd(VC2, D, T2); - N = XMVectorMultiply(VC2, N); - D = XMVectorMultiplyAdd(VC2, D, T1); - N = XMVectorMultiplyAdd(VC, N, VC); - XMVECTOR VCNearZero = XMVectorInBounds(VC, Epsilon); - D = XMVectorMultiplyAdd(VC2, D, T0); - - N = XMVectorSelect(N, VC, VCNearZero); - D = XMVectorSelect(D, g_XMOne.v, VCNearZero); - - XMVECTOR R0 = XMVectorNegate(N); - XMVECTOR R1 = XMVectorDivide(N, D); - R0 = XMVectorDivide(D, R0); - - XMVECTOR VIsZero = XMVectorEqual(V, Zero); - - XMVECTOR Result = XMVectorSelect(R0, R1, VBIsEven); - - Result = XMVectorSelect(Result, Zero, VIsZero); - - return Result; - -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSinH(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{sinhf(V.vector4_f32[0]), sinhf(V.vector4_f32[1]), - sinhf(V.vector4_f32[2]), sinhf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.442695040888963f, 1.442695040888963f, 1.442695040888963f, - 1.442695040888963f}}}; // 1.0f / ln(2.0f) - - XMVECTOR V1 = vmlaq_f32(g_XMNegativeOne.v, V, Scale.v); - XMVECTOR V2 = vmlsq_f32(g_XMNegativeOne.v, V, Scale.v); - XMVECTOR E1 = XMVectorExp(V1); - XMVECTOR E2 = XMVectorExp(V2); - - return vsubq_f32(E1, E2); -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_sinh_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.442695040888963f, 1.442695040888963f, 1.442695040888963f, - 1.442695040888963f}}}; // 1.0f / ln(2.0f) - - XMVECTOR V1 = XM_FMADD_PS(V, Scale, g_XMNegativeOne); - XMVECTOR V2 = XM_FNMADD_PS(V, Scale, g_XMNegativeOne); - XMVECTOR E1 = XMVectorExp(V1); - XMVECTOR E2 = XMVectorExp(V2); - - return _mm_sub_ps(E1, E2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorCosH(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{coshf(V.vector4_f32[0]), coshf(V.vector4_f32[1]), - coshf(V.vector4_f32[2]), coshf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.442695040888963f, 1.442695040888963f, 1.442695040888963f, - 1.442695040888963f}}}; // 1.0f / ln(2.0f) - - XMVECTOR V1 = vmlaq_f32(g_XMNegativeOne.v, V, Scale.v); - XMVECTOR V2 = vmlsq_f32(g_XMNegativeOne.v, V, Scale.v); - XMVECTOR E1 = XMVectorExp(V1); - XMVECTOR E2 = XMVectorExp(V2); - return vaddq_f32(E1, E2); -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_cosh_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.442695040888963f, 1.442695040888963f, 1.442695040888963f, - 1.442695040888963f}}}; // 1.0f / ln(2.0f) - - XMVECTOR V1 = XM_FMADD_PS(V, Scale.v, g_XMNegativeOne.v); - XMVECTOR V2 = XM_FNMADD_PS(V, Scale.v, g_XMNegativeOne.v); - XMVECTOR E1 = XMVectorExp(V1); - XMVECTOR E2 = XMVectorExp(V2); - return _mm_add_ps(E1, E2); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorTanH(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{tanhf(V.vector4_f32[0]), tanhf(V.vector4_f32[1]), - tanhf(V.vector4_f32[2]), tanhf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, - 2.8853900817779268f}}}; // 2.0f / ln(2.0f) - - XMVECTOR E = vmulq_f32(V, Scale.v); - E = XMVectorExp(E); - E = vmlaq_f32(g_XMOneHalf.v, E, g_XMOneHalf.v); - E = XMVectorReciprocal(E); - return vsubq_f32(g_XMOne.v, E); -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_tanh_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{2.8853900817779268f, 2.8853900817779268f, 2.8853900817779268f, - 2.8853900817779268f}}}; // 2.0f / ln(2.0f) - - XMVECTOR E = _mm_mul_ps(V, Scale.v); - E = XMVectorExp(E); - E = XM_FMADD_PS(E, g_XMOneHalf.v, g_XMOneHalf.v); - E = _mm_div_ps(g_XMOne.v, E); - return _mm_sub_ps(g_XMOne.v, E); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorASin(FXMVECTOR V) noexcept { - // 7-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{asinf(V.vector4_f32[0]), asinf(V.vector4_f32[1]), - asinf(V.vector4_f32[2]), asinf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); - float32x4_t x = vabsq_f32(V); - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - float32x4_t oneMValue = vsubq_f32(g_XMOne, x); - float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); - float32x4_t root = XMVectorSqrt(clampOneMValue); - - // Compute polynomial approximation - const XMVECTOR AC1 = g_XMArcCoefficients1; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AC1), 0); - XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AC1), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(AC1), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AC1), 0); - t0 = vmlaq_f32(vConstants, t0, x); - - const XMVECTOR AC0 = g_XMArcCoefficients0; - vConstants = vdupq_lane_f32(vget_high_f32(AC0), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_high_f32(AC0), 0); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AC0), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AC0), 0); - t0 = vmlaq_f32(vConstants, t0, x); - t0 = vmulq_f32(t0, root); - - float32x4_t t1 = vsubq_f32(g_XMPi, t0); - t0 = vbslq_f32(nonnegative, t0, t1); - t0 = vsubq_f32(g_XMHalfPi, t0); - return t0; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_asin_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); - __m128 mvalue = _mm_sub_ps(g_XMZero, V); - __m128 x = _mm_max_ps(V, mvalue); // |V| - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - __m128 oneMValue = _mm_sub_ps(g_XMOne, x); - __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); - __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) - - // Compute polynomial approximation - const XMVECTOR AC1 = g_XMArcCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(1, 1, 1, 1)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(0, 0, 0, 0)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - const XMVECTOR AC0 = g_XMArcCoefficients0; - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(3, 3, 3, 3)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(2, 2, 2, 2)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(1, 1, 1, 1)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(0, 0, 0, 0)); - t0 = XM_FMADD_PS(t0, x, vConstants); - t0 = _mm_mul_ps(t0, root); - - __m128 t1 = _mm_sub_ps(g_XMPi, t0); - t0 = _mm_and_ps(nonnegative, t0); - t1 = _mm_andnot_ps(nonnegative, t1); - t0 = _mm_or_ps(t0, t1); - t0 = _mm_sub_ps(g_XMHalfPi, t0); - return t0; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorACos(FXMVECTOR V) noexcept { - // 7-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{acosf(V.vector4_f32[0]), acosf(V.vector4_f32[1]), - acosf(V.vector4_f32[2]), acosf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); - float32x4_t x = vabsq_f32(V); - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - float32x4_t oneMValue = vsubq_f32(g_XMOne, x); - float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); - float32x4_t root = XMVectorSqrt(clampOneMValue); - - // Compute polynomial approximation - const XMVECTOR AC1 = g_XMArcCoefficients1; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AC1), 0); - XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AC1), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(AC1), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AC1), 0); - t0 = vmlaq_f32(vConstants, t0, x); - - const XMVECTOR AC0 = g_XMArcCoefficients0; - vConstants = vdupq_lane_f32(vget_high_f32(AC0), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_high_f32(AC0), 0); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AC0), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AC0), 0); - t0 = vmlaq_f32(vConstants, t0, x); - t0 = vmulq_f32(t0, root); - - float32x4_t t1 = vsubq_f32(g_XMPi, t0); - t0 = vbslq_f32(nonnegative, t0, t1); - return t0; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_acos_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); - __m128 mvalue = _mm_sub_ps(g_XMZero, V); - __m128 x = _mm_max_ps(V, mvalue); // |V| - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - __m128 oneMValue = _mm_sub_ps(g_XMOne, x); - __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); - __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) - - // Compute polynomial approximation - const XMVECTOR AC1 = g_XMArcCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(1, 1, 1, 1)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC1, _MM_SHUFFLE(0, 0, 0, 0)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - const XMVECTOR AC0 = g_XMArcCoefficients0; - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(3, 3, 3, 3)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(2, 2, 2, 2)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(1, 1, 1, 1)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AC0, _MM_SHUFFLE(0, 0, 0, 0)); - t0 = XM_FMADD_PS(t0, x, vConstants); - t0 = _mm_mul_ps(t0, root); - - __m128 t1 = _mm_sub_ps(g_XMPi, t0); - t0 = _mm_and_ps(nonnegative, t0); - t1 = _mm_andnot_ps(nonnegative, t1); - t0 = _mm_or_ps(t0, t1); - return t0; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorATan(FXMVECTOR V) noexcept { - // 17-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{atanf(V.vector4_f32[0]), atanf(V.vector4_f32[1]), - atanf(V.vector4_f32[2]), atanf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t absV = vabsq_f32(V); - float32x4_t invV = XMVectorReciprocal(V); - uint32x4_t comp = vcgtq_f32(V, g_XMOne); - float32x4_t sign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); - comp = vcleq_f32(absV, g_XMOne); - sign = vbslq_f32(comp, g_XMZero, sign); - float32x4_t x = vbslq_f32(comp, V, invV); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation - const XMVECTOR TC1 = g_XMATanCoefficients1; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(TC1), 0); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(TC1), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(TC1), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(TC1), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - const XMVECTOR TC0 = g_XMATanCoefficients0; - vConstants = vdupq_lane_f32(vget_high_f32(TC0), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_high_f32(TC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(TC0), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(TC0), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - Result = vmulq_f32(Result, x); - - float32x4_t result1 = vmulq_f32(sign, g_XMHalfPi); - result1 = vsubq_f32(result1, Result); - - comp = vceqq_f32(sign, g_XMZero); - Result = vbslq_f32(comp, Result, result1); - return Result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_atan_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 absV = XMVectorAbs(V); - __m128 invV = _mm_div_ps(g_XMOne, V); - __m128 comp = _mm_cmpgt_ps(V, g_XMOne); - __m128 select0 = _mm_and_ps(comp, g_XMOne); - __m128 select1 = _mm_andnot_ps(comp, g_XMNegativeOne); - __m128 sign = _mm_or_ps(select0, select1); - comp = _mm_cmple_ps(absV, g_XMOne); - select0 = _mm_and_ps(comp, g_XMZero); - select1 = _mm_andnot_ps(comp, sign); - sign = _mm_or_ps(select0, select1); - select0 = _mm_and_ps(comp, V); - select1 = _mm_andnot_ps(comp, invV); - __m128 x = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation - const XMVECTOR TC1 = g_XMATanCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(TC1, _MM_SHUFFLE(0, 0, 0, 0)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - const XMVECTOR TC0 = g_XMATanCoefficients0; - vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(3, 3, 3, 3)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(2, 2, 2, 2)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(TC0, _MM_SHUFFLE(0, 0, 0, 0)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - Result = XM_FMADD_PS(Result, x2, g_XMOne); - - Result = _mm_mul_ps(Result, x); - __m128 result1 = _mm_mul_ps(sign, g_XMHalfPi); - result1 = _mm_sub_ps(result1, Result); - - comp = _mm_cmpeq_ps(sign, g_XMZero); - select0 = _mm_and_ps(comp, Result); - select1 = _mm_andnot_ps(comp, result1); - Result = _mm_or_ps(select0, select1); - return Result; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorATan2(FXMVECTOR Y, FXMVECTOR X) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{atan2f(Y.vector4_f32[0], X.vector4_f32[0]), - atan2f(Y.vector4_f32[1], X.vector4_f32[1]), - atan2f(Y.vector4_f32[2], X.vector4_f32[2]), - atan2f(Y.vector4_f32[3], X.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_atan2_ps(Y, X); - return Result; -#else - - // Return the inverse tangent of Y / X in the range of -Pi to Pi with the - // following exceptions: - - // Y == 0 and X is Negative -> Pi with the sign of Y - // y == 0 and x is positive -> 0 with the sign of y - // Y != 0 and X == 0 -> Pi / 2 with the sign of Y - // Y != 0 and X is Negative -> atan(y/x) + (PI with the sign of - // Y) X == -Infinity and Finite Y -> Pi with the sign of Y X == - // +Infinity and Finite Y -> 0 with the sign of Y Y == Infinity and - // X is Finite -> Pi / 2 with the sign of Y Y == Infinity and X == - // -Infinity -> 3Pi / 4 with the sign of Y Y == Infinity and X == - // +Infinity -> Pi / 4 with the sign of Y - - static const XMVECTORF32 ATan2Constants = { - {{XM_PI, XM_PIDIV2, XM_PIDIV4, XM_PI * 3.0f / 4.0f}}}; - - XMVECTOR Zero = XMVectorZero(); - XMVECTOR ATanResultValid = XMVectorTrueInt(); - - XMVECTOR Pi = XMVectorSplatX(ATan2Constants); - XMVECTOR PiOverTwo = XMVectorSplatY(ATan2Constants); - XMVECTOR PiOverFour = XMVectorSplatZ(ATan2Constants); - XMVECTOR ThreePiOverFour = XMVectorSplatW(ATan2Constants); - - XMVECTOR YEqualsZero = XMVectorEqual(Y, Zero); - XMVECTOR XEqualsZero = XMVectorEqual(X, Zero); - XMVECTOR XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v); - XIsPositive = XMVectorEqualInt(XIsPositive, Zero); - XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y); - XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X); - - XMVECTOR YSign = XMVectorAndInt(Y, g_XMNegativeZero.v); - Pi = XMVectorOrInt(Pi, YSign); - PiOverTwo = XMVectorOrInt(PiOverTwo, YSign); - PiOverFour = XMVectorOrInt(PiOverFour, YSign); - ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign); - - XMVECTOR R1 = XMVectorSelect(Pi, YSign, XIsPositive); - XMVECTOR R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero); - XMVECTOR R3 = XMVectorSelect(R2, R1, YEqualsZero); - XMVECTOR R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive); - XMVECTOR R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity); - XMVECTOR Result = XMVectorSelect(R3, R5, YEqualsInfinity); - ATanResultValid = XMVectorEqualInt(Result, ATanResultValid); - - XMVECTOR V = XMVectorDivide(Y, X); - - XMVECTOR R0 = XMVectorATan(V); - - R1 = XMVectorSelect(Pi, g_XMNegativeZero, XIsPositive); - R2 = XMVectorAdd(R0, R1); - - return XMVectorSelect(Result, R2, ATanResultValid); - -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorSinEst(FXMVECTOR V) noexcept { - // 7-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{sinf(V.vector4_f32[0]), sinf(V.vector4_f32[1]), - sinf(V.vector4_f32[2]), sinf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with sin(y) = sin(x). - uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); - uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - float32x4_t absx = vabsq_f32(x); - float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); - uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); - x = vbslq_f32(comp, x, rflx); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation - const XMVECTOR SEC = g_XMSinCoefficients1; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SEC), 0); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(SEC), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(SEC), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - Result = vmulq_f32(Result, x); - return Result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_sin_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with sin(y) = sin(x). - __m128 sign = _mm_and_ps(x, g_XMNegativeZero); - __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - __m128 absx = _mm_andnot_ps(sign, x); // |x| - __m128 rflx = _mm_sub_ps(c, x); - __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); - __m128 select0 = _mm_and_ps(comp, x); - __m128 select1 = _mm_andnot_ps(comp, rflx); - x = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation - const XMVECTOR SEC = g_XMSinCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, x); - return Result; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorCosEst(FXMVECTOR V) noexcept { - // 6-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{cosf(V.vector4_f32[0]), cosf(V.vector4_f32[1]), - cosf(V.vector4_f32[2]), cosf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Map V to x in [-pi,pi]. - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). - uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); - uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - float32x4_t absx = vabsq_f32(x); - float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); - uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); - x = vbslq_f32(comp, x, rflx); - float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation - const XMVECTOR CEC = g_XMCosCoefficients1; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(CEC), 0); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(CEC), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(CEC), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - Result = vmulq_f32(Result, fsign); - return Result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_cos_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - // Map V to x in [-pi,pi]. - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). - XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); - __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - __m128 absx = _mm_andnot_ps(sign, x); // |x| - __m128 rflx = _mm_sub_ps(c, x); - __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); - __m128 select0 = _mm_and_ps(comp, x); - __m128 select1 = _mm_andnot_ps(comp, rflx); - x = _mm_or_ps(select0, select1); - select0 = _mm_and_ps(comp, g_XMOne); - select1 = _mm_andnot_ps(comp, g_XMNegativeOne); - sign = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation - const XMVECTOR CEC = g_XMCosCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, sign); - return Result; -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline void XM_CALLCONV -XMVectorSinCosEst(XMVECTOR* pSin, XMVECTOR* pCos, FXMVECTOR V) noexcept { - assert(pSin != nullptr); - assert(pCos != nullptr); - - // 7/6-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Sin = {{{sinf(V.vector4_f32[0]), sinf(V.vector4_f32[1]), - sinf(V.vector4_f32[2]), sinf(V.vector4_f32[3])}}}; - - XMVECTORF32 Cos = {{{cosf(V.vector4_f32[0]), cosf(V.vector4_f32[1]), - cosf(V.vector4_f32[2]), cosf(V.vector4_f32[3])}}}; - - *pSin = Sin.v; - *pCos = Cos.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with cos(y) = sign*cos(x). - uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(x), g_XMNegativeZero); - uint32x4_t c = vorrq_u32(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - float32x4_t absx = vabsq_f32(x); - float32x4_t rflx = vsubq_f32(vreinterpretq_f32_u32(c), x); - uint32x4_t comp = vcleq_f32(absx, g_XMHalfPi); - x = vbslq_f32(comp, x, rflx); - float32x4_t fsign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation for sine - const XMVECTOR SEC = g_XMSinCoefficients1; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(SEC), 0); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(SEC), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(SEC), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - *pSin = vmulq_f32(Result, x); - - // Compute polynomial approximation - const XMVECTOR CEC = g_XMCosCoefficients1; - vConstants = vdupq_lane_f32(vget_high_f32(CEC), 0); - Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(CEC), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(CEC), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - Result = vmlaq_f32(g_XMOne, Result, x2); - *pCos = vmulq_f32(Result, fsign); -#elif defined(_XM_SSE_INTRINSICS_) - // Force the value within the bounds of pi - XMVECTOR x = XMVectorModAngles(V); - - // Map in [-pi/2,pi/2] with sin(y) = sin(x), cos(y) = sign*cos(x). - XMVECTOR sign = _mm_and_ps(x, g_XMNegativeZero); - __m128 c = _mm_or_ps(g_XMPi, sign); // pi when x >= 0, -pi when x < 0 - __m128 absx = _mm_andnot_ps(sign, x); // |x| - __m128 rflx = _mm_sub_ps(c, x); - __m128 comp = _mm_cmple_ps(absx, g_XMHalfPi); - __m128 select0 = _mm_and_ps(comp, x); - __m128 select1 = _mm_andnot_ps(comp, rflx); - x = _mm_or_ps(select0, select1); - select0 = _mm_and_ps(comp, g_XMOne); - select1 = _mm_andnot_ps(comp, g_XMNegativeOne); - sign = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation for sine - const XMVECTOR SEC = g_XMSinCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(SEC, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, x); - *pSin = Result; - - // Compute polynomial approximation for cosine - const XMVECTOR CEC = g_XMCosCoefficients1; - vConstantsB = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(3, 3, 3, 3)); - vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(2, 2, 2, 2)); - Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(CEC, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - Result = XM_FMADD_PS(Result, x2, g_XMOne); - Result = _mm_mul_ps(Result, sign); - *pCos = Result; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorTanEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{tanf(V.vector4_f32[0]), tanf(V.vector4_f32[1]), - tanf(V.vector4_f32[2]), tanf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_tan_ps(V); - return Result; -#else - - XMVECTOR OneOverPi = XMVectorSplatW(g_XMTanEstCoefficients.v); - - XMVECTOR V1 = XMVectorMultiply(V, OneOverPi); - V1 = XMVectorRound(V1); - - V1 = XMVectorNegativeMultiplySubtract(g_XMPi.v, V1, V); - - XMVECTOR T0 = XMVectorSplatX(g_XMTanEstCoefficients.v); - XMVECTOR T1 = XMVectorSplatY(g_XMTanEstCoefficients.v); - XMVECTOR T2 = XMVectorSplatZ(g_XMTanEstCoefficients.v); - - XMVECTOR V2T2 = XMVectorNegativeMultiplySubtract(V1, V1, T2); - XMVECTOR V2 = XMVectorMultiply(V1, V1); - XMVECTOR V1T0 = XMVectorMultiply(V1, T0); - XMVECTOR V1T1 = XMVectorMultiply(V1, T1); - - XMVECTOR D = XMVectorReciprocalEst(V2T2); - XMVECTOR N = XMVectorMultiplyAdd(V2, V1T1, V1T0); - - return XMVectorMultiply(N, D); - -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorASinEst(FXMVECTOR V) noexcept { - // 3-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result; - Result.f[0] = asinf(V.vector4_f32[0]); - Result.f[1] = asinf(V.vector4_f32[1]); - Result.f[2] = asinf(V.vector4_f32[2]); - Result.f[3] = asinf(V.vector4_f32[3]); - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); - float32x4_t x = vabsq_f32(V); - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - float32x4_t oneMValue = vsubq_f32(g_XMOne, x); - float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); - float32x4_t root = XMVectorSqrt(clampOneMValue); - - // Compute polynomial approximation - const XMVECTOR AEC = g_XMArcEstCoefficients; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0); - XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AEC), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0); - t0 = vmlaq_f32(vConstants, t0, x); - t0 = vmulq_f32(t0, root); - - float32x4_t t1 = vsubq_f32(g_XMPi, t0); - t0 = vbslq_f32(nonnegative, t0, t1); - t0 = vsubq_f32(g_XMHalfPi, t0); - return t0; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_asin_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); - __m128 mvalue = _mm_sub_ps(g_XMZero, V); - __m128 x = _mm_max_ps(V, mvalue); // |V| - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - __m128 oneMValue = _mm_sub_ps(g_XMOne, x); - __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); - __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) - - // Compute polynomial approximation - const XMVECTOR AEC = g_XMArcEstCoefficients; - __m128 vConstantsB = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); - - vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(1, 1, 1, 1)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(0, 0, 0, 0)); - t0 = XM_FMADD_PS(t0, x, vConstants); - t0 = _mm_mul_ps(t0, root); - - __m128 t1 = _mm_sub_ps(g_XMPi, t0); - t0 = _mm_and_ps(nonnegative, t0); - t1 = _mm_andnot_ps(nonnegative, t1); - t0 = _mm_or_ps(t0, t1); - t0 = _mm_sub_ps(g_XMHalfPi, t0); - return t0; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorACosEst(FXMVECTOR V) noexcept { - // 3-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{acosf(V.vector4_f32[0]), acosf(V.vector4_f32[1]), - acosf(V.vector4_f32[2]), acosf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t nonnegative = vcgeq_f32(V, g_XMZero); - float32x4_t x = vabsq_f32(V); - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - float32x4_t oneMValue = vsubq_f32(g_XMOne, x); - float32x4_t clampOneMValue = vmaxq_f32(g_XMZero, oneMValue); - float32x4_t root = XMVectorSqrt(clampOneMValue); - - // Compute polynomial approximation - const XMVECTOR AEC = g_XMArcEstCoefficients; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0); - XMVECTOR t0 = vmlaq_lane_f32(vConstants, x, vget_high_f32(AEC), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1); - t0 = vmlaq_f32(vConstants, t0, x); - - vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0); - t0 = vmlaq_f32(vConstants, t0, x); - t0 = vmulq_f32(t0, root); - - float32x4_t t1 = vsubq_f32(g_XMPi, t0); - t0 = vbslq_f32(nonnegative, t0, t1); - return t0; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_acos_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 nonnegative = _mm_cmpge_ps(V, g_XMZero); - __m128 mvalue = _mm_sub_ps(g_XMZero, V); - __m128 x = _mm_max_ps(V, mvalue); // |V| - - // Compute (1-|V|), clamp to zero to avoid sqrt of negative number. - __m128 oneMValue = _mm_sub_ps(g_XMOne, x); - __m128 clampOneMValue = _mm_max_ps(g_XMZero, oneMValue); - __m128 root = _mm_sqrt_ps(clampOneMValue); // sqrt(1-|V|) - - // Compute polynomial approximation - const XMVECTOR AEC = g_XMArcEstCoefficients; - __m128 vConstantsB = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 t0 = XM_FMADD_PS(vConstantsB, x, vConstants); - - vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(1, 1, 1, 1)); - t0 = XM_FMADD_PS(t0, x, vConstants); - - vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(0, 0, 0, 0)); - t0 = XM_FMADD_PS(t0, x, vConstants); - t0 = _mm_mul_ps(t0, root); - - __m128 t1 = _mm_sub_ps(g_XMPi, t0); - t0 = _mm_and_ps(nonnegative, t0); - t1 = _mm_andnot_ps(nonnegative, t1); - t0 = _mm_or_ps(t0, t1); - return t0; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorATanEst(FXMVECTOR V) noexcept { - // 9-degree minimax approximation - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{atanf(V.vector4_f32[0]), atanf(V.vector4_f32[1]), - atanf(V.vector4_f32[2]), atanf(V.vector4_f32[3])}}}; - return Result.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t absV = vabsq_f32(V); - float32x4_t invV = XMVectorReciprocalEst(V); - uint32x4_t comp = vcgtq_f32(V, g_XMOne); - float32x4_t sign = vbslq_f32(comp, g_XMOne, g_XMNegativeOne); - comp = vcleq_f32(absV, g_XMOne); - sign = vbslq_f32(comp, g_XMZero, sign); - float32x4_t x = vbslq_f32(comp, V, invV); - - float32x4_t x2 = vmulq_f32(x, x); - - // Compute polynomial approximation - const XMVECTOR AEC = g_XMATanEstCoefficients1; - XMVECTOR vConstants = vdupq_lane_f32(vget_high_f32(AEC), 0); - XMVECTOR Result = vmlaq_lane_f32(vConstants, x2, vget_high_f32(AEC), 1); - - vConstants = vdupq_lane_f32(vget_low_f32(AEC), 1); - Result = vmlaq_f32(vConstants, Result, x2); - - vConstants = vdupq_lane_f32(vget_low_f32(AEC), 0); - Result = vmlaq_f32(vConstants, Result, x2); - - // ATanEstCoefficients0 is already splatted - Result = vmlaq_f32(g_XMATanEstCoefficients0, Result, x2); - Result = vmulq_f32(Result, x); - - float32x4_t result1 = vmulq_f32(sign, g_XMHalfPi); - result1 = vsubq_f32(result1, Result); - - comp = vceqq_f32(sign, g_XMZero); - Result = vbslq_f32(comp, Result, result1); - return Result; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_atan_ps(V); - return Result; -#elif defined(_XM_SSE_INTRINSICS_) - __m128 absV = XMVectorAbs(V); - __m128 invV = _mm_div_ps(g_XMOne, V); - __m128 comp = _mm_cmpgt_ps(V, g_XMOne); - __m128 select0 = _mm_and_ps(comp, g_XMOne); - __m128 select1 = _mm_andnot_ps(comp, g_XMNegativeOne); - __m128 sign = _mm_or_ps(select0, select1); - comp = _mm_cmple_ps(absV, g_XMOne); - select0 = _mm_and_ps(comp, g_XMZero); - select1 = _mm_andnot_ps(comp, sign); - sign = _mm_or_ps(select0, select1); - select0 = _mm_and_ps(comp, V); - select1 = _mm_andnot_ps(comp, invV); - __m128 x = _mm_or_ps(select0, select1); - - __m128 x2 = _mm_mul_ps(x, x); - - // Compute polynomial approximation - const XMVECTOR AEC = g_XMATanEstCoefficients1; - __m128 vConstantsB = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 Result = XM_FMADD_PS(vConstantsB, x2, vConstants); - - vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(1, 1, 1, 1)); - Result = XM_FMADD_PS(Result, x2, vConstants); - - vConstants = XM_PERMUTE_PS(AEC, _MM_SHUFFLE(0, 0, 0, 0)); - Result = XM_FMADD_PS(Result, x2, vConstants); - // ATanEstCoefficients0 is already splatted - Result = XM_FMADD_PS(Result, x2, g_XMATanEstCoefficients0); - Result = _mm_mul_ps(Result, x); - __m128 result1 = _mm_mul_ps(sign, g_XMHalfPi); - result1 = _mm_sub_ps(result1, Result); - - comp = _mm_cmpeq_ps(sign, g_XMZero); - select0 = _mm_and_ps(comp, Result); - select1 = _mm_andnot_ps(comp, result1); - Result = _mm_or_ps(select0, select1); - return Result; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorATan2Est(FXMVECTOR Y, - FXMVECTOR X) noexcept { -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 Result = {{{ - atan2f(Y.vector4_f32[0], X.vector4_f32[0]), - atan2f(Y.vector4_f32[1], X.vector4_f32[1]), - atan2f(Y.vector4_f32[2], X.vector4_f32[2]), - atan2f(Y.vector4_f32[3], X.vector4_f32[3]), - }}}; - return Result.v; -#elif defined(_XM_SVML_INTRINSICS_) - XMVECTOR Result = _mm_atan2_ps(Y, X); - return Result; -#else - - static const XMVECTORF32 ATan2Constants = { - {{XM_PI, XM_PIDIV2, XM_PIDIV4, 2.3561944905f /* Pi*3/4 */}}}; - - const XMVECTOR Zero = XMVectorZero(); - XMVECTOR ATanResultValid = XMVectorTrueInt(); - - XMVECTOR Pi = XMVectorSplatX(ATan2Constants); - XMVECTOR PiOverTwo = XMVectorSplatY(ATan2Constants); - XMVECTOR PiOverFour = XMVectorSplatZ(ATan2Constants); - XMVECTOR ThreePiOverFour = XMVectorSplatW(ATan2Constants); - - XMVECTOR YEqualsZero = XMVectorEqual(Y, Zero); - XMVECTOR XEqualsZero = XMVectorEqual(X, Zero); - XMVECTOR XIsPositive = XMVectorAndInt(X, g_XMNegativeZero.v); - XIsPositive = XMVectorEqualInt(XIsPositive, Zero); - XMVECTOR YEqualsInfinity = XMVectorIsInfinite(Y); - XMVECTOR XEqualsInfinity = XMVectorIsInfinite(X); - - XMVECTOR YSign = XMVectorAndInt(Y, g_XMNegativeZero.v); - Pi = XMVectorOrInt(Pi, YSign); - PiOverTwo = XMVectorOrInt(PiOverTwo, YSign); - PiOverFour = XMVectorOrInt(PiOverFour, YSign); - ThreePiOverFour = XMVectorOrInt(ThreePiOverFour, YSign); - - XMVECTOR R1 = XMVectorSelect(Pi, YSign, XIsPositive); - XMVECTOR R2 = XMVectorSelect(ATanResultValid, PiOverTwo, XEqualsZero); - XMVECTOR R3 = XMVectorSelect(R2, R1, YEqualsZero); - XMVECTOR R4 = XMVectorSelect(ThreePiOverFour, PiOverFour, XIsPositive); - XMVECTOR R5 = XMVectorSelect(PiOverTwo, R4, XEqualsInfinity); - XMVECTOR Result = XMVectorSelect(R3, R5, YEqualsInfinity); - ATanResultValid = XMVectorEqualInt(Result, ATanResultValid); - - XMVECTOR Reciprocal = XMVectorReciprocalEst(X); - XMVECTOR V = XMVectorMultiply(Y, Reciprocal); - XMVECTOR R0 = XMVectorATanEst(V); - - R1 = XMVectorSelect(Pi, g_XMNegativeZero, XIsPositive); - R2 = XMVectorAdd(R0, R1); - - Result = XMVectorSelect(Result, R2, ATanResultValid); - - return Result; - -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLerp(FXMVECTOR V0, FXMVECTOR V1, - float t) noexcept { - // V0 + t * (V1 - V0) - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Scale = XMVectorReplicate(t); - XMVECTOR Length = XMVectorSubtract(V1, V0); - return XMVectorMultiplyAdd(Length, Scale, V0); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR L = vsubq_f32(V1, V0); - return vmlaq_n_f32(V0, L, t); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR L = _mm_sub_ps(V1, V0); - XMVECTOR S = _mm_set_ps1(t); - return XM_FMADD_PS(L, S, V0); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorLerpV(FXMVECTOR V0, FXMVECTOR V1, - FXMVECTOR T) noexcept { - // V0 + T * (V1 - V0) - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Length = XMVectorSubtract(V1, V0); - return XMVectorMultiplyAdd(Length, T, V0); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR L = vsubq_f32(V1, V0); - return vmlaq_f32(V0, L, T); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR Length = _mm_sub_ps(V1, V0); - return XM_FMADD_PS(Length, T, V0); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorHermite(FXMVECTOR Position0, - FXMVECTOR Tangent0, - FXMVECTOR Position1, - GXMVECTOR Tangent1, - float t) noexcept { - // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 + - // (t^3 - 2 * t^2 + t) * Tangent0 + - // (-2 * t^3 + 3 * t^2) * Position1 + - // (t^3 - t^2) * Tangent1 - -#if defined(_XM_NO_INTRINSICS_) - - float t2 = t * t; - float t3 = t * t2; - - XMVECTOR P0 = XMVectorReplicate(2.0f * t3 - 3.0f * t2 + 1.0f); - XMVECTOR T0 = XMVectorReplicate(t3 - 2.0f * t2 + t); - XMVECTOR P1 = XMVectorReplicate(-2.0f * t3 + 3.0f * t2); - XMVECTOR T1 = XMVectorReplicate(t3 - t2); - - XMVECTOR Result = XMVectorMultiply(P0, Position0); - Result = XMVectorMultiplyAdd(T0, Tangent0, Result); - Result = XMVectorMultiplyAdd(P1, Position1, Result); - Result = XMVectorMultiplyAdd(T1, Tangent1, Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float t2 = t * t; - float t3 = t * t2; - - float p0 = 2.0f * t3 - 3.0f * t2 + 1.0f; - float t0 = t3 - 2.0f * t2 + t; - float p1 = -2.0f * t3 + 3.0f * t2; - float t1 = t3 - t2; - - XMVECTOR vResult = vmulq_n_f32(Position0, p0); - vResult = vmlaq_n_f32(vResult, Tangent0, t0); - vResult = vmlaq_n_f32(vResult, Position1, p1); - vResult = vmlaq_n_f32(vResult, Tangent1, t1); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - float t2 = t * t; - float t3 = t * t2; - - XMVECTOR P0 = _mm_set_ps1(2.0f * t3 - 3.0f * t2 + 1.0f); - XMVECTOR T0 = _mm_set_ps1(t3 - 2.0f * t2 + t); - XMVECTOR P1 = _mm_set_ps1(-2.0f * t3 + 3.0f * t2); - XMVECTOR T1 = _mm_set_ps1(t3 - t2); - - XMVECTOR vResult = _mm_mul_ps(P0, Position0); - vResult = XM_FMADD_PS(Tangent0, T0, vResult); - vResult = XM_FMADD_PS(Position1, P1, vResult); - vResult = XM_FMADD_PS(Tangent1, T1, vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorHermiteV(FXMVECTOR Position0, - FXMVECTOR Tangent0, - FXMVECTOR Position1, - GXMVECTOR Tangent1, - HXMVECTOR T) noexcept { - // Result = (2 * t^3 - 3 * t^2 + 1) * Position0 + - // (t^3 - 2 * t^2 + t) * Tangent0 + - // (-2 * t^3 + 3 * t^2) * Position1 + - // (t^3 - t^2) * Tangent1 - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR T2 = XMVectorMultiply(T, T); - XMVECTOR T3 = XMVectorMultiply(T, T2); - - XMVECTOR P0 = XMVectorReplicate(2.0f * T3.vector4_f32[0] - - 3.0f * T2.vector4_f32[0] + 1.0f); - XMVECTOR T0 = XMVectorReplicate( - T3.vector4_f32[1] - 2.0f * T2.vector4_f32[1] + T.vector4_f32[1]); - XMVECTOR P1 = - XMVectorReplicate(-2.0f * T3.vector4_f32[2] + 3.0f * T2.vector4_f32[2]); - XMVECTOR T1 = XMVectorReplicate(T3.vector4_f32[3] - T2.vector4_f32[3]); - - XMVECTOR Result = XMVectorMultiply(P0, Position0); - Result = XMVectorMultiplyAdd(T0, Tangent0, Result); - Result = XMVectorMultiplyAdd(P1, Position1, Result); - Result = XMVectorMultiplyAdd(T1, Tangent1, Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 CatMulT2 = {{{-3.0f, -2.0f, 3.0f, -1.0f}}}; - static const XMVECTORF32 CatMulT3 = {{{2.0f, 1.0f, -2.0f, 1.0f}}}; - - XMVECTOR T2 = vmulq_f32(T, T); - XMVECTOR T3 = vmulq_f32(T, T2); - // Mul by the constants against t^2 - T2 = vmulq_f32(T2, CatMulT2); - // Mul by the constants against t^3 - T3 = vmlaq_f32(T2, T3, CatMulT3); - // T3 now has the pre-result. - // I need to add t.y only - T2 = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(T), g_XMMaskY)); - T3 = vaddq_f32(T3, T2); - // Add 1.0f to x - T3 = vaddq_f32(T3, g_XMIdentityR0); - // Now, I have the constants created - // Mul the x constant to Position0 - XMVECTOR vResult = vmulq_lane_f32(Position0, vget_low_f32(T3), 0); // T3[0] - // Mul the y constant to Tangent0 - vResult = vmlaq_lane_f32(vResult, Tangent0, vget_low_f32(T3), 1); // T3[1] - // Mul the z constant to Position1 - vResult = - vmlaq_lane_f32(vResult, Position1, vget_high_f32(T3), 0); // T3[2] - // Mul the w constant to Tangent1 - vResult = vmlaq_lane_f32(vResult, Tangent1, vget_high_f32(T3), 1); // T3[3] - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 CatMulT2 = {{{-3.0f, -2.0f, 3.0f, -1.0f}}}; - static const XMVECTORF32 CatMulT3 = {{{2.0f, 1.0f, -2.0f, 1.0f}}}; - - XMVECTOR T2 = _mm_mul_ps(T, T); - XMVECTOR T3 = _mm_mul_ps(T, T2); - // Mul by the constants against t^2 - T2 = _mm_mul_ps(T2, CatMulT2); - // Mul by the constants against t^3 - T3 = XM_FMADD_PS(T3, CatMulT3, T2); - // T3 now has the pre-result. - // I need to add t.y only - T2 = _mm_and_ps(T, g_XMMaskY); - T3 = _mm_add_ps(T3, T2); - // Add 1.0f to x - T3 = _mm_add_ps(T3, g_XMIdentityR0); - // Now, I have the constants created - // Mul the x constant to Position0 - XMVECTOR vResult = XM_PERMUTE_PS(T3, _MM_SHUFFLE(0, 0, 0, 0)); - vResult = _mm_mul_ps(vResult, Position0); - // Mul the y constant to Tangent0 - T2 = XM_PERMUTE_PS(T3, _MM_SHUFFLE(1, 1, 1, 1)); - vResult = XM_FMADD_PS(T2, Tangent0, vResult); - // Mul the z constant to Position1 - T2 = XM_PERMUTE_PS(T3, _MM_SHUFFLE(2, 2, 2, 2)); - vResult = XM_FMADD_PS(T2, Position1, vResult); - // Mul the w constant to Tangent1 - T3 = XM_PERMUTE_PS(T3, _MM_SHUFFLE(3, 3, 3, 3)); - vResult = XM_FMADD_PS(T3, Tangent1, vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorCatmullRom(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, - GXMVECTOR Position3, - float t) noexcept { - // Result = ((-t^3 + 2 * t^2 - t) * Position0 + - // (3 * t^3 - 5 * t^2 + 2) * Position1 + - // (-3 * t^3 + 4 * t^2 + t) * Position2 + - // (t^3 - t^2) * Position3) * 0.5 - -#if defined(_XM_NO_INTRINSICS_) - - float t2 = t * t; - float t3 = t * t2; - - XMVECTOR P0 = XMVectorReplicate((-t3 + 2.0f * t2 - t) * 0.5f); - XMVECTOR P1 = XMVectorReplicate((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f); - XMVECTOR P2 = XMVectorReplicate((-3.0f * t3 + 4.0f * t2 + t) * 0.5f); - XMVECTOR P3 = XMVectorReplicate((t3 - t2) * 0.5f); - - XMVECTOR Result = XMVectorMultiply(P0, Position0); - Result = XMVectorMultiplyAdd(P1, Position1, Result); - Result = XMVectorMultiplyAdd(P2, Position2, Result); - Result = XMVectorMultiplyAdd(P3, Position3, Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float t2 = t * t; - float t3 = t * t2; - - float p0 = (-t3 + 2.0f * t2 - t) * 0.5f; - float p1 = (3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f; - float p2 = (-3.0f * t3 + 4.0f * t2 + t) * 0.5f; - float p3 = (t3 - t2) * 0.5f; - - XMVECTOR P1 = vmulq_n_f32(Position1, p1); - XMVECTOR P0 = vmlaq_n_f32(P1, Position0, p0); - XMVECTOR P3 = vmulq_n_f32(Position3, p3); - XMVECTOR P2 = vmlaq_n_f32(P3, Position2, p2); - P0 = vaddq_f32(P0, P2); - return P0; -#elif defined(_XM_SSE_INTRINSICS_) - float t2 = t * t; - float t3 = t * t2; - - XMVECTOR P0 = _mm_set_ps1((-t3 + 2.0f * t2 - t) * 0.5f); - XMVECTOR P1 = _mm_set_ps1((3.0f * t3 - 5.0f * t2 + 2.0f) * 0.5f); - XMVECTOR P2 = _mm_set_ps1((-3.0f * t3 + 4.0f * t2 + t) * 0.5f); - XMVECTOR P3 = _mm_set_ps1((t3 - t2) * 0.5f); - - P1 = _mm_mul_ps(Position1, P1); - P0 = XM_FMADD_PS(Position0, P0, P1); - P3 = _mm_mul_ps(Position3, P3); - P2 = XM_FMADD_PS(Position2, P2, P3); - P0 = _mm_add_ps(P0, P2); - return P0; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorCatmullRomV(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, - GXMVECTOR Position3, - HXMVECTOR T) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float fx = T.vector4_f32[0]; - float fy = T.vector4_f32[1]; - float fz = T.vector4_f32[2]; - float fw = T.vector4_f32[3]; - XMVECTORF32 vResult = { - {{0.5f * - ((-fx * fx * fx + 2 * fx * fx - fx) * Position0.vector4_f32[0] + - (3 * fx * fx * fx - 5 * fx * fx + 2) * Position1.vector4_f32[0] + - (-3 * fx * fx * fx + 4 * fx * fx + fx) * - Position2.vector4_f32[0] + - (fx * fx * fx - fx * fx) * Position3.vector4_f32[0]), - - 0.5f * - ((-fy * fy * fy + 2 * fy * fy - fy) * Position0.vector4_f32[1] + - (3 * fy * fy * fy - 5 * fy * fy + 2) * Position1.vector4_f32[1] + - (-3 * fy * fy * fy + 4 * fy * fy + fy) * - Position2.vector4_f32[1] + - (fy * fy * fy - fy * fy) * Position3.vector4_f32[1]), - - 0.5f * - ((-fz * fz * fz + 2 * fz * fz - fz) * Position0.vector4_f32[2] + - (3 * fz * fz * fz - 5 * fz * fz + 2) * Position1.vector4_f32[2] + - (-3 * fz * fz * fz + 4 * fz * fz + fz) * - Position2.vector4_f32[2] + - (fz * fz * fz - fz * fz) * Position3.vector4_f32[2]), - - 0.5f * - ((-fw * fw * fw + 2 * fw * fw - fw) * Position0.vector4_f32[3] + - (3 * fw * fw * fw - 5 * fw * fw + 2) * Position1.vector4_f32[3] + - (-3 * fw * fw * fw + 4 * fw * fw + fw) * - Position2.vector4_f32[3] + - (fw * fw * fw - fw * fw) * Position3.vector4_f32[3])}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Catmul2 = {{{2.0f, 2.0f, 2.0f, 2.0f}}}; - static const XMVECTORF32 Catmul3 = {{{3.0f, 3.0f, 3.0f, 3.0f}}}; - static const XMVECTORF32 Catmul4 = {{{4.0f, 4.0f, 4.0f, 4.0f}}}; - static const XMVECTORF32 Catmul5 = {{{5.0f, 5.0f, 5.0f, 5.0f}}}; - // Cache T^2 and T^3 - XMVECTOR T2 = vmulq_f32(T, T); - XMVECTOR T3 = vmulq_f32(T, T2); - // Perform the Position0 term - XMVECTOR vResult = vaddq_f32(T2, T2); - vResult = vsubq_f32(vResult, T); - vResult = vsubq_f32(vResult, T3); - vResult = vmulq_f32(vResult, Position0); - // Perform the Position1 term and add - XMVECTOR vTemp = vmulq_f32(T3, Catmul3); - vTemp = vmlsq_f32(vTemp, T2, Catmul5); - vTemp = vaddq_f32(vTemp, Catmul2); - vResult = vmlaq_f32(vResult, vTemp, Position1); - // Perform the Position2 term and add - vTemp = vmulq_f32(T2, Catmul4); - vTemp = vmlsq_f32(vTemp, T3, Catmul3); - vTemp = vaddq_f32(vTemp, T); - vResult = vmlaq_f32(vResult, vTemp, Position2); - // Position3 is the last term - T3 = vsubq_f32(T3, T2); - vResult = vmlaq_f32(vResult, T3, Position3); - // Multiply by 0.5f and exit - vResult = vmulq_f32(vResult, g_XMOneHalf); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Catmul2 = {{{2.0f, 2.0f, 2.0f, 2.0f}}}; - static const XMVECTORF32 Catmul3 = {{{3.0f, 3.0f, 3.0f, 3.0f}}}; - static const XMVECTORF32 Catmul4 = {{{4.0f, 4.0f, 4.0f, 4.0f}}}; - static const XMVECTORF32 Catmul5 = {{{5.0f, 5.0f, 5.0f, 5.0f}}}; - // Cache T^2 and T^3 - XMVECTOR T2 = _mm_mul_ps(T, T); - XMVECTOR T3 = _mm_mul_ps(T, T2); - // Perform the Position0 term - XMVECTOR vResult = _mm_add_ps(T2, T2); - vResult = _mm_sub_ps(vResult, T); - vResult = _mm_sub_ps(vResult, T3); - vResult = _mm_mul_ps(vResult, Position0); - // Perform the Position1 term and add - XMVECTOR vTemp = _mm_mul_ps(T3, Catmul3); - vTemp = XM_FNMADD_PS(T2, Catmul5, vTemp); - vTemp = _mm_add_ps(vTemp, Catmul2); - vResult = XM_FMADD_PS(vTemp, Position1, vResult); - // Perform the Position2 term and add - vTemp = _mm_mul_ps(T2, Catmul4); - vTemp = XM_FNMADD_PS(T3, Catmul3, vTemp); - vTemp = _mm_add_ps(vTemp, T); - vResult = XM_FMADD_PS(vTemp, Position2, vResult); - // Position3 is the last term - T3 = _mm_sub_ps(T3, T2); - vResult = XM_FMADD_PS(T3, Position3, vResult); - // Multiply by 0.5f and exit - vResult = _mm_mul_ps(vResult, g_XMOneHalf); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorBaryCentric(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, float f, - float g) noexcept { - // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - - // Position0) - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR P10 = XMVectorSubtract(Position1, Position0); - XMVECTOR ScaleF = XMVectorReplicate(f); - - XMVECTOR P20 = XMVectorSubtract(Position2, Position0); - XMVECTOR ScaleG = XMVectorReplicate(g); - - XMVECTOR Result = XMVectorMultiplyAdd(P10, ScaleF, Position0); - Result = XMVectorMultiplyAdd(P20, ScaleG, Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR R1 = vsubq_f32(Position1, Position0); - XMVECTOR R2 = vsubq_f32(Position2, Position0); - R1 = vmlaq_n_f32(Position0, R1, f); - return vmlaq_n_f32(R1, R2, g); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR R1 = _mm_sub_ps(Position1, Position0); - XMVECTOR R2 = _mm_sub_ps(Position2, Position0); - XMVECTOR SF = _mm_set_ps1(f); - R1 = XM_FMADD_PS(R1, SF, Position0); - XMVECTOR SG = _mm_set_ps1(g); - return XM_FMADD_PS(R2, SG, R1); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVectorBaryCentricV(FXMVECTOR Position0, - FXMVECTOR Position1, - FXMVECTOR Position2, - GXMVECTOR F, - HXMVECTOR G) noexcept { - // Result = Position0 + f * (Position1 - Position0) + g * (Position2 - - // Position0) - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR P10 = XMVectorSubtract(Position1, Position0); - XMVECTOR P20 = XMVectorSubtract(Position2, Position0); - - XMVECTOR Result = XMVectorMultiplyAdd(P10, F, Position0); - Result = XMVectorMultiplyAdd(P20, G, Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR R1 = vsubq_f32(Position1, Position0); - XMVECTOR R2 = vsubq_f32(Position2, Position0); - R1 = vmlaq_f32(Position0, R1, F); - return vmlaq_f32(R1, R2, G); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR R1 = _mm_sub_ps(Position1, Position0); - XMVECTOR R2 = _mm_sub_ps(Position2, Position0); - R1 = XM_FMADD_PS(R1, F, Position0); - return XM_FMADD_PS(R2, G, R1); -#endif -} - -/**************************************************************************** - * - * 2D Vector - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && - (V1.vector4_f32[1] == V2.vector4_f32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vceq_f32(vget_low_f32(V1), vget_low_f32(V2)); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - // z and w are don't care - return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector2EqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - uint32_t CR = 0; - if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && - (V1.vector4_f32[1] == V2.vector4_f32[1])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && - (V1.vector4_f32[1] != V2.vector4_f32[1])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vceq_f32(vget_low_f32(V1), vget_low_f32(V2)); - uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); - uint32_t CR = 0; - if (r == 0xFFFFFFFFFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - // z and w are don't care - int iTest = _mm_movemask_ps(vTemp) & 3; - uint32_t CR = 0; - if (iTest == 3) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && - (V1.vector4_u32[1] == V2.vector4_u32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vceq_u32(vget_low_u32(vreinterpretq_u32_f32(V1)), - vget_low_u32(vreinterpretq_u32_f32(V2))); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 3) == 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector2EqualIntR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - uint32_t CR = 0; - if ((V1.vector4_u32[0] == V2.vector4_u32[0]) && - (V1.vector4_u32[1] == V2.vector4_u32[1])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) && - (V1.vector4_u32[1] != V2.vector4_u32[1])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vceq_u32(vget_low_u32(vreinterpretq_u32_f32(V1)), - vget_low_u32(vreinterpretq_u32_f32(V2))); - uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); - uint32_t CR = 0; - if (r == 0xFFFFFFFFFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - int iTest = _mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 3; - uint32_t CR = 0; - if (iTest == 3) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2NearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float dx = fabsf(V1.vector4_f32[0] - V2.vector4_f32[0]); - float dy = fabsf(V1.vector4_f32[1] - V2.vector4_f32[1]); - return ((dx <= Epsilon.vector4_f32[0]) && (dy <= Epsilon.vector4_f32[1])); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t vDelta = vsub_f32(vget_low_f32(V1), vget_low_f32(V2)); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - uint32x2_t vTemp = vacle_f32(vDelta, vget_low_u32(Epsilon)); -#else - uint32x2_t vTemp = vcle_f32(vabs_f32(vDelta), vget_low_f32(Epsilon)); -#endif - uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); - return (r == 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - // Get the difference - XMVECTOR vDelta = _mm_sub_ps(V1, V2); - // Get the absolute value of the difference - XMVECTOR vTemp = _mm_setzero_ps(); - vTemp = _mm_sub_ps(vTemp, vDelta); - vTemp = _mm_max_ps(vTemp, vDelta); - vTemp = _mm_cmple_ps(vTemp, Epsilon); - // z and w are don't care - return (((_mm_movemask_ps(vTemp) & 3) == 0x3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || - (V1.vector4_f32[1] != V2.vector4_f32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vceq_f32(vget_low_f32(V1), vget_low_f32(V2)); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) != - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - // z and w are don't care - return (((_mm_movemask_ps(vTemp) & 3) != 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2NotEqualInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || - (V1.vector4_u32[1] != V2.vector4_u32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vceq_u32(vget_low_u32(vreinterpretq_u32_f32(V1)), - vget_low_u32(vreinterpretq_u32_f32(V2))); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) != - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 3) != 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && - (V1.vector4_f32[1] > V2.vector4_f32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vcgt_f32(vget_low_f32(V1), vget_low_f32(V2)); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); - // z and w are don't care - return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector2GreaterR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - uint32_t CR = 0; - if ((V1.vector4_f32[0] > V2.vector4_f32[0]) && - (V1.vector4_f32[1] > V2.vector4_f32[1])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) && - (V1.vector4_f32[1] <= V2.vector4_f32[1])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vcgt_f32(vget_low_f32(V1), vget_low_f32(V2)); - uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); - uint32_t CR = 0; - if (r == 0xFFFFFFFFFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); - int iTest = _mm_movemask_ps(vTemp) & 3; - uint32_t CR = 0; - if (iTest == 3) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2GreaterOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && - (V1.vector4_f32[1] >= V2.vector4_f32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vcge_f32(vget_low_f32(V1), vget_low_f32(V2)); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector2GreaterOrEqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - uint32_t CR = 0; - if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && - (V1.vector4_f32[1] >= V2.vector4_f32[1])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && - (V1.vector4_f32[1] < V2.vector4_f32[1])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vcge_f32(vget_low_f32(V1), vget_low_f32(V2)); - uint64_t r = vget_lane_u64(vreinterpret_u64_u32(vTemp), 0); - uint32_t CR = 0; - if (r == 0xFFFFFFFFFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); - int iTest = _mm_movemask_ps(vTemp) & 3; - uint32_t CR = 0; - if (iTest == 3) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2Less(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && - (V1.vector4_f32[1] < V2.vector4_f32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vclt_f32(vget_low_f32(V1), vget_low_f32(V2)); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmplt_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2LessOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && - (V1.vector4_f32[1] <= V2.vector4_f32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vTemp = vcle_f32(vget_low_f32(V1), vget_low_f32(V2)); - return (vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) == - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmple_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 3) == 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2InBounds(FXMVECTOR V, - FXMVECTOR Bounds) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && - V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && - (V.vector4_f32[1] <= Bounds.vector4_f32[1] && - V.vector4_f32[1] >= -Bounds.vector4_f32[1])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - float32x2_t B = vget_low_f32(Bounds); - // Test if less than or equal - uint32x2_t ivTemp1 = vcle_f32(VL, B); - // Negate the bounds - float32x2_t vTemp2 = vneg_f32(B); - // Test if greater or equal (Reversed) - uint32x2_t ivTemp2 = vcle_f32(vTemp2, VL); - // Blend answers - ivTemp1 = vand_u32(ivTemp1, ivTemp2); - // x and y in bounds? - return (vget_lane_u64(vreinterpret_u64_u32(ivTemp1), 0) == - 0xFFFFFFFFFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - // Test if less than or equal - XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); - // Negate the bounds - XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); - // Test if greater or equal (Reversed) - vTemp2 = _mm_cmple_ps(vTemp2, V); - // Blend answers - vTemp1 = _mm_and_ps(vTemp1, vTemp2); - // x and y in bounds? (z and w are don't care) - return (((_mm_movemask_ps(vTemp1) & 0x3) == 0x3) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(push) -#pragma float_control(precise, on) -#endif - -inline bool XM_CALLCONV XMVector2IsNaN(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (XMISNAN(V.vector4_f32[0]) || XMISNAN(V.vector4_f32[1])); -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - return isnan(vgetq_lane_f32(V, 0)) || isnan(vgetq_lane_f32(V, 1)); -#else - float32x2_t VL = vget_low_f32(V); - // Test against itself. NaN is always not equal - uint32x2_t vTempNan = vceq_f32(VL, VL); - // If x or y are NaN, the mask is zero - return (vget_lane_u64(vreinterpret_u64_u32(vTempNan), 0) != - 0xFFFFFFFFFFFFFFFFU); -#endif -#elif defined(_XM_SSE_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - XM_ALIGNED_DATA(16) float tmp[4]; - _mm_store_ps(tmp, V); - return isnan(tmp[0]) || isnan(tmp[1]); -#else - // Test against itself. NaN is always not equal - XMVECTOR vTempNan = _mm_cmpneq_ps(V, V); - // If x or y are NaN, the mask is non-zero - return ((_mm_movemask_ps(vTempNan) & 3) != 0); -#endif -#endif -} - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(pop) -#endif - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector2IsInfinite(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - return (XMISINF(V.vector4_f32[0]) || XMISINF(V.vector4_f32[1])); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Mask off the sign bit - uint32x2_t vTemp = vand_u32(vget_low_u32(vreinterpretq_u32_f32(V)), - vget_low_u32(g_XMAbsMask)); - // Compare to infinity - vTemp = vceq_f32(vreinterpret_f32_u32(vTemp), vget_low_f32(g_XMInfinity)); - // If any are infinity, the signs are true. - return vget_lane_u64(vreinterpret_u64_u32(vTemp), 0) != 0; -#elif defined(_XM_SSE_INTRINSICS_) - // Mask off the sign bit - __m128 vTemp = _mm_and_ps(V, g_XMAbsMask); - // Compare to infinity - vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity); - // If x or z are infinity, the signs are true. - return ((_mm_movemask_ps(vTemp) & 3) != 0); -#endif -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result; - Result.f[0] = Result.f[1] = Result.f[2] = Result.f[3] = - V1.vector4_f32[0] * V2.vector4_f32[0] + - V1.vector4_f32[1] * V2.vector4_f32[1]; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Perform the dot product on x and y - float32x2_t vTemp = vmul_f32(vget_low_f32(V1), vget_low_f32(V2)); - vTemp = vpadd_f32(vTemp, vTemp); - return vcombine_f32(vTemp, vTemp); -#elif defined(_XM_SSE4_INTRINSICS_) - return _mm_dp_ps(V1, V2, 0x3f); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vDot = _mm_mul_ps(V1, V2); - vDot = _mm_hadd_ps(vDot, vDot); - vDot = _mm_moveldup_ps(vDot); - return vDot; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x and y - XMVECTOR vLengthSq = _mm_mul_ps(V1, V2); - // vTemp has y splatted - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); - // x+y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Cross(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - // [ V1.x*V2.y - V1.y*V2.x, V1.x*V2.y - V1.y*V2.x ] - -#if defined(_XM_NO_INTRINSICS_) - float fCross = (V1.vector4_f32[0] * V2.vector4_f32[1]) - - (V1.vector4_f32[1] * V2.vector4_f32[0]); - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = fCross; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Negate = {{{1.f, -1.f, 0, 0}}}; - - float32x2_t vTemp = - vmul_f32(vget_low_f32(V1), vrev64_f32(vget_low_f32(V2))); - vTemp = vmul_f32(vTemp, vget_low_f32(Negate)); - vTemp = vpadd_f32(vTemp, vTemp); - return vcombine_f32(vTemp, vTemp); -#elif defined(_XM_SSE_INTRINSICS_) - // Swap x and y - XMVECTOR vResult = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 1, 0, 1)); - // Perform the muls - vResult = _mm_mul_ps(vResult, V1); - // Splat y - XMVECTOR vTemp = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(1, 1, 1, 1)); - // Sub the values - vResult = _mm_sub_ss(vResult, vTemp); - // Splat the cross product - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 0, 0, 0)); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2LengthSq(FXMVECTOR V) noexcept { - return XMVector2Dot(V, V); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLengthEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVector2LengthSq(V); - Result = XMVectorReciprocalSqrtEst(Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - // Dot2 - float32x2_t vTemp = vmul_f32(VL, VL); - vTemp = vpadd_f32(vTemp, vTemp); - // Reciprocal sqrt (estimate) - vTemp = vrsqrte_f32(vTemp); - return vcombine_f32(vTemp, vTemp); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); - return _mm_rsqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_rsqrt_ss(vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x and y - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has y splatted - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); - // x+y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = _mm_rsqrt_ss(vLengthSq); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2ReciprocalLength(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVector2LengthSq(V); - Result = XMVectorReciprocalSqrt(Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - // Dot2 - float32x2_t vTemp = vmul_f32(VL, VL); - vTemp = vpadd_f32(vTemp, vTemp); - // Reciprocal sqrt - float32x2_t S0 = vrsqrte_f32(vTemp); - float32x2_t P0 = vmul_f32(vTemp, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(vTemp, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - float32x2_t Result = vmul_f32(S1, R1); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); - XMVECTOR vLengthSq = _mm_sqrt_ps(vTemp); - return _mm_div_ps(g_XMOne, vLengthSq); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ss(vTemp); - vLengthSq = _mm_div_ss(g_XMOne, vLengthSq); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x and y - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has y splatted - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); - // x+y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = _mm_sqrt_ss(vLengthSq); - vLengthSq = _mm_div_ss(g_XMOne, vLengthSq); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2LengthEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVector2LengthSq(V); - Result = XMVectorSqrtEst(Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - // Dot2 - float32x2_t vTemp = vmul_f32(VL, VL); - vTemp = vpadd_f32(vTemp, vTemp); - const float32x2_t zero = vdup_n_f32(0); - uint32x2_t VEqualsZero = vceq_f32(vTemp, zero); - // Sqrt (estimate) - float32x2_t Result = vrsqrte_f32(vTemp); - Result = vmul_f32(vTemp, Result); - Result = vbsl_f32(VEqualsZero, zero, Result); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); - return _mm_sqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ss(vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x and y - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has y splatted - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); - // x+y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = _mm_sqrt_ss(vLengthSq); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Length(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVector2LengthSq(V); - Result = XMVectorSqrt(Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - // Dot2 - float32x2_t vTemp = vmul_f32(VL, VL); - vTemp = vpadd_f32(vTemp, vTemp); - const float32x2_t zero = vdup_n_f32(0); - uint32x2_t VEqualsZero = vceq_f32(vTemp, zero); - // Sqrt - float32x2_t S0 = vrsqrte_f32(vTemp); - float32x2_t P0 = vmul_f32(vTemp, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(vTemp, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - float32x2_t Result = vmul_f32(S1, R1); - Result = vmul_f32(vTemp, Result); - Result = vbsl_f32(VEqualsZero, zero, Result); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); - return _mm_sqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - XMVECTOR vTemp = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ss(vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x and y - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has y splatted - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); - // x+y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ -// XMVector2NormalizeEst uses a reciprocal estimate and -// returns QNaN on zero and infinite vectors. - -inline XMVECTOR XM_CALLCONV XMVector2NormalizeEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVector2ReciprocalLength(V); - Result = XMVectorMultiply(V, Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - // Dot2 - float32x2_t vTemp = vmul_f32(VL, VL); - vTemp = vpadd_f32(vTemp, vTemp); - // Reciprocal sqrt (estimate) - vTemp = vrsqrte_f32(vTemp); - // Normalize - float32x2_t Result = vmul_f32(VL, vTemp); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x3f); - XMVECTOR vResult = _mm_rsqrt_ps(vTemp); - return _mm_mul_ps(vResult, V); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_rsqrt_ss(vLengthSq); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - vLengthSq = _mm_mul_ps(vLengthSq, V); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x and y - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has y splatted - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); - // x+y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = _mm_rsqrt_ss(vLengthSq); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - vLengthSq = _mm_mul_ps(vLengthSq, V); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Normalize(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR vResult = XMVector2Length(V); - float fLength = vResult.vector4_f32[0]; - - // Prevent divide by zero - if (fLength > 0) { - fLength = 1.0f / fLength; - } - - vResult.vector4_f32[0] = V.vector4_f32[0] * fLength; - vResult.vector4_f32[1] = V.vector4_f32[1] * fLength; - vResult.vector4_f32[2] = V.vector4_f32[2] * fLength; - vResult.vector4_f32[3] = V.vector4_f32[3] * fLength; - return vResult; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - // Dot2 - float32x2_t vTemp = vmul_f32(VL, VL); - vTemp = vpadd_f32(vTemp, vTemp); - uint32x2_t VEqualsZero = vceq_f32(vTemp, vdup_n_f32(0)); - uint32x2_t VEqualsInf = vceq_f32(vTemp, vget_low_f32(g_XMInfinity)); - // Reciprocal sqrt (2 iterations of Newton-Raphson) - float32x2_t S0 = vrsqrte_f32(vTemp); - float32x2_t P0 = vmul_f32(vTemp, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(vTemp, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - vTemp = vmul_f32(S1, R1); - // Normalize - float32x2_t Result = vmul_f32(VL, vTemp); - Result = vbsl_f32(VEqualsZero, vdup_n_f32(0), Result); - Result = vbsl_f32(VEqualsInf, vget_low_f32(g_XMQNaN), Result); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vLengthSq = _mm_dp_ps(V, V, 0x3f); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#elif defined(_XM_SSE3_INTRINSICS_) - // Perform the dot product on x and y only - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_moveldup_ps(vLengthSq); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x and y only - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 1, 1, 1)); - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Reciprocal mul to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2ClampLength(FXMVECTOR V, float LengthMin, - float LengthMax) noexcept { - XMVECTOR ClampMax = XMVectorReplicate(LengthMax); - XMVECTOR ClampMin = XMVectorReplicate(LengthMin); - return XMVector2ClampLengthV(V, ClampMin, ClampMax); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2ClampLengthV( - FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax) noexcept { - assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin))); - assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax))); - assert(XMVector2GreaterOrEqual(LengthMin, g_XMZero)); - assert(XMVector2GreaterOrEqual(LengthMax, g_XMZero)); - assert(XMVector2GreaterOrEqual(LengthMax, LengthMin)); - - XMVECTOR LengthSq = XMVector2LengthSq(V); - - const XMVECTOR Zero = XMVectorZero(); - - XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq); - - XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v); - XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero); - - XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength); - - XMVECTOR Normal = XMVectorMultiply(V, RcpLength); - - XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength); - Length = XMVectorSelect(LengthSq, Length, Select); - Normal = XMVectorSelect(LengthSq, Normal, Select); - - XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax); - XMVECTOR ControlMin = XMVectorLess(Length, LengthMin); - - XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax); - ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin); - - XMVECTOR Result = XMVectorMultiply(Normal, ClampLength); - - // Preserve the original vector (with no precision loss) if the length falls - // within the given range - XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin); - Result = XMVectorSelect(Result, V, Control); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Reflect(FXMVECTOR Incident, - FXMVECTOR Normal) noexcept { - // Result = Incident - (2 * dot(Incident, Normal)) * Normal - - XMVECTOR Result; - Result = XMVector2Dot(Incident, Normal); - Result = XMVectorAdd(Result, Result); - Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Refract(FXMVECTOR Incident, - FXMVECTOR Normal, - float RefractionIndex) noexcept { - XMVECTOR Index = XMVectorReplicate(RefractionIndex); - return XMVector2RefractV(Incident, Normal, Index); -} - -//------------------------------------------------------------------------------ - -// Return the refraction of a 2D vector -inline XMVECTOR XM_CALLCONV XMVector2RefractV( - FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex) noexcept { - // Result = RefractionIndex * Incident - Normal * (RefractionIndex * - // dot(Incident, Normal) + sqrt(1 - RefractionIndex * RefractionIndex * (1 - - // dot(Incident, Normal) * dot(Incident, Normal)))) - -#if defined(_XM_NO_INTRINSICS_) - - float IDotN = (Incident.vector4_f32[0] * Normal.vector4_f32[0]) + - (Incident.vector4_f32[1] * Normal.vector4_f32[1]); - // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - float RY = 1.0f - (IDotN * IDotN); - float RX = 1.0f - (RY * RefractionIndex.vector4_f32[0] * - RefractionIndex.vector4_f32[0]); - RY = 1.0f - - (RY * RefractionIndex.vector4_f32[1] * RefractionIndex.vector4_f32[1]); - if (RX >= 0.0f) { - RX = (RefractionIndex.vector4_f32[0] * Incident.vector4_f32[0]) - - (Normal.vector4_f32[0] * - ((RefractionIndex.vector4_f32[0] * IDotN) + sqrtf(RX))); - } else { - RX = 0.0f; - } - if (RY >= 0.0f) { - RY = (RefractionIndex.vector4_f32[1] * Incident.vector4_f32[1]) - - (Normal.vector4_f32[1] * - ((RefractionIndex.vector4_f32[1] * IDotN) + sqrtf(RY))); - } else { - RY = 0.0f; - } - - XMVECTOR vResult; - vResult.vector4_f32[0] = RX; - vResult.vector4_f32[1] = RY; - vResult.vector4_f32[2] = 0.0f; - vResult.vector4_f32[3] = 0.0f; - return vResult; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t IL = vget_low_f32(Incident); - float32x2_t NL = vget_low_f32(Normal); - float32x2_t RIL = vget_low_f32(RefractionIndex); - // Get the 2D Dot product of Incident-Normal - float32x2_t vTemp = vmul_f32(IL, NL); - float32x2_t IDotN = vpadd_f32(vTemp, vTemp); - // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - vTemp = vmls_f32(vget_low_f32(g_XMOne), IDotN, IDotN); - vTemp = vmul_f32(vTemp, RIL); - vTemp = vmls_f32(vget_low_f32(g_XMOne), vTemp, RIL); - // If any terms are <=0, sqrt() will fail, punt to zero - uint32x2_t vMask = vcgt_f32(vTemp, vget_low_f32(g_XMZero)); - // Sqrt(vTemp) - float32x2_t S0 = vrsqrte_f32(vTemp); - float32x2_t P0 = vmul_f32(vTemp, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(vTemp, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - float32x2_t S2 = vmul_f32(S1, R1); - vTemp = vmul_f32(vTemp, S2); - // R = RefractionIndex * IDotN + sqrt(R) - vTemp = vmla_f32(vTemp, RIL, IDotN); - // Result = RefractionIndex * Incident - Normal * R - float32x2_t vResult = vmul_f32(RIL, IL); - vResult = vmls_f32(vResult, vTemp, NL); - vResult = - vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(vResult), vMask)); - return vcombine_f32(vResult, vResult); -#elif defined(_XM_SSE_INTRINSICS_) - // Result = RefractionIndex * Incident - Normal * (RefractionIndex * - // dot(Incident, Normal) + sqrt(1 - RefractionIndex * RefractionIndex * (1 - - // dot(Incident, Normal) * dot(Incident, Normal)))) Get the 2D Dot product - // of Incident-Normal - XMVECTOR IDotN = XMVector2Dot(Incident, Normal); - // vTemp = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - XMVECTOR vTemp = XM_FNMADD_PS(IDotN, IDotN, g_XMOne); - vTemp = _mm_mul_ps(vTemp, RefractionIndex); - vTemp = XM_FNMADD_PS(vTemp, RefractionIndex, g_XMOne); - // If any terms are <=0, sqrt() will fail, punt to zero - XMVECTOR vMask = _mm_cmpgt_ps(vTemp, g_XMZero); - // R = RefractionIndex * IDotN + sqrt(R) - vTemp = _mm_sqrt_ps(vTemp); - vTemp = XM_FMADD_PS(RefractionIndex, IDotN, vTemp); - // Result = RefractionIndex * Incident - Normal * R - XMVECTOR vResult = _mm_mul_ps(RefractionIndex, Incident); - vResult = XM_FNMADD_PS(vTemp, Normal, vResult); - vResult = _mm_and_ps(vResult, vMask); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Orthogonal(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{-V.vector4_f32[1], V.vector4_f32[0], 0.f, 0.f}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Negate = {{{-1.f, 1.f, 0, 0}}}; - const float32x2_t zero = vdup_n_f32(0); - - float32x2_t VL = vget_low_f32(V); - float32x2_t Result = vmul_f32(vrev64_f32(VL), vget_low_f32(Negate)); - return vcombine_f32(Result, zero); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 2, 0, 1)); - vResult = _mm_mul_ps(vResult, g_XMNegateX); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector2AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2) noexcept { - XMVECTOR Result = XMVector2Dot(N1, N2); - Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); - Result = XMVectorACosEst(Result); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector2AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2) noexcept { - XMVECTOR Result = XMVector2Dot(N1, N2); - Result = XMVectorClamp(Result, g_XMNegativeOne, g_XMOne); - Result = XMVectorACos(Result); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector2AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2) noexcept { - XMVECTOR L1 = XMVector2ReciprocalLength(V1); - XMVECTOR L2 = XMVector2ReciprocalLength(V2); - - XMVECTOR Dot = XMVector2Dot(V1, V2); - - L1 = XMVectorMultiply(L1, L2); - - XMVECTOR CosAngle = XMVectorMultiply(Dot, L1); - CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v); - - return XMVectorACos(CosAngle); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2LinePointDistance( - FXMVECTOR LinePoint1, FXMVECTOR LinePoint2, FXMVECTOR Point) noexcept { - // Given a vector PointVector from LinePoint1 to Point and a vector - // LineVector from LinePoint1 to LinePoint2, the scaled distance - // PointProjectionScale from LinePoint1 to the perpendicular projection - // of PointVector onto the line is defined as: - // - // PointProjectionScale = dot(PointVector, LineVector) / - // LengthSq(LineVector) - - XMVECTOR PointVector = XMVectorSubtract(Point, LinePoint1); - XMVECTOR LineVector = XMVectorSubtract(LinePoint2, LinePoint1); - - XMVECTOR LengthSq = XMVector2LengthSq(LineVector); - - XMVECTOR PointProjectionScale = XMVector2Dot(PointVector, LineVector); - PointProjectionScale = XMVectorDivide(PointProjectionScale, LengthSq); - - XMVECTOR DistanceVector = - XMVectorMultiply(LineVector, PointProjectionScale); - DistanceVector = XMVectorSubtract(PointVector, DistanceVector); - - return XMVector2Length(DistanceVector); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector2IntersectLine(FXMVECTOR Line1Point1, FXMVECTOR Line1Point2, - FXMVECTOR Line2Point1, GXMVECTOR Line2Point2) noexcept { -#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) - - XMVECTOR V1 = XMVectorSubtract(Line1Point2, Line1Point1); - XMVECTOR V2 = XMVectorSubtract(Line2Point2, Line2Point1); - XMVECTOR V3 = XMVectorSubtract(Line1Point1, Line2Point1); - - XMVECTOR C1 = XMVector2Cross(V1, V2); - XMVECTOR C2 = XMVector2Cross(V2, V3); - - XMVECTOR Result; - const XMVECTOR Zero = XMVectorZero(); - if (XMVector2NearEqual(C1, Zero, g_XMEpsilon.v)) { - if (XMVector2NearEqual(C2, Zero, g_XMEpsilon.v)) { - // Coincident - Result = g_XMInfinity.v; - } else { - // Parallel - Result = g_XMQNaN.v; - } - } else { - // Intersection point = Line1Point1 + V1 * (C2 / C1) - XMVECTOR Scale = XMVectorReciprocal(C1); - Scale = XMVectorMultiply(C2, Scale); - Result = XMVectorMultiplyAdd(V1, Scale, Line1Point1); - } - - return Result; - -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR V1 = _mm_sub_ps(Line1Point2, Line1Point1); - XMVECTOR V2 = _mm_sub_ps(Line2Point2, Line2Point1); - XMVECTOR V3 = _mm_sub_ps(Line1Point1, Line2Point1); - // Generate the cross products - XMVECTOR C1 = XMVector2Cross(V1, V2); - XMVECTOR C2 = XMVector2Cross(V2, V3); - // If C1 is not close to epsilon, use the calculated value - XMVECTOR vResultMask = _mm_setzero_ps(); - vResultMask = _mm_sub_ps(vResultMask, C1); - vResultMask = _mm_max_ps(vResultMask, C1); - // 0xFFFFFFFF if the calculated value is to be used - vResultMask = _mm_cmpgt_ps(vResultMask, g_XMEpsilon); - // If C1 is close to epsilon, which fail type is it? INFINITY or NAN? - XMVECTOR vFailMask = _mm_setzero_ps(); - vFailMask = _mm_sub_ps(vFailMask, C2); - vFailMask = _mm_max_ps(vFailMask, C2); - vFailMask = _mm_cmple_ps(vFailMask, g_XMEpsilon); - XMVECTOR vFail = _mm_and_ps(vFailMask, g_XMInfinity); - vFailMask = _mm_andnot_ps(vFailMask, g_XMQNaN); - // vFail is NAN or INF - vFail = _mm_or_ps(vFail, vFailMask); - // Intersection point = Line1Point1 + V1 * (C2 / C1) - XMVECTOR vResult = _mm_div_ps(C2, C1); - vResult = XM_FMADD_PS(vResult, V1, Line1Point1); - // Use result, or failure value - vResult = _mm_and_ps(vResult, vResultMask); - vResultMask = _mm_andnot_ps(vResultMask, vFail); - vResult = _mm_or_ps(vResult, vResultMask); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2Transform(FXMVECTOR V, - FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); - Result = XMVectorMultiplyAdd(X, M.r[0], Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - float32x4_t Result = vmlaq_lane_f32(M.r[3], M.r[1], VL, 1); // Y - return vmlaq_lane_f32(Result, M.r[0], VL, 0); // X -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y - vResult = XM_FMADD_PS(vResult, M.r[1], M.r[3]); - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X - vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMFLOAT4* XM_CALLCONV XMVector2TransformStream( - XMFLOAT4* pOutputStream, size_t OutputStride, const XMFLOAT2* pInputStream, - size_t InputStride, size_t VectorCount, FXMMATRIX M) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT2)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT2)); - - assert(OutputStride >= sizeof(XMFLOAT4)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT4)); - -#if defined(_XM_NO_INTRINSICS_) - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat2(reinterpret_cast(pInputVector)); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Y, row1, row3); - Result = XMVectorMultiplyAdd(X, row0, Result); - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015, "PREfast noise: Esp:1307") -#endif - - XMStoreFloat4(reinterpret_cast(pOutputVector), Result); - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT2)) && - (OutputStride == sizeof(XMFLOAT4))) { - for (size_t j = 0; j < four; ++j) { - float32x4x2_t V = - vld2q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - float32x2_t r3 = vget_low_f32(row3); - float32x2_t r = vget_low_f32(row0); - XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Ax+M - XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), - V.val[0], r, 1); // Bx+N - - XM_PREFETCH(pInputVector); - - r3 = vget_high_f32(row3); - r = vget_high_f32(row0); - XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Cx+O - XMVECTOR vResult3 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), - V.val[0], r, 1); // Dx+P - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(row1); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(row1); - vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O - vResult3 = vmlaq_lane_f32(vResult3, V.val[1], r, 1); // Dx+Hy+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - float32x4x4_t R; - R.val[0] = vResult0; - R.val[1] = vResult1; - R.val[2] = vResult2; - R.val[3] = vResult3; - - vst4q_f32(reinterpret_cast(pOutputVector), R); - pOutputVector += sizeof(XMFLOAT4) * 4; - - i += 4; - } - } - } - - for (; i < VectorCount; i++) { - float32x2_t V = vld1_f32(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vResult = vmlaq_lane_f32(row3, row0, V, 0); // X - vResult = vmlaq_lane_f32(vResult, row1, V, 1); // Y - - vst1q_f32(reinterpret_cast(pOutputVector), vResult); - pOutputVector += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_AVX2_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - __m256 row0 = _mm256_broadcast_ps(&M.r[0]); - __m256 row1 = _mm256_broadcast_ps(&M.r[1]); - __m256 row3 = _mm256_broadcast_ps(&M.r[3]); - - if (InputStride == sizeof(XMFLOAT2)) { - if (OutputStride == sizeof(XMFLOAT4)) { - if (!(reinterpret_cast(pOutputStream) & 0x1F)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); - __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); - __m256 vTempA = _mm256_mul_ps(X1, row0); - __m256 vTempA2 = _mm256_mul_ps(X2, row0); - vTempA = _mm256_add_ps(vTempA, vTempB); - vTempA2 = _mm256_add_ps(vTempA2, vTempB2); - - X1 = _mm256_insertf128_ps( - vTempA, _mm256_castps256_ps128(vTempA2), 1); - XM256_STREAM_PS(reinterpret_cast(pOutputVector), - X1); - pOutputVector += sizeof(XMFLOAT4) * 2; - - X2 = _mm256_insertf128_ps( - vTempA2, _mm256_extractf128_ps(vTempA, 1), 0); - XM256_STREAM_PS(reinterpret_cast(pOutputVector), - X2); - pOutputVector += sizeof(XMFLOAT4) * 2; - - i += 4; - } - } else { - // Packed input, packed output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); - __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); - __m256 vTempA = _mm256_mul_ps(X1, row0); - __m256 vTempA2 = _mm256_mul_ps(X2, row0); - vTempA = _mm256_add_ps(vTempA, vTempB); - vTempA2 = _mm256_add_ps(vTempA2, vTempB2); - - X1 = _mm256_insertf128_ps( - vTempA, _mm256_castps256_ps128(vTempA2), 1); - _mm256_storeu_ps( - reinterpret_cast(pOutputVector), X1); - pOutputVector += sizeof(XMFLOAT4) * 2; - - X2 = _mm256_insertf128_ps( - vTempA2, _mm256_extractf128_ps(vTempA, 1), 0); - _mm256_storeu_ps( - reinterpret_cast(pOutputVector), X2); - pOutputVector += sizeof(XMFLOAT4) * 2; - - i += 4; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); - __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); - __m256 vTempA = _mm256_mul_ps(X1, row0); - __m256 vTempA2 = _mm256_mul_ps(X2, row0); - vTempA = _mm256_add_ps(vTempA, vTempB); - vTempA2 = _mm256_add_ps(vTempA2, vTempB2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - _mm256_castps256_ps128(vTempA)); - pOutputVector += OutputStride; - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - _mm256_castps256_ps128(vTempA2)); - pOutputVector += OutputStride; - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - _mm256_extractf128_ps(vTempA, 1)); - pOutputVector += OutputStride; - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - _mm256_extractf128_ps(vTempA2, 1)); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - if (i < VectorCount) { - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - for (; i < VectorCount; i++) { - __m128 xy = _mm_castpd_ps( - _mm_load_sd(reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t two = VectorCount >> 1; - if (two > 0) { - if (InputStride == sizeof(XMFLOAT2)) { - if (!(reinterpret_cast(pOutputStream) & 0xF) && - !(OutputStride & 0xF)) { - // Packed input, aligned output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = XM_FMADD_PS(Y, row1, row3); - vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 2; - } - } else { - // Packed input, unaligned output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = XM_FMADD_PS(Y, row1, row3); - vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 2; - } - } - } - } - - if (!(reinterpret_cast(pInputVector) & 0xF) && - !(InputStride & 0xF)) { - if (!(reinterpret_cast(pOutputStream) & 0xF) && - !(OutputStride & 0xF)) { - // Aligned input, aligned output - for (; i < VectorCount; i++) { - XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64( - reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - } else { - // Aligned input, unaligned output - for (; i < VectorCount; i++) { - XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64( - reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - } - } else { - // Unaligned input - for (; i < VectorCount; i++) { - __m128 xy = _mm_castpd_ps( - _mm_load_sd(reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2TransformCoord(FXMVECTOR V, - FXMMATRIX M) noexcept { - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Y, M.r[1], M.r[3]); - Result = XMVectorMultiplyAdd(X, M.r[0], Result); - - XMVECTOR W = XMVectorSplatW(Result); - return XMVectorDivide(Result, W); -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMFLOAT2* XM_CALLCONV -XMVector2TransformCoordStream(XMFLOAT2* pOutputStream, size_t OutputStride, - const XMFLOAT2* pInputStream, size_t InputStride, - size_t VectorCount, FXMMATRIX M) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT2)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT2)); - - assert(OutputStride >= sizeof(XMFLOAT2)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT2)); - -#if defined(_XM_NO_INTRINSICS_) - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat2(reinterpret_cast(pInputVector)); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Y, row1, row3); - Result = XMVectorMultiplyAdd(X, row0, Result); - - XMVECTOR W = XMVectorSplatW(Result); - - Result = XMVectorDivide(Result, W); - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015, "PREfast noise: Esp:1307") -#endif - - XMStoreFloat2(reinterpret_cast(pOutputVector), Result); - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT2)) && - (OutputStride == sizeof(XMFLOAT2))) { - for (size_t j = 0; j < four; ++j) { - float32x4x2_t V = - vld2q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - float32x2_t r3 = vget_low_f32(row3); - float32x2_t r = vget_low_f32(row0); - XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Ax+M - XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), - V.val[0], r, 1); // Bx+N - - XM_PREFETCH(pInputVector); - - r3 = vget_high_f32(row3); - r = vget_high_f32(row0); - XMVECTOR W = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, - 1); // Dx+P - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(row1); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(row1); - W = vmlaq_lane_f32(W, V.val[1], r, 1); // Dx+Hy+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - V.val[0] = vdivq_f32(vResult0, W); - V.val[1] = vdivq_f32(vResult1, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x4_t Reciprocal = vrecpeq_f32(W); - float32x4_t S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - - V.val[0] = vmulq_f32(vResult0, Reciprocal); - V.val[1] = vmulq_f32(vResult1, Reciprocal); -#endif - - vst2q_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += sizeof(XMFLOAT2) * 4; - - i += 4; - } - } - } - - for (; i < VectorCount; i++) { - float32x2_t V = vld1_f32(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vResult = vmlaq_lane_f32(row3, row0, V, 0); // X - vResult = vmlaq_lane_f32(vResult, row1, V, 1); // Y - - V = vget_high_f32(vResult); - float32x2_t W = vdup_lane_f32(V, 1); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - V = vget_low_f32(vResult); - V = vdiv_f32(V, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal for W - float32x2_t Reciprocal = vrecpe_f32(W); - float32x2_t S = vrecps_f32(Reciprocal, W); - Reciprocal = vmul_f32(S, Reciprocal); - S = vrecps_f32(Reciprocal, W); - Reciprocal = vmul_f32(S, Reciprocal); - - V = vget_low_f32(vResult); - V = vmul_f32(V, Reciprocal); -#endif - - vst1_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_AVX2_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - __m256 row0 = _mm256_broadcast_ps(&M.r[0]); - __m256 row1 = _mm256_broadcast_ps(&M.r[1]); - __m256 row3 = _mm256_broadcast_ps(&M.r[3]); - - if (InputStride == sizeof(XMFLOAT2)) { - if (OutputStride == sizeof(XMFLOAT2)) { - if (!(reinterpret_cast(pOutputStream) & 0x1F)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); - __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); - __m256 vTempA = _mm256_mul_ps(X1, row0); - __m256 vTempA2 = _mm256_mul_ps(X2, row0); - vTempA = _mm256_add_ps(vTempA, vTempB); - vTempA2 = _mm256_add_ps(vTempA2, vTempB2); - - __m256 W = _mm256_shuffle_ps(vTempA, vTempA, - _MM_SHUFFLE(3, 3, 3, 3)); - vTempA = _mm256_div_ps(vTempA, W); - - W = _mm256_shuffle_ps(vTempA2, vTempA2, - _MM_SHUFFLE(3, 3, 3, 3)); - vTempA2 = _mm256_div_ps(vTempA2, W); - - X1 = _mm256_shuffle_ps(vTempA, vTempA2, 0x44); - XM256_STREAM_PS(reinterpret_cast(pOutputVector), - X1); - pOutputVector += sizeof(XMFLOAT2) * 4; - - i += 4; - } - } else { - // Packed input, packed output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); - __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); - __m256 vTempA = _mm256_mul_ps(X1, row0); - __m256 vTempA2 = _mm256_mul_ps(X2, row0); - vTempA = _mm256_add_ps(vTempA, vTempB); - vTempA2 = _mm256_add_ps(vTempA2, vTempB2); - - __m256 W = _mm256_shuffle_ps(vTempA, vTempA, - _MM_SHUFFLE(3, 3, 3, 3)); - vTempA = _mm256_div_ps(vTempA, W); - - W = _mm256_shuffle_ps(vTempA2, vTempA2, - _MM_SHUFFLE(3, 3, 3, 3)); - vTempA2 = _mm256_div_ps(vTempA2, W); - - X1 = _mm256_shuffle_ps(vTempA, vTempA2, 0x44); - _mm256_storeu_ps( - reinterpret_cast(pOutputVector), X1); - pOutputVector += sizeof(XMFLOAT2) * 4; - - i += 4; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempB = _mm256_fmadd_ps(Y1, row1, row3); - __m256 vTempB2 = _mm256_fmadd_ps(Y2, row1, row3); - __m256 vTempA = _mm256_mul_ps(X1, row0); - __m256 vTempA2 = _mm256_mul_ps(X2, row0); - vTempA = _mm256_add_ps(vTempA, vTempB); - vTempA2 = _mm256_add_ps(vTempA2, vTempB2); - - __m256 W = _mm256_shuffle_ps(vTempA, vTempA, - _MM_SHUFFLE(3, 3, 3, 3)); - vTempA = _mm256_div_ps(vTempA, W); - - W = _mm256_shuffle_ps(vTempA2, vTempA2, - _MM_SHUFFLE(3, 3, 3, 3)); - vTempA2 = _mm256_div_ps(vTempA2, W); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_castps256_ps128(vTempA))); - pOutputVector += OutputStride; - - _mm_store_sd( - reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_castps256_ps128(vTempA2))); - pOutputVector += OutputStride; - - _mm_store_sd( - reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_extractf128_ps(vTempA, 1))); - pOutputVector += OutputStride; - - _mm_store_sd( - reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_extractf128_ps(vTempA2, 1))); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - if (i < VectorCount) { - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - for (; i < VectorCount; i++) { - __m128 xy = _mm_castpd_ps( - _mm_load_sd(reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t two = VectorCount >> 1; - if (two > 0) { - if (InputStride == sizeof(XMFLOAT2)) { - if (OutputStride == sizeof(XMFLOAT2)) { - if (!(reinterpret_cast(pOutputStream) & 0xF)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - // Result 1 - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - XMVECTOR V1 = _mm_div_ps(vTemp, W); - - // Result 2 - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = XM_FMADD_PS(Y, row1, row3); - vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - XMVECTOR V2 = _mm_div_ps(vTemp, W); - - vTemp = _mm_movelh_ps(V1, V2); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += sizeof(XMFLOAT2) * 2; - - i += 2; - } - } else { - // Packed input, unaligned & packed output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - // Result 1 - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - XMVECTOR V1 = _mm_div_ps(vTemp, W); - - // Result 2 - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = XM_FMADD_PS(Y, row1, row3); - vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - XMVECTOR V2 = _mm_div_ps(vTemp, W); - - vTemp = _mm_movelh_ps(V1, V2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += sizeof(XMFLOAT2) * 2; - - i += 2; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - // Result 1 - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - - // Result 2 - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = XM_FMADD_PS(Y, row1, row3); - vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - - i += 2; - } - } - } - } - - if (!(reinterpret_cast(pInputVector) & 0xF) && - !(InputStride & 0xF)) { - // Aligned input - for (; i < VectorCount; i++) { - XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64( - reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - } - } else { - // Unaligned input - for (; i < VectorCount; i++) { - __m128 xy = _mm_castpd_ps( - _mm_load_sd(reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Y, row1, row3); - XMVECTOR vTemp2 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector2TransformNormal(FXMVECTOR V, - FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiply(Y, M.r[1]); - Result = XMVectorMultiplyAdd(X, M.r[0], Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - float32x4_t Result = vmulq_lane_f32(M.r[1], VL, 1); // Y - return vmlaq_lane_f32(Result, M.r[0], VL, 0); // X -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y - vResult = _mm_mul_ps(vResult, M.r[1]); - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X - vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMFLOAT2* XM_CALLCONV -XMVector2TransformNormalStream(XMFLOAT2* pOutputStream, size_t OutputStride, - const XMFLOAT2* pInputStream, size_t InputStride, - size_t VectorCount, FXMMATRIX M) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT2)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT2)); - - assert(OutputStride >= sizeof(XMFLOAT2)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT2)); - -#if defined(_XM_NO_INTRINSICS_) - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat2(reinterpret_cast(pInputVector)); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiply(Y, row1); - Result = XMVectorMultiplyAdd(X, row0, Result); - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015, "PREfast noise: Esp:1307") -#endif - - XMStoreFloat2(reinterpret_cast(pOutputVector), Result); - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT2)) && - (OutputStride == sizeof(XMFLOAT2))) { - for (size_t j = 0; j < four; ++j) { - float32x4x2_t V = - vld2q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - float32x2_t r = vget_low_f32(row0); - XMVECTOR vResult0 = vmulq_lane_f32(V.val[0], r, 0); // Ax - XMVECTOR vResult1 = vmulq_lane_f32(V.val[0], r, 1); // Bx - - XM_PREFETCH(pInputVector); - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(row1); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - V.val[0] = vResult0; - V.val[1] = vResult1; - - vst2q_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += sizeof(XMFLOAT2) * 4; - - i += 4; - } - } - } - - for (; i < VectorCount; i++) { - float32x2_t V = vld1_f32(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vResult = vmulq_lane_f32(row0, V, 0); // X - vResult = vmlaq_lane_f32(vResult, row1, V, 1); // Y - - V = vget_low_f32(vResult); - vst1_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_AVX2_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - __m256 row0 = _mm256_broadcast_ps(&M.r[0]); - __m256 row1 = _mm256_broadcast_ps(&M.r[1]); - - if (InputStride == sizeof(XMFLOAT2)) { - if (OutputStride == sizeof(XMFLOAT2)) { - if (!(reinterpret_cast(pOutputStream) & 0x1F)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempA = _mm256_mul_ps(Y1, row1); - __m256 vTempB = _mm256_mul_ps(Y2, row1); - vTempA = _mm256_fmadd_ps(X1, row0, vTempA); - vTempB = _mm256_fmadd_ps(X2, row0, vTempB); - - X1 = _mm256_shuffle_ps(vTempA, vTempB, 0x44); - XM256_STREAM_PS(reinterpret_cast(pOutputVector), - X1); - pOutputVector += sizeof(XMFLOAT2) * 4; - - i += 4; - } - } else { - // Packed input, packed output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempA = _mm256_mul_ps(Y1, row1); - __m256 vTempB = _mm256_mul_ps(Y2, row1); - vTempA = _mm256_fmadd_ps(X1, row0, vTempA); - vTempB = _mm256_fmadd_ps(X2, row0, vTempB); - - X1 = _mm256_shuffle_ps(vTempA, vTempB, 0x44); - _mm256_storeu_ps( - reinterpret_cast(pOutputVector), X1); - pOutputVector += sizeof(XMFLOAT2) * 4; - - i += 4; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < four; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 4; - - __m256 Y2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - __m256 X2 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 Y1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 X1 = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - - __m256 vTempA = _mm256_mul_ps(Y1, row1); - __m256 vTempB = _mm256_mul_ps(Y2, row1); - vTempA = _mm256_fmadd_ps(X1, row0, vTempA); - vTempB = _mm256_fmadd_ps(X2, row0, vTempB); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_castps256_ps128(vTempA))); - pOutputVector += OutputStride; - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_castps256_ps128(vTempB))); - pOutputVector += OutputStride; - - _mm_store_sd( - reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_extractf128_ps(vTempA, 1))); - pOutputVector += OutputStride; - - _mm_store_sd( - reinterpret_cast(pOutputVector), - _mm_castps_pd(_mm256_extractf128_ps(vTempB, 1))); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - if (i < VectorCount) { - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - - for (; i < VectorCount; i++) { - __m128 xy = _mm_castpd_ps( - _mm_load_sd(reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Y, row1); - vTemp = XM_FMADD_PS(X, row0, vTemp); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - - size_t i = 0; - size_t two = VectorCount >> 1; - if (two > 0) { - if (InputStride == sizeof(XMFLOAT2)) { - if (OutputStride == sizeof(XMFLOAT2)) { - if (!(reinterpret_cast(pOutputStream) & 0xF)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - // Result 1 - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Y, row1); - XMVECTOR V1 = XM_FMADD_PS(X, row0, vTemp); - - // Result 2 - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = _mm_mul_ps(Y, row1); - XMVECTOR V2 = XM_FMADD_PS(X, row0, vTemp); - - vTemp = _mm_movelh_ps(V1, V2); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += sizeof(XMFLOAT2) * 2; - - i += 2; - } - } else { - // Packed input, unaligned & packed output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - // Result 1 - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Y, row1); - XMVECTOR V1 = XM_FMADD_PS(X, row0, vTemp); - - // Result 2 - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = _mm_mul_ps(Y, row1); - XMVECTOR V2 = XM_FMADD_PS(X, row0, vTemp); - - vTemp = _mm_movelh_ps(V1, V2); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += sizeof(XMFLOAT2) * 2; - - i += 2; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < two; ++j) { - XMVECTOR V = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT2) * 2; - - // Result 1 - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Y, row1); - vTemp = XM_FMADD_PS(X, row0, vTemp); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - - // Result 2 - Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - X = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - - vTemp = _mm_mul_ps(Y, row1); - vTemp = XM_FMADD_PS(X, row0, vTemp); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - - i += 2; - } - } - } - } - - if (!(reinterpret_cast(pInputVector) & 0xF) && - !(InputStride & 0xF)) { - // Aligned input - for (; i < VectorCount; i++) { - XMVECTOR V = _mm_castsi128_ps(_mm_loadl_epi64( - reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Y, row1); - vTemp = XM_FMADD_PS(X, row0, vTemp); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - } - } else { - // Unaligned input - for (; i < VectorCount; i++) { - __m128 xy = _mm_castpd_ps( - _mm_load_sd(reinterpret_cast(pInputVector))); - pInputVector += InputStride; - - XMVECTOR Y = XM_PERMUTE_PS(xy, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(xy, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Y, row1); - vTemp = XM_FMADD_PS(X, row0, vTemp); - - _mm_store_sd(reinterpret_cast(pOutputVector), - _mm_castps_pd(vTemp)); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -/**************************************************************************** - * - * 3D Vector - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && - (V1.vector4_f32[1] == V2.vector4_f32[1]) && - (V1.vector4_f32[2] == V2.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vceqq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector3EqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - uint32_t CR = 0; - if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && - (V1.vector4_f32[1] == V2.vector4_f32[1]) && - (V1.vector4_f32[2] == V2.vector4_f32[2])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && - (V1.vector4_f32[1] != V2.vector4_f32[1]) && - (V1.vector4_f32[2] != V2.vector4_f32[2])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vceqq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = - vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; - - uint32_t CR = 0; - if (r == 0xFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - int iTest = _mm_movemask_ps(vTemp) & 7; - uint32_t CR = 0; - if (iTest == 7) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && - (V1.vector4_u32[1] == V2.vector4_u32[1]) && - (V1.vector4_u32[2] == V2.vector4_u32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 7) == 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector3EqualIntR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - uint32_t CR = 0; - if ((V1.vector4_u32[0] == V2.vector4_u32[0]) && - (V1.vector4_u32[1] == V2.vector4_u32[1]) && - (V1.vector4_u32[2] == V2.vector4_u32[2])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_u32[0] != V2.vector4_u32[0]) && - (V1.vector4_u32[1] != V2.vector4_u32[1]) && - (V1.vector4_u32[2] != V2.vector4_u32[2])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = - vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; - - uint32_t CR = 0; - if (r == 0xFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - int iTemp = _mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 7; - uint32_t CR = 0; - if (iTemp == 7) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTemp) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3NearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float dx, dy, dz; - - dx = fabsf(V1.vector4_f32[0] - V2.vector4_f32[0]); - dy = fabsf(V1.vector4_f32[1] - V2.vector4_f32[1]); - dz = fabsf(V1.vector4_f32[2] - V2.vector4_f32[2]); - return (((dx <= Epsilon.vector4_f32[0]) && (dy <= Epsilon.vector4_f32[1]) && - (dz <= Epsilon.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vDelta = vsubq_f32(V1, V2); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - uint32x4_t vResult = vacleq_f32(vDelta, Epsilon); -#else - uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon); -#endif - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - // Get the difference - XMVECTOR vDelta = _mm_sub_ps(V1, V2); - // Get the absolute value of the difference - XMVECTOR vTemp = _mm_setzero_ps(); - vTemp = _mm_sub_ps(vTemp, vDelta); - vTemp = _mm_max_ps(vTemp, vDelta); - vTemp = _mm_cmple_ps(vTemp, Epsilon); - // w is don't care - return (((_mm_movemask_ps(vTemp) & 7) == 0x7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || - (V1.vector4_f32[1] != V2.vector4_f32[1]) || - (V1.vector4_f32[2] != V2.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vceqq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) != 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 7) != 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3NotEqualInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || - (V1.vector4_u32[1] != V2.vector4_u32[1]) || - (V1.vector4_u32[2] != V2.vector4_u32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) != 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return (((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) & 7) != 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && - (V1.vector4_f32[1] > V2.vector4_f32[1]) && - (V1.vector4_f32[2] > V2.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgtq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector3GreaterR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - uint32_t CR = 0; - if ((V1.vector4_f32[0] > V2.vector4_f32[0]) && - (V1.vector4_f32[1] > V2.vector4_f32[1]) && - (V1.vector4_f32[2] > V2.vector4_f32[2])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] <= V2.vector4_f32[0]) && - (V1.vector4_f32[1] <= V2.vector4_f32[1]) && - (V1.vector4_f32[2] <= V2.vector4_f32[2])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgtq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = - vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; - - uint32_t CR = 0; - if (r == 0xFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); - uint32_t CR = 0; - int iTest = _mm_movemask_ps(vTemp) & 7; - if (iTest == 7) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3GreaterOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && - (V1.vector4_f32[1] >= V2.vector4_f32[1]) && - (V1.vector4_f32[2] >= V2.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgeq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector3GreaterOrEqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - uint32_t CR = 0; - if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && - (V1.vector4_f32[1] >= V2.vector4_f32[1]) && - (V1.vector4_f32[2] >= V2.vector4_f32[2])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && - (V1.vector4_f32[1] < V2.vector4_f32[1]) && - (V1.vector4_f32[2] < V2.vector4_f32[2])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgeq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = - vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & 0xFFFFFFU; - - uint32_t CR = 0; - if (r == 0xFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); - uint32_t CR = 0; - int iTest = _mm_movemask_ps(vTemp) & 7; - if (iTest == 7) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3Less(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && - (V1.vector4_f32[1] < V2.vector4_f32[1]) && - (V1.vector4_f32[2] < V2.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcltq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmplt_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3LessOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && - (V1.vector4_f32[1] <= V2.vector4_f32[1]) && - (V1.vector4_f32[2] <= V2.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcleq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmple_ps(V1, V2); - return (((_mm_movemask_ps(vTemp) & 7) == 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3InBounds(FXMVECTOR V, - FXMVECTOR Bounds) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && - V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && - (V.vector4_f32[1] <= Bounds.vector4_f32[1] && - V.vector4_f32[1] >= -Bounds.vector4_f32[1]) && - (V.vector4_f32[2] <= Bounds.vector4_f32[2] && - V.vector4_f32[2] >= -Bounds.vector4_f32[2])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Test if less than or equal - uint32x4_t ivTemp1 = vcleq_f32(V, Bounds); - // Negate the bounds - float32x4_t vTemp2 = vnegq_f32(Bounds); - // Test if greater or equal (Reversed) - uint32x4_t ivTemp2 = vcleq_f32(vTemp2, V); - // Blend answers - ivTemp1 = vandq_u32(ivTemp1, ivTemp2); - // in bounds? - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(ivTemp1)), - vget_high_u8(vreinterpretq_u8_u32(ivTemp1))); - uint16x4x2_t vTemp3 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp3.val[1]), 1) & - 0xFFFFFFU) == 0xFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - // Test if less than or equal - XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); - // Negate the bounds - XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); - // Test if greater or equal (Reversed) - vTemp2 = _mm_cmple_ps(vTemp2, V); - // Blend answers - vTemp1 = _mm_and_ps(vTemp1, vTemp2); - // x,y and z in bounds? (w is don't care) - return (((_mm_movemask_ps(vTemp1) & 0x7) == 0x7) != 0); -#else - return XMComparisonAllInBounds(XMVector3InBoundsR(V, Bounds)); -#endif -} - -//------------------------------------------------------------------------------ - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(push) -#pragma float_control(precise, on) -#endif - -inline bool XM_CALLCONV XMVector3IsNaN(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - return (XMISNAN(V.vector4_f32[0]) || XMISNAN(V.vector4_f32[1]) || - XMISNAN(V.vector4_f32[2])); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - return isnan(vgetq_lane_f32(V, 0)) || isnan(vgetq_lane_f32(V, 1)) || - isnan(vgetq_lane_f32(V, 2)); -#else - // Test against itself. NaN is always not equal - uint32x4_t vTempNan = vceqq_f32(V, V); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempNan)), - vget_high_u8(vreinterpretq_u8_u32(vTempNan))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - // If x or y or z are NaN, the mask is zero - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) != 0xFFFFFFU); -#endif -#elif defined(_XM_SSE_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - XM_ALIGNED_DATA(16) float tmp[4]; - _mm_store_ps(tmp, V); - return isnan(tmp[0]) || isnan(tmp[1]) || isnan(tmp[2]); -#else - // Test against itself. NaN is always not equal - XMVECTOR vTempNan = _mm_cmpneq_ps(V, V); - // If x or y or z are NaN, the mask is non-zero - return ((_mm_movemask_ps(vTempNan) & 7) != 0); -#endif -#endif -} - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(pop) -#endif - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector3IsInfinite(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (XMISINF(V.vector4_f32[0]) || XMISINF(V.vector4_f32[1]) || - XMISINF(V.vector4_f32[2])); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Mask off the sign bit - uint32x4_t vTempInf = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - // Compare to infinity - vTempInf = vceqq_f32(vreinterpretq_f32_u32(vTempInf), g_XMInfinity); - // If any are infinity, the signs are true. - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempInf)), - vget_high_u8(vreinterpretq_u8_u32(vTempInf))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return ((vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) & - 0xFFFFFFU) != 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Mask off the sign bit - __m128 vTemp = _mm_and_ps(V, g_XMAbsMask); - // Compare to infinity - vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity); - // If x,y or z are infinity, the signs are true. - return ((_mm_movemask_ps(vTemp) & 7) != 0); -#endif -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float fValue = V1.vector4_f32[0] * V2.vector4_f32[0] + - V1.vector4_f32[1] * V2.vector4_f32[1] + - V1.vector4_f32[2] * V2.vector4_f32[2]; - XMVECTORF32 vResult; - vResult.f[0] = vResult.f[1] = vResult.f[2] = vResult.f[3] = fValue; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vTemp = vmulq_f32(V1, V2); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vpadd_f32(v1, v1); - v2 = vdup_lane_f32(v2, 0); - v1 = vadd_f32(v1, v2); - return vcombine_f32(v1, v1); -#elif defined(_XM_SSE4_INTRINSICS_) - return _mm_dp_ps(V1, V2, 0x7f); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vTemp = _mm_mul_ps(V1, V2); - vTemp = _mm_and_ps(vTemp, g_XMMask3); - vTemp = _mm_hadd_ps(vTemp, vTemp); - return _mm_hadd_ps(vTemp, vTemp); -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product - XMVECTOR vDot = _mm_mul_ps(V1, V2); - // x=Dot.vector4_f32[1], y=Dot.vector4_f32[2] - XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); - // Result.vector4_f32[0] = x+y - vDot = _mm_add_ss(vDot, vTemp); - // x=Dot.vector4_f32[2] - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - // Result.vector4_f32[0] = (x+y)+z - vDot = _mm_add_ss(vDot, vTemp); - // Splat x - return XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Cross(FXMVECTOR V1, - FXMVECTOR V2) noexcept { - // [ V1.y*V2.z - V1.z*V2.y, V1.z*V2.x - V1.x*V2.z, V1.x*V2.y - V1.y*V2.x ] - -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{(V1.vector4_f32[1] * V2.vector4_f32[2]) - - (V1.vector4_f32[2] * V2.vector4_f32[1]), - (V1.vector4_f32[2] * V2.vector4_f32[0]) - - (V1.vector4_f32[0] * V2.vector4_f32[2]), - (V1.vector4_f32[0] * V2.vector4_f32[1]) - - (V1.vector4_f32[1] * V2.vector4_f32[0]), - 0.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t v1xy = vget_low_f32(V1); - float32x2_t v2xy = vget_low_f32(V2); - - float32x2_t v1yx = vrev64_f32(v1xy); - float32x2_t v2yx = vrev64_f32(v2xy); - - float32x2_t v1zz = vdup_lane_f32(vget_high_f32(V1), 0); - float32x2_t v2zz = vdup_lane_f32(vget_high_f32(V2), 0); - - XMVECTOR vResult = - vmulq_f32(vcombine_f32(v1yx, v1xy), vcombine_f32(v2zz, v2yx)); - vResult = - vmlsq_f32(vResult, vcombine_f32(v1zz, v1yx), vcombine_f32(v2yx, v2xy)); - vResult = vreinterpretq_f32_u32( - veorq_u32(vreinterpretq_u32_f32(vResult), g_XMFlipY)); - return vreinterpretq_f32_u32( - vandq_u32(vreinterpretq_u32_f32(vResult), g_XMMask3)); -#elif defined(_XM_SSE_INTRINSICS_) - // y1,z1,x1,w1 - XMVECTOR vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(3, 0, 2, 1)); - // z2,x2,y2,w2 - XMVECTOR vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(3, 1, 0, 2)); - // Perform the left operation - XMVECTOR vResult = _mm_mul_ps(vTemp1, vTemp2); - // z1,x1,y1,w1 - vTemp1 = XM_PERMUTE_PS(vTemp1, _MM_SHUFFLE(3, 0, 2, 1)); - // y2,z2,x2,w2 - vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(3, 1, 0, 2)); - // Perform the right operation - vResult = XM_FNMADD_PS(vTemp1, vTemp2, vResult); - // Set w to zero - return _mm_and_ps(vResult, g_XMMask3); -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3LengthSq(FXMVECTOR V) noexcept { - return XMVector3Dot(V, V); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLengthEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector3LengthSq(V); - Result = XMVectorReciprocalSqrtEst(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot3 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vpadd_f32(v1, v1); - v2 = vdup_lane_f32(v2, 0); - v1 = vadd_f32(v1, v2); - // Reciprocal sqrt (estimate) - v2 = vrsqrte_f32(v1); - return vcombine_f32(v2, v2); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); - return _mm_rsqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_rsqrt_ps(vLengthSq); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y and z - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and y - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 2, 1, 2)); - // x+z, y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - // y,y,y,y - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - // x+z+y,??,??,?? - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - // Splat the length squared - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - // Get the reciprocal - vLengthSq = _mm_rsqrt_ps(vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3ReciprocalLength(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector3LengthSq(V); - Result = XMVectorReciprocalSqrt(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot3 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vpadd_f32(v1, v1); - v2 = vdup_lane_f32(v2, 0); - v1 = vadd_f32(v1, v2); - // Reciprocal sqrt - float32x2_t S0 = vrsqrte_f32(v1); - float32x2_t P0 = vmul_f32(v1, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(v1, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - float32x2_t Result = vmul_f32(S1, R1); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); - XMVECTOR vLengthSq = _mm_sqrt_ps(vTemp); - return _mm_div_ps(g_XMOne, vLengthSq); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vDot = _mm_mul_ps(V, V); - vDot = _mm_and_ps(vDot, g_XMMask3); - vDot = _mm_hadd_ps(vDot, vDot); - vDot = _mm_hadd_ps(vDot, vDot); - vDot = _mm_sqrt_ps(vDot); - vDot = _mm_div_ps(g_XMOne, vDot); - return vDot; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product - XMVECTOR vDot = _mm_mul_ps(V, V); - // x=Dot.y, y=Dot.z - XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); - // Result.x = x+y - vDot = _mm_add_ss(vDot, vTemp); - // x=Dot.z - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - // Result.x = (x+y)+z - vDot = _mm_add_ss(vDot, vTemp); - // Splat x - vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); - // Get the reciprocal - vDot = _mm_sqrt_ps(vDot); - // Get the reciprocal - vDot = _mm_div_ps(g_XMOne, vDot); - return vDot; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3LengthEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector3LengthSq(V); - Result = XMVectorSqrtEst(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot3 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vpadd_f32(v1, v1); - v2 = vdup_lane_f32(v2, 0); - v1 = vadd_f32(v1, v2); - const float32x2_t zero = vdup_n_f32(0); - uint32x2_t VEqualsZero = vceq_f32(v1, zero); - // Sqrt (estimate) - float32x2_t Result = vrsqrte_f32(v1); - Result = vmul_f32(v1, Result); - Result = vbsl_f32(VEqualsZero, zero, Result); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); - return _mm_sqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y and z - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and y - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 2, 1, 2)); - // x+z, y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - // y,y,y,y - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - // x+z+y,??,??,?? - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - // Splat the length squared - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - // Get the length - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Length(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector3LengthSq(V); - Result = XMVectorSqrt(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot3 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vpadd_f32(v1, v1); - v2 = vdup_lane_f32(v2, 0); - v1 = vadd_f32(v1, v2); - const float32x2_t zero = vdup_n_f32(0); - uint32x2_t VEqualsZero = vceq_f32(v1, zero); - // Sqrt - float32x2_t S0 = vrsqrte_f32(v1); - float32x2_t P0 = vmul_f32(v1, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(v1, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - float32x2_t Result = vmul_f32(S1, R1); - Result = vmul_f32(v1, Result); - Result = vbsl_f32(VEqualsZero, zero, Result); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); - return _mm_sqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y and z - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and y - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 2, 1, 2)); - // x+z, y - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - // y,y,y,y - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - // x+z+y,??,??,?? - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - // Splat the length squared - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - // Get the length - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ -// XMVector3NormalizeEst uses a reciprocal estimate and -// returns QNaN on zero and infinite vectors. - -inline XMVECTOR XM_CALLCONV XMVector3NormalizeEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVector3ReciprocalLength(V); - Result = XMVectorMultiply(V, Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot3 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vpadd_f32(v1, v1); - v2 = vdup_lane_f32(v2, 0); - v1 = vadd_f32(v1, v2); - // Reciprocal sqrt (estimate) - v2 = vrsqrte_f32(v1); - // Normalize - return vmulq_f32(V, vcombine_f32(v2, v2)); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0x7f); - XMVECTOR vResult = _mm_rsqrt_ps(vTemp); - return _mm_mul_ps(vResult, V); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vDot = _mm_mul_ps(V, V); - vDot = _mm_and_ps(vDot, g_XMMask3); - vDot = _mm_hadd_ps(vDot, vDot); - vDot = _mm_hadd_ps(vDot, vDot); - vDot = _mm_rsqrt_ps(vDot); - vDot = _mm_mul_ps(vDot, V); - return vDot; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product - XMVECTOR vDot = _mm_mul_ps(V, V); - // x=Dot.y, y=Dot.z - XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1)); - // Result.x = x+y - vDot = _mm_add_ss(vDot, vTemp); - // x=Dot.z - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - // Result.x = (x+y)+z - vDot = _mm_add_ss(vDot, vTemp); - // Splat x - vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0)); - // Get the reciprocal - vDot = _mm_rsqrt_ps(vDot); - // Perform the normalization - vDot = _mm_mul_ps(vDot, V); - return vDot; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Normalize(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float fLength; - XMVECTOR vResult; - - vResult = XMVector3Length(V); - fLength = vResult.vector4_f32[0]; - - // Prevent divide by zero - if (fLength > 0) { - fLength = 1.0f / fLength; - } - - vResult.vector4_f32[0] = V.vector4_f32[0] * fLength; - vResult.vector4_f32[1] = V.vector4_f32[1] * fLength; - vResult.vector4_f32[2] = V.vector4_f32[2] * fLength; - vResult.vector4_f32[3] = V.vector4_f32[3] * fLength; - return vResult; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot3 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vpadd_f32(v1, v1); - v2 = vdup_lane_f32(v2, 0); - v1 = vadd_f32(v1, v2); - uint32x2_t VEqualsZero = vceq_f32(v1, vdup_n_f32(0)); - uint32x2_t VEqualsInf = vceq_f32(v1, vget_low_f32(g_XMInfinity)); - // Reciprocal sqrt (2 iterations of Newton-Raphson) - float32x2_t S0 = vrsqrte_f32(v1); - float32x2_t P0 = vmul_f32(v1, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(v1, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - v2 = vmul_f32(S1, R1); - // Normalize - XMVECTOR vResult = vmulq_f32(V, vcombine_f32(v2, v2)); - vResult = vbslq_f32(vcombine_u32(VEqualsZero, VEqualsZero), vdupq_n_f32(0), - vResult); - return vbslq_f32(vcombine_u32(VEqualsInf, VEqualsInf), g_XMQNaN, vResult); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vLengthSq = _mm_dp_ps(V, V, 0x7f); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#elif defined(_XM_SSE3_INTRINSICS_) - // Perform the dot product on x,y and z only - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_and_ps(vLengthSq, g_XMMask3); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y and z only - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1)); - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1)); - vLengthSq = _mm_add_ss(vLengthSq, vTemp); - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0)); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3ClampLength(FXMVECTOR V, float LengthMin, - float LengthMax) noexcept { - XMVECTOR ClampMax = XMVectorReplicate(LengthMax); - XMVECTOR ClampMin = XMVectorReplicate(LengthMin); - - return XMVector3ClampLengthV(V, ClampMin, ClampMax); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3ClampLengthV( - FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax) noexcept { - assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && - (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin))); - assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && - (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax))); - assert(XMVector3GreaterOrEqual(LengthMin, XMVectorZero())); - assert(XMVector3GreaterOrEqual(LengthMax, XMVectorZero())); - assert(XMVector3GreaterOrEqual(LengthMax, LengthMin)); - - XMVECTOR LengthSq = XMVector3LengthSq(V); - - const XMVECTOR Zero = XMVectorZero(); - - XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq); - - XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v); - XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero); - - XMVECTOR Normal = XMVectorMultiply(V, RcpLength); - - XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength); - - XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength); - Length = XMVectorSelect(LengthSq, Length, Select); - Normal = XMVectorSelect(LengthSq, Normal, Select); - - XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax); - XMVECTOR ControlMin = XMVectorLess(Length, LengthMin); - - XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax); - ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin); - - XMVECTOR Result = XMVectorMultiply(Normal, ClampLength); - - // Preserve the original vector (with no precision loss) if the length falls - // within the given range - XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin); - Result = XMVectorSelect(Result, V, Control); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Reflect(FXMVECTOR Incident, - FXMVECTOR Normal) noexcept { - // Result = Incident - (2 * dot(Incident, Normal)) * Normal - - XMVECTOR Result = XMVector3Dot(Incident, Normal); - Result = XMVectorAdd(Result, Result); - Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Refract(FXMVECTOR Incident, - FXMVECTOR Normal, - float RefractionIndex) noexcept { - XMVECTOR Index = XMVectorReplicate(RefractionIndex); - return XMVector3RefractV(Incident, Normal, Index); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3RefractV( - FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex) noexcept { - // Result = RefractionIndex * Incident - Normal * (RefractionIndex * - // dot(Incident, Normal) + sqrt(1 - RefractionIndex * RefractionIndex * (1 - - // dot(Incident, Normal) * dot(Incident, Normal)))) - -#if defined(_XM_NO_INTRINSICS_) - - const XMVECTOR Zero = XMVectorZero(); - - XMVECTOR IDotN = XMVector3Dot(Incident, Normal); - - // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - XMVECTOR R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v); - R = XMVectorMultiply(R, RefractionIndex); - R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v); - - if (XMVector4LessOrEqual(R, Zero)) { - // Total internal reflection - return Zero; - } else { - // R = RefractionIndex * IDotN + sqrt(R) - R = XMVectorSqrt(R); - R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R); - - // Result = RefractionIndex * Incident - Normal * R - XMVECTOR Result = XMVectorMultiply(RefractionIndex, Incident); - Result = XMVectorNegativeMultiplySubtract(Normal, R, Result); - - return Result; - } - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR IDotN = XMVector3Dot(Incident, Normal); - - // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - float32x4_t R = vmlsq_f32(g_XMOne, IDotN, IDotN); - R = vmulq_f32(R, RefractionIndex); - R = vmlsq_f32(g_XMOne, R, RefractionIndex); - - uint32x4_t isrzero = vcleq_f32(R, g_XMZero); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(isrzero)), - vget_high_u8(vreinterpretq_u8_u32(isrzero))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - - float32x4_t vResult; - if (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU) { - // Total internal reflection - vResult = g_XMZero; - } else { - // Sqrt(R) - float32x4_t S0 = vrsqrteq_f32(R); - float32x4_t P0 = vmulq_f32(R, S0); - float32x4_t R0 = vrsqrtsq_f32(P0, S0); - float32x4_t S1 = vmulq_f32(S0, R0); - float32x4_t P1 = vmulq_f32(R, S1); - float32x4_t R1 = vrsqrtsq_f32(P1, S1); - float32x4_t S2 = vmulq_f32(S1, R1); - R = vmulq_f32(R, S2); - // R = RefractionIndex * IDotN + sqrt(R) - R = vmlaq_f32(R, RefractionIndex, IDotN); - // Result = RefractionIndex * Incident - Normal * R - vResult = vmulq_f32(RefractionIndex, Incident); - vResult = vmlsq_f32(vResult, R, Normal); - } - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Result = RefractionIndex * Incident - Normal * (RefractionIndex * - // dot(Incident, Normal) + sqrt(1 - RefractionIndex * RefractionIndex * (1 - - // dot(Incident, Normal) * dot(Incident, Normal)))) - XMVECTOR IDotN = XMVector3Dot(Incident, Normal); - // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - XMVECTOR R = XM_FNMADD_PS(IDotN, IDotN, g_XMOne); - XMVECTOR R2 = _mm_mul_ps(RefractionIndex, RefractionIndex); - R = XM_FNMADD_PS(R, R2, g_XMOne); - - XMVECTOR vResult = _mm_cmple_ps(R, g_XMZero); - if (_mm_movemask_ps(vResult) == 0x0f) { - // Total internal reflection - vResult = g_XMZero; - } else { - // R = RefractionIndex * IDotN + sqrt(R) - R = _mm_sqrt_ps(R); - R = XM_FMADD_PS(RefractionIndex, IDotN, R); - // Result = RefractionIndex * Incident - Normal * R - vResult = _mm_mul_ps(RefractionIndex, Incident); - vResult = XM_FNMADD_PS(R, Normal, vResult); - } - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Orthogonal(FXMVECTOR V) noexcept { - XMVECTOR Zero = XMVectorZero(); - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR YZYY = - XMVectorSwizzle( - V); - - XMVECTOR NegativeV = XMVectorSubtract(Zero, V); - - XMVECTOR ZIsNegative = XMVectorLess(Z, Zero); - XMVECTOR YZYYIsNegative = XMVectorLess(YZYY, Zero); - - XMVECTOR S = XMVectorAdd(YZYY, Z); - XMVECTOR D = XMVectorSubtract(YZYY, Z); - - XMVECTOR Select = XMVectorEqualInt(ZIsNegative, YZYYIsNegative); - - XMVECTOR R0 = XMVectorPermute(NegativeV, S); - XMVECTOR R1 = XMVectorPermute(V, D); - - return XMVectorSelect(R1, R0, Select); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector3AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2) noexcept { - XMVECTOR Result = XMVector3Dot(N1, N2); - Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); - Result = XMVectorACosEst(Result); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector3AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2) noexcept { - XMVECTOR Result = XMVector3Dot(N1, N2); - Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); - Result = XMVectorACos(Result); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector3AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2) noexcept { - XMVECTOR L1 = XMVector3ReciprocalLength(V1); - XMVECTOR L2 = XMVector3ReciprocalLength(V2); - - XMVECTOR Dot = XMVector3Dot(V1, V2); - - L1 = XMVectorMultiply(L1, L2); - - XMVECTOR CosAngle = XMVectorMultiply(Dot, L1); - CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v); - - return XMVectorACos(CosAngle); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3LinePointDistance( - FXMVECTOR LinePoint1, FXMVECTOR LinePoint2, FXMVECTOR Point) noexcept { - // Given a vector PointVector from LinePoint1 to Point and a vector - // LineVector from LinePoint1 to LinePoint2, the scaled distance - // PointProjectionScale from LinePoint1 to the perpendicular projection - // of PointVector onto the line is defined as: - // - // PointProjectionScale = dot(PointVector, LineVector) / - // LengthSq(LineVector) - - XMVECTOR PointVector = XMVectorSubtract(Point, LinePoint1); - XMVECTOR LineVector = XMVectorSubtract(LinePoint2, LinePoint1); - - XMVECTOR LengthSq = XMVector3LengthSq(LineVector); - - XMVECTOR PointProjectionScale = XMVector3Dot(PointVector, LineVector); - PointProjectionScale = XMVectorDivide(PointProjectionScale, LengthSq); - - XMVECTOR DistanceVector = - XMVectorMultiply(LineVector, PointProjectionScale); - DistanceVector = XMVectorSubtract(PointVector, DistanceVector); - - return XMVector3Length(DistanceVector); -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline void XM_CALLCONV -XMVector3ComponentsFromNormal(XMVECTOR* pParallel, XMVECTOR* pPerpendicular, - FXMVECTOR V, FXMVECTOR Normal) noexcept { - assert(pParallel != nullptr); - assert(pPerpendicular != nullptr); - - XMVECTOR Scale = XMVector3Dot(V, Normal); - - XMVECTOR Parallel = XMVectorMultiply(Normal, Scale); - - *pParallel = Parallel; - *pPerpendicular = XMVectorSubtract(V, Parallel); -} - -//------------------------------------------------------------------------------ -// Transform a vector using a rotation expressed as a unit quaternion - -inline XMVECTOR XM_CALLCONV -XMVector3Rotate(FXMVECTOR V, FXMVECTOR RotationQuaternion) noexcept { - XMVECTOR A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v); - XMVECTOR Q = XMQuaternionConjugate(RotationQuaternion); - XMVECTOR Result = XMQuaternionMultiply(Q, A); - return XMQuaternionMultiply(Result, RotationQuaternion); -} - -//------------------------------------------------------------------------------ -// Transform a vector using the inverse of a rotation expressed as a unit -// quaternion - -inline XMVECTOR XM_CALLCONV -XMVector3InverseRotate(FXMVECTOR V, FXMVECTOR RotationQuaternion) noexcept { - XMVECTOR A = XMVectorSelect(g_XMSelect1110.v, V, g_XMSelect1110.v); - XMVECTOR Result = XMQuaternionMultiply(RotationQuaternion, A); - XMVECTOR Q = XMQuaternionConjugate(RotationQuaternion); - return XMQuaternionMultiply(Result, Q); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Transform(FXMVECTOR V, - FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]); - Result = XMVectorMultiplyAdd(Y, M.r[1], Result); - Result = XMVectorMultiplyAdd(X, M.r[0], Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - XMVECTOR vResult = vmlaq_lane_f32(M.r[3], M.r[0], VL, 0); // X - vResult = vmlaq_lane_f32(vResult, M.r[1], VL, 1); // Y - return vmlaq_lane_f32(vResult, M.r[2], vget_high_f32(V), 0); // Z -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // Z - vResult = XM_FMADD_PS(vResult, M.r[2], M.r[3]); - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y - vResult = XM_FMADD_PS(vTemp, M.r[1], vResult); - vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X - vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307") -#endif - -_Use_decl_annotations_ inline XMFLOAT4* XM_CALLCONV XMVector3TransformStream( - XMFLOAT4* pOutputStream, size_t OutputStride, const XMFLOAT3* pInputStream, - size_t InputStride, size_t VectorCount, FXMMATRIX M) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); - - assert(OutputStride >= sizeof(XMFLOAT4)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT4)); - -#if defined(_XM_NO_INTRINSICS_) - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Z, row2, row3); - Result = XMVectorMultiplyAdd(Y, row1, Result); - Result = XMVectorMultiplyAdd(X, row0, Result); - - XMStoreFloat4(reinterpret_cast(pOutputVector), Result); - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT3)) && - (OutputStride == sizeof(XMFLOAT4))) { - for (size_t j = 0; j < four; ++j) { - float32x4x3_t V = - vld3q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT3) * 4; - - float32x2_t r3 = vget_low_f32(row3); - float32x2_t r = vget_low_f32(row0); - XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Ax+M - XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), - V.val[0], r, 1); // Bx+N - - XM_PREFETCH(pInputVector); - - r3 = vget_high_f32(row3); - r = vget_high_f32(row0); - XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Cx+O - XMVECTOR vResult3 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), - V.val[0], r, 1); // Dx+P - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(row1); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(row1); - vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O - vResult3 = vmlaq_lane_f32(vResult3, V.val[1], r, 1); // Dx+Hy+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - r = vget_low_f32(row2); - vResult0 = - vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz+M - vResult1 = - vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); - - r = vget_high_f32(row2); - vResult2 = - vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz+O - vResult3 = - vmlaq_lane_f32(vResult3, V.val[2], r, 1); // Dx+Hy+Lz+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); - - float32x4x4_t R; - R.val[0] = vResult0; - R.val[1] = vResult1; - R.val[2] = vResult2; - R.val[3] = vResult3; - - vst4q_f32(reinterpret_cast(pOutputVector), R); - pOutputVector += sizeof(XMFLOAT4) * 4; - - i += 4; - } - } - } - - for (; i < VectorCount; i++) { - float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); - float32x2_t zero = vdup_n_f32(0); - float32x2_t VH = vld1_lane_f32( - reinterpret_cast(pInputVector) + 2, zero, 0); - pInputVector += InputStride; - - XMVECTOR vResult = vmlaq_lane_f32(row3, row0, VL, 0); // X - vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y - vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z - - vst1q_f32(reinterpret_cast(pOutputVector), vResult); - pOutputVector += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if (InputStride == sizeof(XMFLOAT3)) { - if (!(reinterpret_cast(pOutputStream) & 0xF) && - !(OutputStride & 0xF)) { - // Packed input, aligned output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 4; - } - } else { - // Packed input, unaligned output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - if (!(reinterpret_cast(pOutputStream) & 0xF) && - !(OutputStride & 0xF)) { - // Aligned output - for (; i < VectorCount; ++i) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - } else { - // Unaligned output - for (; i < VectorCount; ++i) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3TransformCoord(FXMVECTOR V, - FXMMATRIX M) noexcept { - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Z, M.r[2], M.r[3]); - Result = XMVectorMultiplyAdd(Y, M.r[1], Result); - Result = XMVectorMultiplyAdd(X, M.r[0], Result); - - XMVECTOR W = XMVectorSplatW(Result); - return XMVectorDivide(Result, W); -} - -//------------------------------------------------------------------------------ - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307") -#endif - -_Use_decl_annotations_ inline XMFLOAT3* XM_CALLCONV -XMVector3TransformCoordStream(XMFLOAT3* pOutputStream, size_t OutputStride, - const XMFLOAT3* pInputStream, size_t InputStride, - size_t VectorCount, FXMMATRIX M) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); - - assert(OutputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); - -#if defined(_XM_NO_INTRINSICS_) - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiplyAdd(Z, row2, row3); - Result = XMVectorMultiplyAdd(Y, row1, Result); - Result = XMVectorMultiplyAdd(X, row0, Result); - - XMVECTOR W = XMVectorSplatW(Result); - - Result = XMVectorDivide(Result, W); - - XMStoreFloat3(reinterpret_cast(pOutputVector), Result); - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT3)) && - (OutputStride == sizeof(XMFLOAT3))) { - for (size_t j = 0; j < four; ++j) { - float32x4x3_t V = - vld3q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT3) * 4; - - float32x2_t r3 = vget_low_f32(row3); - float32x2_t r = vget_low_f32(row0); - XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Ax+M - XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), - V.val[0], r, 1); // Bx+N - - XM_PREFETCH(pInputVector); - - r3 = vget_high_f32(row3); - r = vget_high_f32(row0); - XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Cx+O - XMVECTOR W = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, - 1); // Dx+P - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(row1); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(row1); - vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O - W = vmlaq_lane_f32(W, V.val[1], r, 1); // Dx+Hy+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - r = vget_low_f32(row2); - vResult0 = - vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz+M - vResult1 = - vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); - - r = vget_high_f32(row2); - vResult2 = - vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz+O - W = vmlaq_lane_f32(W, V.val[2], r, 1); // Dx+Hy+Lz+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - V.val[0] = vdivq_f32(vResult0, W); - V.val[1] = vdivq_f32(vResult1, W); - V.val[2] = vdivq_f32(vResult2, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x4_t Reciprocal = vrecpeq_f32(W); - float32x4_t S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - - V.val[0] = vmulq_f32(vResult0, Reciprocal); - V.val[1] = vmulq_f32(vResult1, Reciprocal); - V.val[2] = vmulq_f32(vResult2, Reciprocal); -#endif - - vst3q_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += sizeof(XMFLOAT3) * 4; - - i += 4; - } - } - } - - for (; i < VectorCount; i++) { - float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); - float32x2_t zero = vdup_n_f32(0); - float32x2_t VH = vld1_lane_f32( - reinterpret_cast(pInputVector) + 2, zero, 0); - pInputVector += InputStride; - - XMVECTOR vResult = vmlaq_lane_f32(row3, row0, VL, 0); // X - vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y - vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z - - VH = vget_high_f32(vResult); - XMVECTOR W = vdupq_lane_f32(VH, 1); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - vResult = vdivq_f32(vResult, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal for W - float32x4_t Reciprocal = vrecpeq_f32(W); - float32x4_t S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - - vResult = vmulq_f32(vResult, Reciprocal); -#endif - - VL = vget_low_f32(vResult); - vst1_f32(reinterpret_cast(pOutputVector), VL); - vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, 2); - pOutputVector += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if (InputStride == sizeof(XMFLOAT3)) { - if (OutputStride == sizeof(XMFLOAT3)) { - if (!(reinterpret_cast(pOutputStream) & 0xF)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V1 = _mm_div_ps(vTemp, W); - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V2 = _mm_div_ps(vTemp, W); - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V3 = _mm_div_ps(vTemp, W); - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V4 = _mm_div_ps(vTemp, W); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - V1); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 16), - vTemp); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } else { - // Packed input, unaligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V1 = _mm_div_ps(vTemp, W); - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V2 = _mm_div_ps(vTemp, W); - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V3 = _mm_div_ps(vTemp, W); - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - V4 = _mm_div_ps(vTemp, W); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - V1); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 16), - vTemp); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, row2, row3); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - for (; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, row2, row3); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - - vTemp = _mm_div_ps(vTemp, W); - - XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3TransformNormal(FXMVECTOR V, - FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiply(Z, M.r[2]); - Result = XMVectorMultiplyAdd(Y, M.r[1], Result); - Result = XMVectorMultiplyAdd(X, M.r[0], Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - XMVECTOR vResult = vmulq_lane_f32(M.r[0], VL, 0); // X - vResult = vmlaq_lane_f32(vResult, M.r[1], VL, 1); // Y - return vmlaq_lane_f32(vResult, M.r[2], vget_high_f32(V), 0); // Z -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // Z - vResult = _mm_mul_ps(vResult, M.r[2]); - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y - vResult = XM_FMADD_PS(vTemp, M.r[1], vResult); - vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X - vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307") -#endif - -_Use_decl_annotations_ inline XMFLOAT3* XM_CALLCONV -XMVector3TransformNormalStream(XMFLOAT3* pOutputStream, size_t OutputStride, - const XMFLOAT3* pInputStream, size_t InputStride, - size_t VectorCount, FXMMATRIX M) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); - - assert(OutputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); - -#if defined(_XM_NO_INTRINSICS_) - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiply(Z, row2); - Result = XMVectorMultiplyAdd(Y, row1, Result); - Result = XMVectorMultiplyAdd(X, row0, Result); - - XMStoreFloat3(reinterpret_cast(pOutputVector), Result); - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT3)) && - (OutputStride == sizeof(XMFLOAT3))) { - for (size_t j = 0; j < four; ++j) { - float32x4x3_t V = - vld3q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT3) * 4; - - float32x2_t r = vget_low_f32(row0); - XMVECTOR vResult0 = vmulq_lane_f32(V.val[0], r, 0); // Ax - XMVECTOR vResult1 = vmulq_lane_f32(V.val[0], r, 1); // Bx - - XM_PREFETCH(pInputVector); - - r = vget_high_f32(row0); - XMVECTOR vResult2 = vmulq_lane_f32(V.val[0], r, 0); // Cx - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(row1); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(row1); - vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - r = vget_low_f32(row2); - vResult0 = - vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz - vResult1 = - vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); - - r = vget_high_f32(row2); - vResult2 = - vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); - - V.val[0] = vResult0; - V.val[1] = vResult1; - V.val[2] = vResult2; - - vst3q_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += sizeof(XMFLOAT3) * 4; - - i += 4; - } - } - } - - for (; i < VectorCount; i++) { - float32x2_t VL = vld1_f32(reinterpret_cast(pInputVector)); - float32x2_t zero = vdup_n_f32(0); - float32x2_t VH = vld1_lane_f32( - reinterpret_cast(pInputVector) + 2, zero, 0); - pInputVector += InputStride; - - XMVECTOR vResult = vmulq_lane_f32(row0, VL, 0); // X - vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y - vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z - - VL = vget_low_f32(vResult); - vst1_f32(reinterpret_cast(pOutputVector), VL); - vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, 2); - pOutputVector += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if (InputStride == sizeof(XMFLOAT3)) { - if (OutputStride == sizeof(XMFLOAT3)) { - if (!(reinterpret_cast(pOutputStream) & 0xF)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Z, row2); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V1 = _mm_add_ps(vTemp, vTemp3); - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V2 = _mm_add_ps(vTemp, vTemp3); - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V3 = _mm_add_ps(vTemp, vTemp3); - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V4 = _mm_add_ps(vTemp, vTemp3); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - V1); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 16), - vTemp); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } else { - // Packed input, unaligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Z, row2); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V1 = _mm_add_ps(vTemp, vTemp3); - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V2 = _mm_add_ps(vTemp, vTemp3); - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V3 = _mm_add_ps(vTemp, vTemp3); - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - V4 = _mm_add_ps(vTemp, vTemp3); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - V1); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 16), - vTemp); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Z, row2); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = _mm_mul_ps(Z, row2); - vTemp2 = _mm_mul_ps(Y, row1); - vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - for (; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = _mm_mul_ps(Z, row2); - XMVECTOR vTemp2 = _mm_mul_ps(Y, row1); - XMVECTOR vTemp3 = _mm_mul_ps(X, row0); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Project( - FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, - float ViewportHeight, float ViewportMinZ, float ViewportMaxZ, - FXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World) noexcept { - const float HalfViewportWidth = ViewportWidth * 0.5f; - const float HalfViewportHeight = ViewportHeight * 0.5f; - - XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, - ViewportMaxZ - ViewportMinZ, 0.0f); - XMVECTOR Offset = - XMVectorSet(ViewportX + HalfViewportWidth, - ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); - - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - - XMVECTOR Result = XMVector3TransformCoord(V, Transform); - - Result = XMVectorMultiplyAdd(Result, Scale, Offset); - - return Result; -} - -//------------------------------------------------------------------------------ - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307") -#endif - -_Use_decl_annotations_ inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream( - XMFLOAT3* pOutputStream, size_t OutputStride, const XMFLOAT3* pInputStream, - size_t InputStride, size_t VectorCount, float ViewportX, float ViewportY, - float ViewportWidth, float ViewportHeight, float ViewportMinZ, - float ViewportMaxZ, FXMMATRIX Projection, CXMMATRIX View, - CXMMATRIX World) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); - - assert(OutputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); - -#if defined(_XM_NO_INTRINSICS_) - - const float HalfViewportWidth = ViewportWidth * 0.5f; - const float HalfViewportHeight = ViewportHeight * 0.5f; - - XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, - ViewportMaxZ - ViewportMinZ, 1.0f); - XMVECTOR Offset = - XMVectorSet(ViewportX + HalfViewportWidth, - ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); - - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - - XMVECTOR Result = XMVector3TransformCoord(V, Transform); - Result = XMVectorMultiplyAdd(Result, Scale, Offset); - - XMStoreFloat3(reinterpret_cast(pOutputVector), Result); - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - const float HalfViewportWidth = ViewportWidth * 0.5f; - const float HalfViewportHeight = ViewportHeight * 0.5f; - - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT3)) && - (OutputStride == sizeof(XMFLOAT3))) { - XMVECTOR ScaleX = vdupq_n_f32(HalfViewportWidth); - XMVECTOR ScaleY = vdupq_n_f32(-HalfViewportHeight); - XMVECTOR ScaleZ = vdupq_n_f32(ViewportMaxZ - ViewportMinZ); - - XMVECTOR OffsetX = vdupq_n_f32(ViewportX + HalfViewportWidth); - XMVECTOR OffsetY = vdupq_n_f32(ViewportY + HalfViewportHeight); - XMVECTOR OffsetZ = vdupq_n_f32(ViewportMinZ); - - for (size_t j = 0; j < four; ++j) { - float32x4x3_t V = - vld3q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT3) * 4; - - float32x2_t r3 = vget_low_f32(Transform.r[3]); - float32x2_t r = vget_low_f32(Transform.r[0]); - XMVECTOR vResult0 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Ax+M - XMVECTOR vResult1 = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), - V.val[0], r, 1); // Bx+N - - XM_PREFETCH(pInputVector); - - r3 = vget_high_f32(Transform.r[3]); - r = vget_high_f32(Transform.r[0]); - XMVECTOR vResult2 = vmlaq_lane_f32(vdupq_lane_f32(r3, 0), - V.val[0], r, 0); // Cx+O - XMVECTOR W = vmlaq_lane_f32(vdupq_lane_f32(r3, 1), V.val[0], r, - 1); // Dx+P - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(Transform.r[1]); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey+M - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(Transform.r[1]); - vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy+O - W = vmlaq_lane_f32(W, V.val[1], r, 1); // Dx+Hy+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - r = vget_low_f32(Transform.r[2]); - vResult0 = - vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz+M - vResult1 = - vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); - - r = vget_high_f32(Transform.r[2]); - vResult2 = - vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz+O - W = vmlaq_lane_f32(W, V.val[2], r, 1); // Dx+Hy+Lz+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - vResult0 = vdivq_f32(vResult0, W); - vResult1 = vdivq_f32(vResult1, W); - vResult2 = vdivq_f32(vResult2, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x4_t Reciprocal = vrecpeq_f32(W); - float32x4_t S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - - vResult0 = vmulq_f32(vResult0, Reciprocal); - vResult1 = vmulq_f32(vResult1, Reciprocal); - vResult2 = vmulq_f32(vResult2, Reciprocal); -#endif - - V.val[0] = vmlaq_f32(OffsetX, vResult0, ScaleX); - V.val[1] = vmlaq_f32(OffsetY, vResult1, ScaleY); - V.val[2] = vmlaq_f32(OffsetZ, vResult2, ScaleZ); - - vst3q_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += sizeof(XMFLOAT3) * 4; - - i += 4; - } - } - } - - if (i < VectorCount) { - XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, - ViewportMaxZ - ViewportMinZ, 1.0f); - XMVECTOR Offset = - XMVectorSet(ViewportX + HalfViewportWidth, - ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); - - for (; i < VectorCount; i++) { - float32x2_t VL = - vld1_f32(reinterpret_cast(pInputVector)); - float32x2_t zero = vdup_n_f32(0); - float32x2_t VH = vld1_lane_f32( - reinterpret_cast(pInputVector) + 2, zero, 0); - pInputVector += InputStride; - - XMVECTOR vResult = - vmlaq_lane_f32(Transform.r[3], Transform.r[0], VL, 0); // X - vResult = vmlaq_lane_f32(vResult, Transform.r[1], VL, 1); // Y - vResult = vmlaq_lane_f32(vResult, Transform.r[2], VH, 0); // Z - - VH = vget_high_f32(vResult); - XMVECTOR W = vdupq_lane_f32(VH, 1); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - vResult = vdivq_f32(vResult, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal for W - float32x4_t Reciprocal = vrecpeq_f32(W); - float32x4_t S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - - vResult = vmulq_f32(vResult, Reciprocal); -#endif - - vResult = vmlaq_f32(Offset, vResult, Scale); - - VL = vget_low_f32(vResult); - vst1_f32(reinterpret_cast(pOutputVector), VL); - vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, - 2); - pOutputVector += OutputStride; - } - } - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - const float HalfViewportWidth = ViewportWidth * 0.5f; - const float HalfViewportHeight = ViewportHeight * 0.5f; - - XMVECTOR Scale = XMVectorSet(HalfViewportWidth, -HalfViewportHeight, - ViewportMaxZ - ViewportMinZ, 1.0f); - XMVECTOR Offset = - XMVectorSet(ViewportX + HalfViewportWidth, - ViewportY + HalfViewportHeight, ViewportMinZ, 0.0f); - - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if (InputStride == sizeof(XMFLOAT3)) { - if (OutputStride == sizeof(XMFLOAT3)) { - if (!(reinterpret_cast(pOutputStream) & 0xF)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = - XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V1 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V2 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V3 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V4 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - V1); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 16), - vTemp); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } else { - // Packed input, unaligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = - XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V1 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V2 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V3 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - V4 = XM_FMADD_PS(vTemp, Scale, Offset); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - V1); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 16), - vTemp); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = - XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - vTemp = XM_FMADD_PS(vTemp, Scale, Offset); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 2 - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - vTemp = XM_FMADD_PS(vTemp, Scale, Offset); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 3 - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - vTemp = XM_FMADD_PS(vTemp, Scale, Offset); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 4 - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - vTemp = XM_FMADD_PS(vTemp, Scale, Offset); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - for (; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - vTemp = XM_FMADD_PS(vTemp, Scale, Offset); - - XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector3Unproject( - FXMVECTOR V, float ViewportX, float ViewportY, float ViewportWidth, - float ViewportHeight, float ViewportMinZ, float ViewportMaxZ, - FXMMATRIX Projection, CXMMATRIX View, CXMMATRIX World) noexcept { - static const XMVECTORF32 D = {{{-1.0f, 1.0f, 0.0f, 0.0f}}}; - - XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, - ViewportMaxZ - ViewportMinZ, 1.0f); - Scale = XMVectorReciprocal(Scale); - - XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); - Offset = XMVectorMultiplyAdd(Scale, Offset, D.v); - - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - Transform = XMMatrixInverse(nullptr, Transform); - - XMVECTOR Result = XMVectorMultiplyAdd(V, Scale, Offset); - - return XMVector3TransformCoord(Result, Transform); -} - -//------------------------------------------------------------------------------ - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307") -#endif - -_Use_decl_annotations_ inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream( - XMFLOAT3* pOutputStream, size_t OutputStride, const XMFLOAT3* pInputStream, - size_t InputStride, size_t VectorCount, float ViewportX, float ViewportY, - float ViewportWidth, float ViewportHeight, float ViewportMinZ, - float ViewportMaxZ, FXMMATRIX Projection, CXMMATRIX View, - CXMMATRIX World) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT3)); - - assert(OutputStride >= sizeof(XMFLOAT3)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT3)); - -#if defined(_XM_NO_INTRINSICS_) - - static const XMVECTORF32 D = {{{-1.0f, 1.0f, 0.0f, 0.0f}}}; - - XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, - ViewportMaxZ - ViewportMinZ, 1.0f); - Scale = XMVectorReciprocal(Scale); - - XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); - Offset = XMVectorMultiplyAdd(Scale, Offset, D.v); - - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - Transform = XMMatrixInverse(nullptr, Transform); - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - - XMVECTOR Result = XMVectorMultiplyAdd(V, Scale, Offset); - - Result = XMVector3TransformCoord(Result, Transform); - - XMStoreFloat3(reinterpret_cast(pOutputVector), Result); - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - Transform = XMMatrixInverse(nullptr, Transform); - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - float sx = 1.f / (ViewportWidth * 0.5f); - float sy = 1.f / (-ViewportHeight * 0.5f); - float sz = 1.f / (ViewportMaxZ - ViewportMinZ); - - float ox = (-ViewportX * sx) - 1.f; - float oy = (-ViewportY * sy) + 1.f; - float oz = (-ViewportMinZ * sz); - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT3)) && - (OutputStride == sizeof(XMFLOAT3))) { - for (size_t j = 0; j < four; ++j) { - float32x4x3_t V = - vld3q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT3) * 4; - - XMVECTOR ScaleX = vdupq_n_f32(sx); - XMVECTOR OffsetX = vdupq_n_f32(ox); - XMVECTOR VX = vmlaq_f32(OffsetX, ScaleX, V.val[0]); - - float32x2_t r3 = vget_low_f32(Transform.r[3]); - float32x2_t r = vget_low_f32(Transform.r[0]); - XMVECTOR vResult0 = - vmlaq_lane_f32(vdupq_lane_f32(r3, 0), VX, r, 0); // Ax+M - XMVECTOR vResult1 = - vmlaq_lane_f32(vdupq_lane_f32(r3, 1), VX, r, 1); // Bx+N - - XM_PREFETCH(pInputVector); - - r3 = vget_high_f32(Transform.r[3]); - r = vget_high_f32(Transform.r[0]); - XMVECTOR vResult2 = - vmlaq_lane_f32(vdupq_lane_f32(r3, 0), VX, r, 0); // Cx+O - XMVECTOR W = - vmlaq_lane_f32(vdupq_lane_f32(r3, 1), VX, r, 1); // Dx+P - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - XMVECTOR ScaleY = vdupq_n_f32(sy); - XMVECTOR OffsetY = vdupq_n_f32(oy); - XMVECTOR VY = vmlaq_f32(OffsetY, ScaleY, V.val[1]); - - r = vget_low_f32(Transform.r[1]); - vResult0 = vmlaq_lane_f32(vResult0, VY, r, 0); // Ax+Ey+M - vResult1 = vmlaq_lane_f32(vResult1, VY, r, 1); // Bx+Fy+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(Transform.r[1]); - vResult2 = vmlaq_lane_f32(vResult2, VY, r, 0); // Cx+Gy+O - W = vmlaq_lane_f32(W, VY, r, 1); // Dx+Hy+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - XMVECTOR ScaleZ = vdupq_n_f32(sz); - XMVECTOR OffsetZ = vdupq_n_f32(oz); - XMVECTOR VZ = vmlaq_f32(OffsetZ, ScaleZ, V.val[2]); - - r = vget_low_f32(Transform.r[2]); - vResult0 = vmlaq_lane_f32(vResult0, VZ, r, 0); // Ax+Ey+Iz+M - vResult1 = vmlaq_lane_f32(vResult1, VZ, r, 1); // Bx+Fy+Jz+N - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); - - r = vget_high_f32(Transform.r[2]); - vResult2 = vmlaq_lane_f32(vResult2, VZ, r, 0); // Cx+Gy+Kz+O - W = vmlaq_lane_f32(W, VZ, r, 1); // Dx+Hy+Lz+P - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - V.val[0] = vdivq_f32(vResult0, W); - V.val[1] = vdivq_f32(vResult1, W); - V.val[2] = vdivq_f32(vResult2, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal - float32x4_t Reciprocal = vrecpeq_f32(W); - float32x4_t S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - - V.val[0] = vmulq_f32(vResult0, Reciprocal); - V.val[1] = vmulq_f32(vResult1, Reciprocal); - V.val[2] = vmulq_f32(vResult2, Reciprocal); -#endif - - vst3q_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += sizeof(XMFLOAT3) * 4; - - i += 4; - } - } - } - - if (i < VectorCount) { - float32x2_t ScaleL = vcreate_f32( - static_cast(*reinterpret_cast(&sx)) | - (static_cast(*reinterpret_cast(&sy)) - << 32)); - float32x2_t ScaleH = vcreate_f32( - static_cast(*reinterpret_cast(&sz))); - - float32x2_t OffsetL = vcreate_f32( - static_cast(*reinterpret_cast(&ox)) | - (static_cast(*reinterpret_cast(&oy)) - << 32)); - float32x2_t OffsetH = vcreate_f32( - static_cast(*reinterpret_cast(&oz))); - - for (; i < VectorCount; i++) { - float32x2_t VL = - vld1_f32(reinterpret_cast(pInputVector)); - float32x2_t zero = vdup_n_f32(0); - float32x2_t VH = vld1_lane_f32( - reinterpret_cast(pInputVector) + 2, zero, 0); - pInputVector += InputStride; - - VL = vmla_f32(OffsetL, VL, ScaleL); - VH = vmla_f32(OffsetH, VH, ScaleH); - - XMVECTOR vResult = - vmlaq_lane_f32(Transform.r[3], Transform.r[0], VL, 0); // X - vResult = vmlaq_lane_f32(vResult, Transform.r[1], VL, 1); // Y - vResult = vmlaq_lane_f32(vResult, Transform.r[2], VH, 0); // Z - - VH = vget_high_f32(vResult); - XMVECTOR W = vdupq_lane_f32(VH, 1); - -#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__ - vResult = vdivq_f32(vResult, W); -#else - // 2 iterations of Newton-Raphson refinement of reciprocal for W - float32x4_t Reciprocal = vrecpeq_f32(W); - float32x4_t S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - S = vrecpsq_f32(Reciprocal, W); - Reciprocal = vmulq_f32(S, Reciprocal); - - vResult = vmulq_f32(vResult, Reciprocal); -#endif - - VL = vget_low_f32(vResult); - vst1_f32(reinterpret_cast(pOutputVector), VL); - vst1q_lane_f32(reinterpret_cast(pOutputVector) + 2, vResult, - 2); - pOutputVector += OutputStride; - } - } - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 D = {{{-1.0f, 1.0f, 0.0f, 0.0f}}}; - - XMVECTOR Scale = XMVectorSet(ViewportWidth * 0.5f, -ViewportHeight * 0.5f, - ViewportMaxZ - ViewportMinZ, 1.0f); - Scale = XMVectorReciprocal(Scale); - - XMVECTOR Offset = XMVectorSet(-ViewportX, -ViewportY, -ViewportMinZ, 0.0f); - Offset = _mm_mul_ps(Scale, Offset); - Offset = _mm_add_ps(Offset, D); - - XMMATRIX Transform = XMMatrixMultiply(World, View); - Transform = XMMatrixMultiply(Transform, Projection); - Transform = XMMatrixInverse(nullptr, Transform); - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if (InputStride == sizeof(XMFLOAT3)) { - if (OutputStride == sizeof(XMFLOAT3)) { - if (!(reinterpret_cast(pOutputStream) & 0xF)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - V1 = XM_FMADD_PS(V1, Scale, Offset); - - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = - XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V1 = _mm_div_ps(vTemp, W); - - // Result 2 - V2 = XM_FMADD_PS(V2, Scale, Offset); - - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V2 = _mm_div_ps(vTemp, W); - - // Result 3 - V3 = XM_FMADD_PS(V3, Scale, Offset); - - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V3 = _mm_div_ps(vTemp, W); - - // Result 4 - V4 = XM_FMADD_PS(V4, Scale, Offset); - - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V4 = _mm_div_ps(vTemp, W); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - XM_STREAM_PS(reinterpret_cast(pOutputVector), - V1); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 16), - vTemp); - XM_STREAM_PS( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } else { - // Packed input, unaligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - V1 = XM_FMADD_PS(V1, Scale, Offset); - - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = - XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = - XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V1 = _mm_div_ps(vTemp, W); - - // Result 2 - V2 = XM_FMADD_PS(V2, Scale, Offset); - - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V2 = _mm_div_ps(vTemp, W); - - // Result 3 - V3 = XM_FMADD_PS(V3, Scale, Offset); - - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V3 = _mm_div_ps(vTemp, W); - - // Result 4 - V4 = XM_FMADD_PS(V4, Scale, Offset); - - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - V4 = _mm_div_ps(vTemp, W); - - // Pack and store the vectors - XM3PACK4INTO3(vTemp); - _mm_storeu_ps(reinterpret_cast(pOutputVector), - V1); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 16), - vTemp); - _mm_storeu_ps( - reinterpret_cast(pOutputVector + 32), V3); - pOutputVector += sizeof(XMFLOAT3) * 4; - i += 4; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < four; ++j) { - __m128 V1 = _mm_loadu_ps( - reinterpret_cast(pInputVector)); - __m128 L2 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 16)); - __m128 L3 = _mm_loadu_ps( - reinterpret_cast(pInputVector + 32)); - pInputVector += sizeof(XMFLOAT3) * 4; - - // Unpack the 4 vectors (.w components are junk) - XM3UNPACK3INTO4(V1, L2, L3); - - // Result 1 - V1 = XM_FMADD_PS(V1, Scale, Offset); - - XMVECTOR Z = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = - XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 2 - V2 = XM_FMADD_PS(V2, Scale, Offset); - - Z = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V2, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 3 - V3 = XM_FMADD_PS(V3, Scale, Offset); - - Z = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - // Result 4 - V4 = XM_FMADD_PS(V4, Scale, Offset); - - Z = XM_PERMUTE_PS(V4, _MM_SHUFFLE(2, 2, 2, 2)); - Y = XM_PERMUTE_PS(V4, _MM_SHUFFLE(1, 1, 1, 1)); - X = XM_PERMUTE_PS(V4, _MM_SHUFFLE(0, 0, 0, 0)); - - vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - - XMStoreFloat3(reinterpret_cast(pOutputVector), - vTemp); - pOutputVector += OutputStride; - - i += 4; - } - } - } - } - - for (; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat3(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - V = _mm_mul_ps(V, Scale); - V = _mm_add_ps(V, Offset); - - XMVECTOR Z = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR Y = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR X = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - - XMVECTOR vTemp = XM_FMADD_PS(Z, Transform.r[2], Transform.r[3]); - XMVECTOR vTemp2 = _mm_mul_ps(Y, Transform.r[1]); - XMVECTOR vTemp3 = _mm_mul_ps(X, Transform.r[0]); - vTemp = _mm_add_ps(vTemp, vTemp2); - vTemp = _mm_add_ps(vTemp, vTemp3); - - XMVECTOR W = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 3, 3, 3)); - vTemp = _mm_div_ps(vTemp, W); - - XMStoreFloat3(reinterpret_cast(pOutputVector), vTemp); - pOutputVector += OutputStride; - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -/**************************************************************************** - * - * 4D Vector - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ -// Comparison operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4Equal(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] == V2.vector4_f32[0]) && - (V1.vector4_f32[1] == V2.vector4_f32[1]) && - (V1.vector4_f32[2] == V2.vector4_f32[2]) && - (V1.vector4_f32[3] == V2.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vceqq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - return ((_mm_movemask_ps(vTemp) == 0x0f) != 0); -#else - return XMComparisonAllTrue(XMVector4EqualR(V1, V2)); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector4EqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - uint32_t CR = 0; - - if ((V1.vector4_f32[0] == V2.vector4_f32[0]) && - (V1.vector4_f32[1] == V2.vector4_f32[1]) && - (V1.vector4_f32[2] == V2.vector4_f32[2]) && - (V1.vector4_f32[3] == V2.vector4_f32[3])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] != V2.vector4_f32[0]) && - (V1.vector4_f32[1] != V2.vector4_f32[1]) && - (V1.vector4_f32[2] != V2.vector4_f32[2]) && - (V1.vector4_f32[3] != V2.vector4_f32[3])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vceqq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpeq_ps(V1, V2); - int iTest = _mm_movemask_ps(vTemp); - uint32_t CR = 0; - if (iTest == 0xf) // All equal? - { - CR = XM_CRMASK_CR6TRUE; - } else if (iTest == 0) // All not equal? - { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4EqualInt(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_u32[0] == V2.vector4_u32[0]) && - (V1.vector4_u32[1] == V2.vector4_u32[1]) && - (V1.vector4_u32[2] == V2.vector4_u32[2]) && - (V1.vector4_u32[3] == V2.vector4_u32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return ((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) == 0xf) != 0); -#else - return XMComparisonAllTrue(XMVector4EqualIntR(V1, V2)); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector4EqualIntR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - uint32_t CR = 0; - if (V1.vector4_u32[0] == V2.vector4_u32[0] && - V1.vector4_u32[1] == V2.vector4_u32[1] && - V1.vector4_u32[2] == V2.vector4_u32[2] && - V1.vector4_u32[3] == V2.vector4_u32[3]) { - CR = XM_CRMASK_CR6TRUE; - } else if (V1.vector4_u32[0] != V2.vector4_u32[0] && - V1.vector4_u32[1] != V2.vector4_u32[1] && - V1.vector4_u32[2] != V2.vector4_u32[2] && - V1.vector4_u32[3] != V2.vector4_u32[3]) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - int iTest = _mm_movemask_ps(_mm_castsi128_ps(vTemp)); - uint32_t CR = 0; - if (iTest == 0xf) // All equal? - { - CR = XM_CRMASK_CR6TRUE; - } else if (iTest == 0) // All not equal? - { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -inline bool XM_CALLCONV XMVector4NearEqual(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR Epsilon) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float dx, dy, dz, dw; - - dx = fabsf(V1.vector4_f32[0] - V2.vector4_f32[0]); - dy = fabsf(V1.vector4_f32[1] - V2.vector4_f32[1]); - dz = fabsf(V1.vector4_f32[2] - V2.vector4_f32[2]); - dw = fabsf(V1.vector4_f32[3] - V2.vector4_f32[3]); - return (((dx <= Epsilon.vector4_f32[0]) && (dy <= Epsilon.vector4_f32[1]) && - (dz <= Epsilon.vector4_f32[2]) && - (dw <= Epsilon.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vDelta = vsubq_f32(V1, V2); -#if defined(_MSC_VER) && !defined(__clang__) && \ - !defined(_ARM64_DISTINCT_NEON_TYPES) - uint32x4_t vResult = vacleq_f32(vDelta, Epsilon); -#else - uint32x4_t vResult = vcleq_f32(vabsq_f32(vDelta), Epsilon); -#endif - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - // Get the difference - XMVECTOR vDelta = _mm_sub_ps(V1, V2); - // Get the absolute value of the difference - XMVECTOR vTemp = _mm_setzero_ps(); - vTemp = _mm_sub_ps(vTemp, vDelta); - vTemp = _mm_max_ps(vTemp, vDelta); - vTemp = _mm_cmple_ps(vTemp, Epsilon); - return ((_mm_movemask_ps(vTemp) == 0xf) != 0); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4NotEqual(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] != V2.vector4_f32[0]) || - (V1.vector4_f32[1] != V2.vector4_f32[1]) || - (V1.vector4_f32[2] != V2.vector4_f32[2]) || - (V1.vector4_f32[3] != V2.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vceqq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpneq_ps(V1, V2); - return ((_mm_movemask_ps(vTemp)) != 0); -#else - return XMComparisonAnyFalse(XMVector4EqualR(V1, V2)); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4NotEqualInt(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_u32[0] != V2.vector4_u32[0]) || - (V1.vector4_u32[1] != V2.vector4_u32[1]) || - (V1.vector4_u32[2] != V2.vector4_u32[2]) || - (V1.vector4_u32[3] != V2.vector4_u32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = - vceqq_u32(vreinterpretq_u32_f32(V1), vreinterpretq_u32_f32(V2)); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - __m128i vTemp = _mm_cmpeq_epi32(_mm_castps_si128(V1), _mm_castps_si128(V2)); - return ((_mm_movemask_ps(_mm_castsi128_ps(vTemp)) != 0xF) != 0); -#else - return XMComparisonAnyFalse(XMVector4EqualIntR(V1, V2)); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4Greater(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] > V2.vector4_f32[0]) && - (V1.vector4_f32[1] > V2.vector4_f32[1]) && - (V1.vector4_f32[2] > V2.vector4_f32[2]) && - (V1.vector4_f32[3] > V2.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgtq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); - return ((_mm_movemask_ps(vTemp) == 0x0f) != 0); -#else - return XMComparisonAllTrue(XMVector4GreaterR(V1, V2)); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector4GreaterR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - uint32_t CR = 0; - if (V1.vector4_f32[0] > V2.vector4_f32[0] && - V1.vector4_f32[1] > V2.vector4_f32[1] && - V1.vector4_f32[2] > V2.vector4_f32[2] && - V1.vector4_f32[3] > V2.vector4_f32[3]) { - CR = XM_CRMASK_CR6TRUE; - } else if (V1.vector4_f32[0] <= V2.vector4_f32[0] && - V1.vector4_f32[1] <= V2.vector4_f32[1] && - V1.vector4_f32[2] <= V2.vector4_f32[2] && - V1.vector4_f32[3] <= V2.vector4_f32[3]) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgtq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - uint32_t CR = 0; - XMVECTOR vTemp = _mm_cmpgt_ps(V1, V2); - int iTest = _mm_movemask_ps(vTemp); - if (iTest == 0xf) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4GreaterOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] >= V2.vector4_f32[0]) && - (V1.vector4_f32[1] >= V2.vector4_f32[1]) && - (V1.vector4_f32[2] >= V2.vector4_f32[2]) && - (V1.vector4_f32[3] >= V2.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgeq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); - return ((_mm_movemask_ps(vTemp) == 0x0f) != 0); -#else - return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V1, V2)); -#endif -} - -//------------------------------------------------------------------------------ - -inline uint32_t XM_CALLCONV XMVector4GreaterOrEqualR(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - uint32_t CR = 0; - if ((V1.vector4_f32[0] >= V2.vector4_f32[0]) && - (V1.vector4_f32[1] >= V2.vector4_f32[1]) && - (V1.vector4_f32[2] >= V2.vector4_f32[2]) && - (V1.vector4_f32[3] >= V2.vector4_f32[3])) { - CR = XM_CRMASK_CR6TRUE; - } else if ((V1.vector4_f32[0] < V2.vector4_f32[0]) && - (V1.vector4_f32[1] < V2.vector4_f32[1]) && - (V1.vector4_f32[2] < V2.vector4_f32[2]) && - (V1.vector4_f32[3] < V2.vector4_f32[3])) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcgeq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1); - - uint32_t CR = 0; - if (r == 0xFFFFFFFFU) { - CR = XM_CRMASK_CR6TRUE; - } else if (!r) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#elif defined(_XM_SSE_INTRINSICS_) - uint32_t CR = 0; - XMVECTOR vTemp = _mm_cmpge_ps(V1, V2); - int iTest = _mm_movemask_ps(vTemp); - if (iTest == 0x0f) { - CR = XM_CRMASK_CR6TRUE; - } else if (!iTest) { - CR = XM_CRMASK_CR6FALSE; - } - return CR; -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4Less(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] < V2.vector4_f32[0]) && - (V1.vector4_f32[1] < V2.vector4_f32[1]) && - (V1.vector4_f32[2] < V2.vector4_f32[2]) && - (V1.vector4_f32[3] < V2.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcltq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmplt_ps(V1, V2); - return ((_mm_movemask_ps(vTemp) == 0x0f) != 0); -#else - return XMComparisonAllTrue(XMVector4GreaterR(V2, V1)); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4LessOrEqual(FXMVECTOR V1, - FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V1.vector4_f32[0] <= V2.vector4_f32[0]) && - (V1.vector4_f32[1] <= V2.vector4_f32[1]) && - (V1.vector4_f32[2] <= V2.vector4_f32[2]) && - (V1.vector4_f32[3] <= V2.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vResult = vcleq_f32(V1, V2); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vResult)), - vget_high_u8(vreinterpretq_u8_u32(vResult))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp = _mm_cmple_ps(V1, V2); - return ((_mm_movemask_ps(vTemp) == 0x0f) != 0); -#else - return XMComparisonAllTrue(XMVector4GreaterOrEqualR(V2, V1)); -#endif -} - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4InBounds(FXMVECTOR V, - FXMVECTOR Bounds) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (((V.vector4_f32[0] <= Bounds.vector4_f32[0] && - V.vector4_f32[0] >= -Bounds.vector4_f32[0]) && - (V.vector4_f32[1] <= Bounds.vector4_f32[1] && - V.vector4_f32[1] >= -Bounds.vector4_f32[1]) && - (V.vector4_f32[2] <= Bounds.vector4_f32[2] && - V.vector4_f32[2] >= -Bounds.vector4_f32[2]) && - (V.vector4_f32[3] <= Bounds.vector4_f32[3] && - V.vector4_f32[3] >= -Bounds.vector4_f32[3])) != 0); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Test if less than or equal - uint32x4_t ivTemp1 = vcleq_f32(V, Bounds); - // Negate the bounds - float32x4_t vTemp2 = vnegq_f32(Bounds); - // Test if greater or equal (Reversed) - uint32x4_t ivTemp2 = vcleq_f32(vTemp2, V); - // Blend answers - ivTemp1 = vandq_u32(ivTemp1, ivTemp2); - // in bounds? - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(ivTemp1)), - vget_high_u8(vreinterpretq_u8_u32(ivTemp1))); - uint16x4x2_t vTemp3 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp3.val[1]), 1) == - 0xFFFFFFFFU); -#elif defined(_XM_SSE_INTRINSICS_) - // Test if less than or equal - XMVECTOR vTemp1 = _mm_cmple_ps(V, Bounds); - // Negate the bounds - XMVECTOR vTemp2 = _mm_mul_ps(Bounds, g_XMNegativeOne); - // Test if greater or equal (Reversed) - vTemp2 = _mm_cmple_ps(vTemp2, V); - // Blend answers - vTemp1 = _mm_and_ps(vTemp1, vTemp2); - // All in bounds? - return ((_mm_movemask_ps(vTemp1) == 0x0f) != 0); -#else - return XMComparisonAllInBounds(XMVector4InBoundsR(V, Bounds)); -#endif -} - -//------------------------------------------------------------------------------ - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(push) -#pragma float_control(precise, on) -#endif - -inline bool XM_CALLCONV XMVector4IsNaN(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - return (XMISNAN(V.vector4_f32[0]) || XMISNAN(V.vector4_f32[1]) || - XMISNAN(V.vector4_f32[2]) || XMISNAN(V.vector4_f32[3])); -#elif defined(_XM_ARM_NEON_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - return isnan(vgetq_lane_f32(V, 0)) || isnan(vgetq_lane_f32(V, 1)) || - isnan(vgetq_lane_f32(V, 2)) || isnan(vgetq_lane_f32(V, 3)); -#else - // Test against itself. NaN is always not equal - uint32x4_t vTempNan = vceqq_f32(V, V); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempNan)), - vget_high_u8(vreinterpretq_u8_u32(vTempNan))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - // If any are NaN, the mask is zero - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != - 0xFFFFFFFFU); -#endif -#elif defined(_XM_SSE_INTRINSICS_) -#if defined(__clang__) && defined(__FINITE_MATH_ONLY__) - XM_ALIGNED_DATA(16) float tmp[4]; - _mm_store_ps(tmp, V); - return isnan(tmp[0]) || isnan(tmp[1]) || isnan(tmp[2]) || isnan(tmp[3]); -#else - // Test against itself. NaN is always not equal - XMVECTOR vTempNan = _mm_cmpneq_ps(V, V); - // If any are NaN, the mask is non-zero - return (_mm_movemask_ps(vTempNan) != 0); -#endif -#endif -} - -#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && \ - !defined(__INTEL_COMPILER) -#pragma float_control(pop) -#endif - -//------------------------------------------------------------------------------ - -inline bool XM_CALLCONV XMVector4IsInfinite(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - return (XMISINF(V.vector4_f32[0]) || XMISINF(V.vector4_f32[1]) || - XMISINF(V.vector4_f32[2]) || XMISINF(V.vector4_f32[3])); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Mask off the sign bit - uint32x4_t vTempInf = vandq_u32(vreinterpretq_u32_f32(V), g_XMAbsMask); - // Compare to infinity - vTempInf = vceqq_f32(vreinterpretq_f32_u32(vTempInf), g_XMInfinity); - // If any are infinity, the signs are true. - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(vTempInf)), - vget_high_u8(vreinterpretq_u8_u32(vTempInf))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - return (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) != 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Mask off the sign bit - XMVECTOR vTemp = _mm_and_ps(V, g_XMAbsMask); - // Compare to infinity - vTemp = _mm_cmpeq_ps(vTemp, g_XMInfinity); - // If any are infinity, the signs are true. - return (_mm_movemask_ps(vTemp) != 0); -#endif -} - -//------------------------------------------------------------------------------ -// Computation operations -//------------------------------------------------------------------------------ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Dot(FXMVECTOR V1, FXMVECTOR V2) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result; - Result.f[0] = Result.f[1] = Result.f[2] = Result.f[3] = - V1.vector4_f32[0] * V2.vector4_f32[0] + - V1.vector4_f32[1] * V2.vector4_f32[1] + - V1.vector4_f32[2] * V2.vector4_f32[2] + - V1.vector4_f32[3] * V2.vector4_f32[3]; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vTemp = vmulq_f32(V1, V2); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - return vcombine_f32(v1, v1); -#elif defined(_XM_SSE4_INTRINSICS_) - return _mm_dp_ps(V1, V2, 0xff); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vTemp = _mm_mul_ps(V1, V2); - vTemp = _mm_hadd_ps(vTemp, vTemp); - return _mm_hadd_ps(vTemp, vTemp); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vTemp2 = V2; - XMVECTOR vTemp = _mm_mul_ps(V1, vTemp2); - vTemp2 = _mm_shuffle_ps( - vTemp2, vTemp, - _MM_SHUFFLE(1, 0, 0, - 0)); // Copy X to the Z position and Y to the W position - vTemp2 = _mm_add_ps(vTemp2, vTemp); // Add Z = X+Z; W = Y+W; - vTemp = _mm_shuffle_ps( - vTemp, vTemp2, _MM_SHUFFLE(0, 3, 0, 0)); // Copy W to the Z position - vTemp = _mm_add_ps(vTemp, vTemp2); // Add Z and W together - return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(2, 2, 2, 2)); // Splat Z and return -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Cross(FXMVECTOR V1, FXMVECTOR V2, - FXMVECTOR V3) noexcept { - // [ - // ((v2.z*v3.w-v2.w*v3.z)*v1.y)-((v2.y*v3.w-v2.w*v3.y)*v1.z)+((v2.y*v3.z-v2.z*v3.y)*v1.w), - // ((v2.w*v3.z-v2.z*v3.w)*v1.x)-((v2.w*v3.x-v2.x*v3.w)*v1.z)+((v2.z*v3.x-v2.x*v3.z)*v1.w), - // ((v2.y*v3.w-v2.w*v3.y)*v1.x)-((v2.x*v3.w-v2.w*v3.x)*v1.y)+((v2.x*v3.y-v2.y*v3.x)*v1.w), - // ((v2.z*v3.y-v2.y*v3.z)*v1.x)-((v2.z*v3.x-v2.x*v3.z)*v1.y)+((v2.y*v3.x-v2.x*v3.y)*v1.z) - // ] - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{ - (((V2.vector4_f32[2] * V3.vector4_f32[3]) - - (V2.vector4_f32[3] * V3.vector4_f32[2])) * - V1.vector4_f32[1]) - - (((V2.vector4_f32[1] * V3.vector4_f32[3]) - - (V2.vector4_f32[3] * V3.vector4_f32[1])) * - V1.vector4_f32[2]) + - (((V2.vector4_f32[1] * V3.vector4_f32[2]) - - (V2.vector4_f32[2] * V3.vector4_f32[1])) * - V1.vector4_f32[3]), - (((V2.vector4_f32[3] * V3.vector4_f32[2]) - - (V2.vector4_f32[2] * V3.vector4_f32[3])) * - V1.vector4_f32[0]) - - (((V2.vector4_f32[3] * V3.vector4_f32[0]) - - (V2.vector4_f32[0] * V3.vector4_f32[3])) * - V1.vector4_f32[2]) + - (((V2.vector4_f32[2] * V3.vector4_f32[0]) - - (V2.vector4_f32[0] * V3.vector4_f32[2])) * - V1.vector4_f32[3]), - (((V2.vector4_f32[1] * V3.vector4_f32[3]) - - (V2.vector4_f32[3] * V3.vector4_f32[1])) * - V1.vector4_f32[0]) - - (((V2.vector4_f32[0] * V3.vector4_f32[3]) - - (V2.vector4_f32[3] * V3.vector4_f32[0])) * - V1.vector4_f32[1]) + - (((V2.vector4_f32[0] * V3.vector4_f32[1]) - - (V2.vector4_f32[1] * V3.vector4_f32[0])) * - V1.vector4_f32[3]), - (((V2.vector4_f32[2] * V3.vector4_f32[1]) - - (V2.vector4_f32[1] * V3.vector4_f32[2])) * - V1.vector4_f32[0]) - - (((V2.vector4_f32[2] * V3.vector4_f32[0]) - - (V2.vector4_f32[0] * V3.vector4_f32[2])) * - V1.vector4_f32[1]) + - (((V2.vector4_f32[1] * V3.vector4_f32[0]) - - (V2.vector4_f32[0] * V3.vector4_f32[1])) * - V1.vector4_f32[2]), - }}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - const uint32x2_t select = vget_low_u32(g_XMMaskX); - - // Term1: V2zwyz * V3wzwy - const float32x2_t v2xy = vget_low_f32(V2); - const float32x2_t v2zw = vget_high_f32(V2); - const float32x2_t v2yx = vrev64_f32(v2xy); - const float32x2_t v2wz = vrev64_f32(v2zw); - const float32x2_t v2yz = vbsl_f32(select, v2yx, v2wz); - - const float32x2_t v3zw = vget_high_f32(V3); - const float32x2_t v3wz = vrev64_f32(v3zw); - const float32x2_t v3xy = vget_low_f32(V3); - const float32x2_t v3wy = vbsl_f32(select, v3wz, v3xy); - - float32x4_t vTemp1 = vcombine_f32(v2zw, v2yz); - float32x4_t vTemp2 = vcombine_f32(v3wz, v3wy); - XMVECTOR vResult = vmulq_f32(vTemp1, vTemp2); - - // - V2wzwy * V3zwyz - const float32x2_t v2wy = vbsl_f32(select, v2wz, v2xy); - - const float32x2_t v3yx = vrev64_f32(v3xy); - const float32x2_t v3yz = vbsl_f32(select, v3yx, v3wz); - - vTemp1 = vcombine_f32(v2wz, v2wy); - vTemp2 = vcombine_f32(v3zw, v3yz); - vResult = vmlsq_f32(vResult, vTemp1, vTemp2); - - // term1 * V1yxxx - const float32x2_t v1xy = vget_low_f32(V1); - const float32x2_t v1yx = vrev64_f32(v1xy); - - vTemp1 = vcombine_f32(v1yx, vdup_lane_f32(v1yx, 1)); - vResult = vmulq_f32(vResult, vTemp1); - - // Term2: V2ywxz * V3wxwx - const float32x2_t v2yw = vrev64_f32(v2wy); - const float32x2_t v2xz = vbsl_f32(select, v2xy, v2wz); - - const float32x2_t v3wx = vbsl_f32(select, v3wz, v3yx); - - vTemp1 = vcombine_f32(v2yw, v2xz); - vTemp2 = vcombine_f32(v3wx, v3wx); - float32x4_t vTerm = vmulq_f32(vTemp1, vTemp2); - - // - V2wxwx * V3ywxz - const float32x2_t v2wx = vbsl_f32(select, v2wz, v2yx); - - const float32x2_t v3yw = vrev64_f32(v3wy); - const float32x2_t v3xz = vbsl_f32(select, v3xy, v3wz); - - vTemp1 = vcombine_f32(v2wx, v2wx); - vTemp2 = vcombine_f32(v3yw, v3xz); - vTerm = vmlsq_f32(vTerm, vTemp1, vTemp2); - - // vResult - term2 * V1zzyy - const float32x2_t v1zw = vget_high_f32(V1); - - vTemp1 = vcombine_f32(vdup_lane_f32(v1zw, 0), vdup_lane_f32(v1yx, 0)); - vResult = vmlsq_f32(vResult, vTerm, vTemp1); - - // Term3: V2yzxy * V3zxyx - const float32x2_t v3zx = vrev64_f32(v3xz); - - vTemp1 = vcombine_f32(v2yz, v2xy); - vTemp2 = vcombine_f32(v3zx, v3yx); - vTerm = vmulq_f32(vTemp1, vTemp2); - - // - V2zxyx * V3yzxy - const float32x2_t v2zx = vrev64_f32(v2xz); - - vTemp1 = vcombine_f32(v2zx, v2yx); - vTemp2 = vcombine_f32(v3yz, v3xy); - vTerm = vmlsq_f32(vTerm, vTemp1, vTemp2); - - // vResult + term3 * V1wwwz - const float32x2_t v1wz = vrev64_f32(v1zw); - - vTemp1 = vcombine_f32(vdup_lane_f32(v1wz, 0), v1wz); - return vmlaq_f32(vResult, vTerm, vTemp1); -#elif defined(_XM_SSE_INTRINSICS_) - // V2zwyz * V3wzwy - XMVECTOR vResult = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 1, 3, 2)); - XMVECTOR vTemp3 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 3, 2, 3)); - vResult = _mm_mul_ps(vResult, vTemp3); - // - V2wzwy * V3zwyz - XMVECTOR vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 3, 2, 3)); - vTemp3 = XM_PERMUTE_PS(vTemp3, _MM_SHUFFLE(1, 3, 0, 1)); - vResult = XM_FNMADD_PS(vTemp2, vTemp3, vResult); - // term1 * V1yxxx - XMVECTOR vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 0, 0, 1)); - vResult = _mm_mul_ps(vResult, vTemp1); - - // V2ywxz * V3wxwx - vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 0, 3, 1)); - vTemp3 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 3, 0, 3)); - vTemp3 = _mm_mul_ps(vTemp3, vTemp2); - // - V2wxwx * V3ywxz - vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(2, 1, 2, 1)); - vTemp1 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(2, 0, 3, 1)); - vTemp3 = XM_FNMADD_PS(vTemp2, vTemp1, vTemp3); - // vResult - temp * V1zzyy - vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(1, 1, 2, 2)); - vResult = XM_FNMADD_PS(vTemp1, vTemp3, vResult); - - // V2yzxy * V3zxyx - vTemp2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(1, 0, 2, 1)); - vTemp3 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(0, 1, 0, 2)); - vTemp3 = _mm_mul_ps(vTemp3, vTemp2); - // - V2zxyx * V3yzxy - vTemp2 = XM_PERMUTE_PS(vTemp2, _MM_SHUFFLE(2, 0, 2, 1)); - vTemp1 = XM_PERMUTE_PS(V3, _MM_SHUFFLE(1, 0, 2, 1)); - vTemp3 = XM_FNMADD_PS(vTemp1, vTemp2, vTemp3); - // vResult + term * V1wwwz - vTemp1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 3, 3, 3)); - vResult = XM_FMADD_PS(vTemp3, vTemp1, vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4LengthSq(FXMVECTOR V) noexcept { - return XMVector4Dot(V, V); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLengthEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector4LengthSq(V); - Result = XMVectorReciprocalSqrtEst(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot4 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - // Reciprocal sqrt (estimate) - v2 = vrsqrte_f32(v1); - return vcombine_f32(v2, v2); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); - return _mm_rsqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_rsqrt_ps(vLengthSq); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y,z and w - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and w - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); - // x+z, y+w - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // x+z,x+z,x+z,y+w - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); - // ??,??,y+w,y+w - vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); - // ??,??,x+z+y+w,?? - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // Splat the length - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); - // Get the reciprocal - vLengthSq = _mm_rsqrt_ps(vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4ReciprocalLength(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector4LengthSq(V); - Result = XMVectorReciprocalSqrt(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot4 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - // Reciprocal sqrt - float32x2_t S0 = vrsqrte_f32(v1); - float32x2_t P0 = vmul_f32(v1, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(v1, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - float32x2_t Result = vmul_f32(S1, R1); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); - XMVECTOR vLengthSq = _mm_sqrt_ps(vTemp); - return _mm_div_ps(g_XMOne, vLengthSq); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ps(vLengthSq); - vLengthSq = _mm_div_ps(g_XMOne, vLengthSq); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y,z and w - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and w - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); - // x+z, y+w - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // x+z,x+z,x+z,y+w - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); - // ??,??,y+w,y+w - vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); - // ??,??,x+z+y+w,?? - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // Splat the length - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); - // Get the reciprocal - vLengthSq = _mm_sqrt_ps(vLengthSq); - // Accurate! - vLengthSq = _mm_div_ps(g_XMOne, vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4LengthEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector4LengthSq(V); - Result = XMVectorSqrtEst(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot4 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - const float32x2_t zero = vdup_n_f32(0); - uint32x2_t VEqualsZero = vceq_f32(v1, zero); - // Sqrt (estimate) - float32x2_t Result = vrsqrte_f32(v1); - Result = vmul_f32(v1, Result); - Result = vbsl_f32(VEqualsZero, zero, Result); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); - return _mm_sqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y,z and w - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and w - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); - // x+z, y+w - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // x+z,x+z,x+z,y+w - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); - // ??,??,y+w,y+w - vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); - // ??,??,x+z+y+w,?? - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // Splat the length - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); - // Get the length - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Length(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - - Result = XMVector4LengthSq(V); - Result = XMVectorSqrt(Result); - - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot4 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - const float32x2_t zero = vdup_n_f32(0); - uint32x2_t VEqualsZero = vceq_f32(v1, zero); - // Sqrt - float32x2_t S0 = vrsqrte_f32(v1); - float32x2_t P0 = vmul_f32(v1, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(v1, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - float32x2_t Result = vmul_f32(S1, R1); - Result = vmul_f32(v1, Result); - Result = vbsl_f32(VEqualsZero, zero, Result); - return vcombine_f32(Result, Result); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); - return _mm_sqrt_ps(vTemp); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y,z and w - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and w - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); - // x+z, y+w - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // x+z,x+z,x+z,y+w - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); - // ??,??,y+w,y+w - vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); - // ??,??,x+z+y+w,?? - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // Splat the length - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); - // Get the length - vLengthSq = _mm_sqrt_ps(vLengthSq); - return vLengthSq; -#endif -} - -//------------------------------------------------------------------------------ -// XMVector4NormalizeEst uses a reciprocal estimate and -// returns QNaN on zero and infinite vectors. - -inline XMVECTOR XM_CALLCONV XMVector4NormalizeEst(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR Result; - Result = XMVector4ReciprocalLength(V); - Result = XMVectorMultiply(V, Result); - return Result; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot4 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - // Reciprocal sqrt (estimate) - v2 = vrsqrte_f32(v1); - // Normalize - return vmulq_f32(V, vcombine_f32(v2, v2)); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vTemp = _mm_dp_ps(V, V, 0xff); - XMVECTOR vResult = _mm_rsqrt_ps(vTemp); - return _mm_mul_ps(vResult, V); -#elif defined(_XM_SSE3_INTRINSICS_) - XMVECTOR vDot = _mm_mul_ps(V, V); - vDot = _mm_hadd_ps(vDot, vDot); - vDot = _mm_hadd_ps(vDot, vDot); - vDot = _mm_rsqrt_ps(vDot); - vDot = _mm_mul_ps(vDot, V); - return vDot; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y,z and w - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and w - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); - // x+z, y+w - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // x+z,x+z,x+z,y+w - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); - // ??,??,y+w,y+w - vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); - // ??,??,x+z+y+w,?? - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // Splat the length - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); - // Get the reciprocal - XMVECTOR vResult = _mm_rsqrt_ps(vLengthSq); - // Reciprocal mul to perform the normalization - vResult = _mm_mul_ps(vResult, V); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Normalize(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - float fLength; - XMVECTOR vResult; - - vResult = XMVector4Length(V); - fLength = vResult.vector4_f32[0]; - - // Prevent divide by zero - if (fLength > 0) { - fLength = 1.0f / fLength; - } - - vResult.vector4_f32[0] = V.vector4_f32[0] * fLength; - vResult.vector4_f32[1] = V.vector4_f32[1] * fLength; - vResult.vector4_f32[2] = V.vector4_f32[2] * fLength; - vResult.vector4_f32[3] = V.vector4_f32[3] * fLength; - return vResult; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - // Dot4 - float32x4_t vTemp = vmulq_f32(V, V); - float32x2_t v1 = vget_low_f32(vTemp); - float32x2_t v2 = vget_high_f32(vTemp); - v1 = vadd_f32(v1, v2); - v1 = vpadd_f32(v1, v1); - uint32x2_t VEqualsZero = vceq_f32(v1, vdup_n_f32(0)); - uint32x2_t VEqualsInf = vceq_f32(v1, vget_low_f32(g_XMInfinity)); - // Reciprocal sqrt (2 iterations of Newton-Raphson) - float32x2_t S0 = vrsqrte_f32(v1); - float32x2_t P0 = vmul_f32(v1, S0); - float32x2_t R0 = vrsqrts_f32(P0, S0); - float32x2_t S1 = vmul_f32(S0, R0); - float32x2_t P1 = vmul_f32(v1, S1); - float32x2_t R1 = vrsqrts_f32(P1, S1); - v2 = vmul_f32(S1, R1); - // Normalize - XMVECTOR vResult = vmulq_f32(V, vcombine_f32(v2, v2)); - vResult = vbslq_f32(vcombine_u32(VEqualsZero, VEqualsZero), vdupq_n_f32(0), - vResult); - return vbslq_f32(vcombine_u32(VEqualsInf, VEqualsInf), g_XMQNaN, vResult); -#elif defined(_XM_SSE4_INTRINSICS_) - XMVECTOR vLengthSq = _mm_dp_ps(V, V, 0xff); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#elif defined(_XM_SSE3_INTRINSICS_) - // Perform the dot product on x,y,z and w - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - vLengthSq = _mm_hadd_ps(vLengthSq, vLengthSq); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - // Perform the dot product on x,y,z and w - XMVECTOR vLengthSq = _mm_mul_ps(V, V); - // vTemp has z and w - XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); - // x+z, y+w - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // x+z,x+z,x+z,y+w - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); - // ??,??,y+w,y+w - vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); - // ??,??,x+z+y+w,?? - vLengthSq = _mm_add_ps(vLengthSq, vTemp); - // Splat the length - vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); - // Prepare for the division - XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); - // Create zero with a single instruction - XMVECTOR vZeroMask = _mm_setzero_ps(); - // Test for a divide by zero (Must be FP to detect -0.0) - vZeroMask = _mm_cmpneq_ps(vZeroMask, vResult); - // Failsafe on zero (Or epsilon) length planes - // If the length is infinity, set the elements to zero - vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity); - // Divide to perform the normalization - vResult = _mm_div_ps(V, vResult); - // Any that are infinity, set to zero - vResult = _mm_and_ps(vResult, vZeroMask); - // Select qnan or result based on infinite length - XMVECTOR vTemp1 = _mm_andnot_ps(vLengthSq, g_XMQNaN); - XMVECTOR vTemp2 = _mm_and_ps(vResult, vLengthSq); - vResult = _mm_or_ps(vTemp1, vTemp2); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4ClampLength(FXMVECTOR V, float LengthMin, - float LengthMax) noexcept { - XMVECTOR ClampMax = XMVectorReplicate(LengthMax); - XMVECTOR ClampMin = XMVectorReplicate(LengthMin); - - return XMVector4ClampLengthV(V, ClampMin, ClampMax); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4ClampLengthV( - FXMVECTOR V, FXMVECTOR LengthMin, FXMVECTOR LengthMax) noexcept { - assert((XMVectorGetY(LengthMin) == XMVectorGetX(LengthMin)) && - (XMVectorGetZ(LengthMin) == XMVectorGetX(LengthMin)) && - (XMVectorGetW(LengthMin) == XMVectorGetX(LengthMin))); - assert((XMVectorGetY(LengthMax) == XMVectorGetX(LengthMax)) && - (XMVectorGetZ(LengthMax) == XMVectorGetX(LengthMax)) && - (XMVectorGetW(LengthMax) == XMVectorGetX(LengthMax))); - assert(XMVector4GreaterOrEqual(LengthMin, XMVectorZero())); - assert(XMVector4GreaterOrEqual(LengthMax, XMVectorZero())); - assert(XMVector4GreaterOrEqual(LengthMax, LengthMin)); - - XMVECTOR LengthSq = XMVector4LengthSq(V); - - const XMVECTOR Zero = XMVectorZero(); - - XMVECTOR RcpLength = XMVectorReciprocalSqrt(LengthSq); - - XMVECTOR InfiniteLength = XMVectorEqualInt(LengthSq, g_XMInfinity.v); - XMVECTOR ZeroLength = XMVectorEqual(LengthSq, Zero); - - XMVECTOR Normal = XMVectorMultiply(V, RcpLength); - - XMVECTOR Length = XMVectorMultiply(LengthSq, RcpLength); - - XMVECTOR Select = XMVectorEqualInt(InfiniteLength, ZeroLength); - Length = XMVectorSelect(LengthSq, Length, Select); - Normal = XMVectorSelect(LengthSq, Normal, Select); - - XMVECTOR ControlMax = XMVectorGreater(Length, LengthMax); - XMVECTOR ControlMin = XMVectorLess(Length, LengthMin); - - XMVECTOR ClampLength = XMVectorSelect(Length, LengthMax, ControlMax); - ClampLength = XMVectorSelect(ClampLength, LengthMin, ControlMin); - - XMVECTOR Result = XMVectorMultiply(Normal, ClampLength); - - // Preserve the original vector (with no precision loss) if the length falls - // within the given range - XMVECTOR Control = XMVectorEqualInt(ControlMax, ControlMin); - Result = XMVectorSelect(Result, V, Control); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Reflect(FXMVECTOR Incident, - FXMVECTOR Normal) noexcept { - // Result = Incident - (2 * dot(Incident, Normal)) * Normal - - XMVECTOR Result = XMVector4Dot(Incident, Normal); - Result = XMVectorAdd(Result, Result); - Result = XMVectorNegativeMultiplySubtract(Result, Normal, Incident); - - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Refract(FXMVECTOR Incident, - FXMVECTOR Normal, - float RefractionIndex) noexcept { - XMVECTOR Index = XMVectorReplicate(RefractionIndex); - return XMVector4RefractV(Incident, Normal, Index); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4RefractV( - FXMVECTOR Incident, FXMVECTOR Normal, FXMVECTOR RefractionIndex) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR IDotN; - XMVECTOR R; - const XMVECTOR Zero = XMVectorZero(); - - // Result = RefractionIndex * Incident - Normal * (RefractionIndex * - // dot(Incident, Normal) + sqrt(1 - RefractionIndex * RefractionIndex * (1 - - // dot(Incident, Normal) * dot(Incident, Normal)))) - - IDotN = XMVector4Dot(Incident, Normal); - - // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - R = XMVectorNegativeMultiplySubtract(IDotN, IDotN, g_XMOne.v); - R = XMVectorMultiply(R, RefractionIndex); - R = XMVectorNegativeMultiplySubtract(R, RefractionIndex, g_XMOne.v); - - if (XMVector4LessOrEqual(R, Zero)) { - // Total internal reflection - return Zero; - } else { - XMVECTOR Result; - - // R = RefractionIndex * IDotN + sqrt(R) - R = XMVectorSqrt(R); - R = XMVectorMultiplyAdd(RefractionIndex, IDotN, R); - - // Result = RefractionIndex * Incident - Normal * R - Result = XMVectorMultiply(RefractionIndex, Incident); - Result = XMVectorNegativeMultiplySubtract(Normal, R, Result); - - return Result; - } - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - XMVECTOR IDotN = XMVector4Dot(Incident, Normal); - - // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - float32x4_t R = vmlsq_f32(g_XMOne, IDotN, IDotN); - R = vmulq_f32(R, RefractionIndex); - R = vmlsq_f32(g_XMOne, R, RefractionIndex); - - uint32x4_t isrzero = vcleq_f32(R, g_XMZero); - uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(isrzero)), - vget_high_u8(vreinterpretq_u8_u32(isrzero))); - uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), - vreinterpret_u16_u8(vTemp.val[1])); - - float32x4_t vResult; - if (vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1) == 0xFFFFFFFFU) { - // Total internal reflection - vResult = g_XMZero; - } else { - // Sqrt(R) - float32x4_t S0 = vrsqrteq_f32(R); - float32x4_t P0 = vmulq_f32(R, S0); - float32x4_t R0 = vrsqrtsq_f32(P0, S0); - float32x4_t S1 = vmulq_f32(S0, R0); - float32x4_t P1 = vmulq_f32(R, S1); - float32x4_t R1 = vrsqrtsq_f32(P1, S1); - float32x4_t S2 = vmulq_f32(S1, R1); - R = vmulq_f32(R, S2); - // R = RefractionIndex * IDotN + sqrt(R) - R = vmlaq_f32(R, RefractionIndex, IDotN); - // Result = RefractionIndex * Incident - Normal * R - vResult = vmulq_f32(RefractionIndex, Incident); - vResult = vmlsq_f32(vResult, R, Normal); - } - return vResult; -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR IDotN = XMVector4Dot(Incident, Normal); - - // R = 1.0f - RefractionIndex * RefractionIndex * (1.0f - IDotN * IDotN) - XMVECTOR R = XM_FNMADD_PS(IDotN, IDotN, g_XMOne); - XMVECTOR R2 = _mm_mul_ps(RefractionIndex, RefractionIndex); - R = XM_FNMADD_PS(R, R2, g_XMOne); - - XMVECTOR vResult = _mm_cmple_ps(R, g_XMZero); - if (_mm_movemask_ps(vResult) == 0x0f) { - // Total internal reflection - vResult = g_XMZero; - } else { - // R = RefractionIndex * IDotN + sqrt(R) - R = _mm_sqrt_ps(R); - R = XM_FMADD_PS(RefractionIndex, IDotN, R); - // Result = RefractionIndex * Incident - Normal * R - vResult = _mm_mul_ps(RefractionIndex, Incident); - vResult = XM_FNMADD_PS(R, Normal, vResult); - } - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Orthogonal(FXMVECTOR V) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - XMVECTORF32 Result = {{{V.vector4_f32[2], V.vector4_f32[3], - -V.vector4_f32[0], -V.vector4_f32[1]}}}; - return Result.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Negate = {{{1.f, 1.f, -1.f, -1.f}}}; - - float32x4_t Result = vcombine_f32(vget_high_f32(V), vget_low_f32(V)); - return vmulq_f32(Result, Negate); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 FlipZW = {{{1.0f, 1.0f, -1.0f, -1.0f}}}; - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 0, 3, 2)); - vResult = _mm_mul_ps(vResult, FlipZW); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector4AngleBetweenNormalsEst(FXMVECTOR N1, FXMVECTOR N2) noexcept { - XMVECTOR Result = XMVector4Dot(N1, N2); - Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); - Result = XMVectorACosEst(Result); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector4AngleBetweenNormals(FXMVECTOR N1, FXMVECTOR N2) noexcept { - XMVECTOR Result = XMVector4Dot(N1, N2); - Result = XMVectorClamp(Result, g_XMNegativeOne.v, g_XMOne.v); - Result = XMVectorACos(Result); - return Result; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV -XMVector4AngleBetweenVectors(FXMVECTOR V1, FXMVECTOR V2) noexcept { - XMVECTOR L1 = XMVector4ReciprocalLength(V1); - XMVECTOR L2 = XMVector4ReciprocalLength(V2); - - XMVECTOR Dot = XMVector4Dot(V1, V2); - - L1 = XMVectorMultiply(L1, L2); - - XMVECTOR CosAngle = XMVectorMultiply(Dot, L1); - CosAngle = XMVectorClamp(CosAngle, g_XMNegativeOne.v, g_XMOne.v); - - return XMVectorACos(CosAngle); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV XMVector4Transform(FXMVECTOR V, - FXMMATRIX M) noexcept { -#if defined(_XM_NO_INTRINSICS_) - - float fX = (M.m[0][0] * V.vector4_f32[0]) + (M.m[1][0] * V.vector4_f32[1]) + - (M.m[2][0] * V.vector4_f32[2]) + (M.m[3][0] * V.vector4_f32[3]); - float fY = (M.m[0][1] * V.vector4_f32[0]) + (M.m[1][1] * V.vector4_f32[1]) + - (M.m[2][1] * V.vector4_f32[2]) + (M.m[3][1] * V.vector4_f32[3]); - float fZ = (M.m[0][2] * V.vector4_f32[0]) + (M.m[1][2] * V.vector4_f32[1]) + - (M.m[2][2] * V.vector4_f32[2]) + (M.m[3][2] * V.vector4_f32[3]); - float fW = (M.m[0][3] * V.vector4_f32[0]) + (M.m[1][3] * V.vector4_f32[1]) + - (M.m[2][3] * V.vector4_f32[2]) + (M.m[3][3] * V.vector4_f32[3]); - XMVECTORF32 vResult = {{{fX, fY, fZ, fW}}}; - return vResult.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x2_t VL = vget_low_f32(V); - XMVECTOR vResult = vmulq_lane_f32(M.r[0], VL, 0); // X - vResult = vmlaq_lane_f32(vResult, M.r[1], VL, 1); // Y - float32x2_t VH = vget_high_f32(V); - vResult = vmlaq_lane_f32(vResult, M.r[2], VH, 0); // Z - return vmlaq_lane_f32(vResult, M.r[3], VH, 1); // W -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); // W - vResult = _mm_mul_ps(vResult, M.r[3]); - XMVECTOR vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); // Z - vResult = XM_FMADD_PS(vTemp, M.r[2], vResult); - vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); // Y - vResult = XM_FMADD_PS(vTemp, M.r[1], vResult); - vTemp = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); // X - vResult = XM_FMADD_PS(vTemp, M.r[0], vResult); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMFLOAT4* XM_CALLCONV XMVector4TransformStream( - XMFLOAT4* pOutputStream, size_t OutputStride, const XMFLOAT4* pInputStream, - size_t InputStride, size_t VectorCount, FXMMATRIX M) noexcept { - assert(pOutputStream != nullptr); - assert(pInputStream != nullptr); - - assert(InputStride >= sizeof(XMFLOAT4)); - _Analysis_assume_(InputStride >= sizeof(XMFLOAT4)); - - assert(OutputStride >= sizeof(XMFLOAT4)); - _Analysis_assume_(OutputStride >= sizeof(XMFLOAT4)); - -#if defined(_XM_NO_INTRINSICS_) - - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - for (size_t i = 0; i < VectorCount; i++) { - XMVECTOR V = - XMLoadFloat4(reinterpret_cast(pInputVector)); - XMVECTOR W = XMVectorSplatW(V); - XMVECTOR Z = XMVectorSplatZ(V); - XMVECTOR Y = XMVectorSplatY(V); - XMVECTOR X = XMVectorSplatX(V); - - XMVECTOR Result = XMVectorMultiply(W, row3); - Result = XMVectorMultiplyAdd(Z, row2, Result); - Result = XMVectorMultiplyAdd(Y, row1, Result); - Result = XMVectorMultiplyAdd(X, row0, Result); - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015, "PREfast noise: Esp:1307") -#endif - - XMStoreFloat4(reinterpret_cast(pOutputVector), Result); - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - - pInputVector += InputStride; - pOutputVector += OutputStride; - } - - return pOutputStream; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - size_t i = 0; - size_t four = VectorCount >> 2; - if (four > 0) { - if ((InputStride == sizeof(XMFLOAT4)) && - (OutputStride == sizeof(XMFLOAT4))) { - for (size_t j = 0; j < four; ++j) { - float32x4x4_t V = - vld4q_f32(reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT4) * 4; - - float32x2_t r = vget_low_f32(row0); - XMVECTOR vResult0 = vmulq_lane_f32(V.val[0], r, 0); // Ax - XMVECTOR vResult1 = vmulq_lane_f32(V.val[0], r, 1); // Bx - - XM_PREFETCH(pInputVector); - - r = vget_high_f32(row0); - XMVECTOR vResult2 = vmulq_lane_f32(V.val[0], r, 0); // Cx - XMVECTOR vResult3 = vmulq_lane_f32(V.val[0], r, 1); // Dx - - XM_PREFETCH(pInputVector + XM_CACHE_LINE_SIZE); - - r = vget_low_f32(row1); - vResult0 = vmlaq_lane_f32(vResult0, V.val[1], r, 0); // Ax+Ey - vResult1 = vmlaq_lane_f32(vResult1, V.val[1], r, 1); // Bx+Fy - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 2)); - - r = vget_high_f32(row1); - vResult2 = vmlaq_lane_f32(vResult2, V.val[1], r, 0); // Cx+Gy - vResult3 = vmlaq_lane_f32(vResult3, V.val[1], r, 1); // Dx+Hy - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3)); - - r = vget_low_f32(row2); - vResult0 = - vmlaq_lane_f32(vResult0, V.val[2], r, 0); // Ax+Ey+Iz - vResult1 = - vmlaq_lane_f32(vResult1, V.val[2], r, 1); // Bx+Fy+Jz - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 4)); - - r = vget_high_f32(row2); - vResult2 = - vmlaq_lane_f32(vResult2, V.val[2], r, 0); // Cx+Gy+Kz - vResult3 = - vmlaq_lane_f32(vResult3, V.val[2], r, 1); // Dx+Hy+Lz - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5)); - - r = vget_low_f32(row3); - vResult0 = - vmlaq_lane_f32(vResult0, V.val[3], r, 0); // Ax+Ey+Iz+Mw - vResult1 = - vmlaq_lane_f32(vResult1, V.val[3], r, 1); // Bx+Fy+Jz+Nw - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 6)); - - r = vget_high_f32(row3); - vResult2 = - vmlaq_lane_f32(vResult2, V.val[3], r, 0); // Cx+Gy+Kz+Ow - vResult3 = - vmlaq_lane_f32(vResult3, V.val[3], r, 1); // Dx+Hy+Lz+Pw - - XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 7)); - - V.val[0] = vResult0; - V.val[1] = vResult1; - V.val[2] = vResult2; - V.val[3] = vResult3; - - vst4q_f32(reinterpret_cast(pOutputVector), V); - pOutputVector += sizeof(XMFLOAT4) * 4; - - i += 4; - } - } - } - - for (; i < VectorCount; i++) { - XMVECTOR V = vld1q_f32(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - float32x2_t VL = vget_low_f32(V); - XMVECTOR vResult = vmulq_lane_f32(row0, VL, 0); // X - vResult = vmlaq_lane_f32(vResult, row1, VL, 1); // Y - float32x2_t VH = vget_high_f32(V); - vResult = vmlaq_lane_f32(vResult, row2, VH, 0); // Z - vResult = vmlaq_lane_f32(vResult, row3, VH, 1); // W - - vst1q_f32(reinterpret_cast(pOutputVector), vResult); - pOutputVector += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_AVX2_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t two = VectorCount >> 1; - if (two > 0) { - __m256 row0 = _mm256_broadcast_ps(&M.r[0]); - __m256 row1 = _mm256_broadcast_ps(&M.r[1]); - __m256 row2 = _mm256_broadcast_ps(&M.r[2]); - __m256 row3 = _mm256_broadcast_ps(&M.r[3]); - - if (InputStride == sizeof(XMFLOAT4)) { - if (OutputStride == sizeof(XMFLOAT4)) { - if (!(reinterpret_cast(pOutputStream) & 0x1F)) { - // Packed input, aligned & packed output - for (size_t j = 0; j < two; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT4) * 2; - - __m256 vTempX = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - __m256 vTempY = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 vTempZ = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 vTempW = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm256_mul_ps(vTempX, row0); - vTempY = _mm256_mul_ps(vTempY, row1); - vTempZ = _mm256_fmadd_ps(vTempZ, row2, vTempX); - vTempW = _mm256_fmadd_ps(vTempW, row3, vTempY); - vTempX = _mm256_add_ps(vTempZ, vTempW); - - XM256_STREAM_PS(reinterpret_cast(pOutputVector), - vTempX); - pOutputVector += sizeof(XMFLOAT4) * 2; - - i += 2; - } - } else { - // Packed input, packed output - for (size_t j = 0; j < two; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT4) * 2; - - __m256 vTempX = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - __m256 vTempY = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 vTempZ = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 vTempW = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm256_mul_ps(vTempX, row0); - vTempY = _mm256_mul_ps(vTempY, row1); - vTempZ = _mm256_fmadd_ps(vTempZ, row2, vTempX); - vTempW = _mm256_fmadd_ps(vTempW, row3, vTempY); - vTempX = _mm256_add_ps(vTempZ, vTempW); - - _mm256_storeu_ps( - reinterpret_cast(pOutputVector), vTempX); - pOutputVector += sizeof(XMFLOAT4) * 2; - - i += 2; - } - } - } else { - // Packed input, unpacked output - for (size_t j = 0; j < two; ++j) { - __m256 VV = _mm256_loadu_ps( - reinterpret_cast(pInputVector)); - pInputVector += sizeof(XMFLOAT4) * 2; - - __m256 vTempX = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(0, 0, 0, 0)); - __m256 vTempY = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(1, 1, 1, 1)); - __m256 vTempZ = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(2, 2, 2, 2)); - __m256 vTempW = - _mm256_shuffle_ps(VV, VV, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm256_mul_ps(vTempX, row0); - vTempY = _mm256_mul_ps(vTempY, row1); - vTempZ = _mm256_fmadd_ps(vTempZ, row2, vTempX); - vTempW = _mm256_fmadd_ps(vTempW, row3, vTempY); - vTempX = _mm256_add_ps(vTempZ, vTempW); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - _mm256_castps256_ps128(vTempX)); - pOutputVector += OutputStride; - - _mm_storeu_ps(reinterpret_cast(pOutputVector), - _mm256_extractf128_ps(vTempX, 1)); - pOutputVector += OutputStride; - i += 2; - } - } - } - } - - if (i < VectorCount) { - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - for (; i < VectorCount; i++) { - __m128 V = - _mm_loadu_ps(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm_mul_ps(vTempX, row0); - vTempY = _mm_mul_ps(vTempY, row1); - vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); - vTempW = XM_FMADD_PS(vTempW, row3, vTempY); - vTempX = _mm_add_ps(vTempZ, vTempW); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), vTempX); - pOutputVector += OutputStride; - } - } - - XM_SFENCE(); - - return pOutputStream; -#elif defined(_XM_SSE_INTRINSICS_) - auto pInputVector = reinterpret_cast(pInputStream); - auto pOutputVector = reinterpret_cast(pOutputStream); - - const XMVECTOR row0 = M.r[0]; - const XMVECTOR row1 = M.r[1]; - const XMVECTOR row2 = M.r[2]; - const XMVECTOR row3 = M.r[3]; - - if (!(reinterpret_cast(pOutputStream) & 0xF) && - !(OutputStride & 0xF)) { - if (!(reinterpret_cast(pInputStream) & 0xF) && - !(InputStride & 0xF)) { - // Aligned input, aligned output - for (size_t i = 0; i < VectorCount; i++) { - __m128 V = - _mm_load_ps(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm_mul_ps(vTempX, row0); - vTempY = _mm_mul_ps(vTempY, row1); - vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); - vTempW = XM_FMADD_PS(vTempW, row3, vTempY); - vTempX = _mm_add_ps(vTempZ, vTempW); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), vTempX); - pOutputVector += OutputStride; - } - } else { - // Unaligned input, aligned output - for (size_t i = 0; i < VectorCount; i++) { - __m128 V = - _mm_loadu_ps(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm_mul_ps(vTempX, row0); - vTempY = _mm_mul_ps(vTempY, row1); - vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); - vTempW = XM_FMADD_PS(vTempW, row3, vTempY); - vTempX = _mm_add_ps(vTempZ, vTempW); - - XM_STREAM_PS(reinterpret_cast(pOutputVector), vTempX); - pOutputVector += OutputStride; - } - } - } else { - if (!(reinterpret_cast(pInputStream) & 0xF) && - !(InputStride & 0xF)) { - // Aligned input, unaligned output - for (size_t i = 0; i < VectorCount; i++) { - __m128 V = - _mm_load_ps(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm_mul_ps(vTempX, row0); - vTempY = _mm_mul_ps(vTempY, row1); - vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); - vTempW = XM_FMADD_PS(vTempW, row3, vTempY); - vTempX = _mm_add_ps(vTempZ, vTempW); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), vTempX); - pOutputVector += OutputStride; - } - } else { - // Unaligned input, unaligned output - for (size_t i = 0; i < VectorCount; i++) { - __m128 V = - _mm_loadu_ps(reinterpret_cast(pInputVector)); - pInputVector += InputStride; - - XMVECTOR vTempX = XM_PERMUTE_PS(V, _MM_SHUFFLE(0, 0, 0, 0)); - XMVECTOR vTempY = XM_PERMUTE_PS(V, _MM_SHUFFLE(1, 1, 1, 1)); - XMVECTOR vTempZ = XM_PERMUTE_PS(V, _MM_SHUFFLE(2, 2, 2, 2)); - XMVECTOR vTempW = XM_PERMUTE_PS(V, _MM_SHUFFLE(3, 3, 3, 3)); - - vTempX = _mm_mul_ps(vTempX, row0); - vTempY = _mm_mul_ps(vTempY, row1); - vTempZ = XM_FMADD_PS(vTempZ, row2, vTempX); - vTempW = XM_FMADD_PS(vTempW, row3, vTempY); - vTempX = _mm_add_ps(vTempZ, vTempW); - - _mm_storeu_ps(reinterpret_cast(pOutputVector), vTempX); - pOutputVector += OutputStride; - } - } - } - - XM_SFENCE(); - - return pOutputStream; -#endif -} - -/**************************************************************************** - * - * XMVECTOR operators - * - ****************************************************************************/ - -#ifndef _XM_NO_XMVECTOR_OVERLOADS_ - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator+(FXMVECTOR V) noexcept { return V; } - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator-(FXMVECTOR V) noexcept { - return XMVectorNegate(V); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR& XM_CALLCONV operator+=(XMVECTOR& V1, FXMVECTOR V2) noexcept { - V1 = XMVectorAdd(V1, V2); - return V1; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR& XM_CALLCONV operator-=(XMVECTOR& V1, FXMVECTOR V2) noexcept { - V1 = XMVectorSubtract(V1, V2); - return V1; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR& XM_CALLCONV operator*=(XMVECTOR& V1, FXMVECTOR V2) noexcept { - V1 = XMVectorMultiply(V1, V2); - return V1; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR& XM_CALLCONV operator/=(XMVECTOR& V1, FXMVECTOR V2) noexcept { - V1 = XMVectorDivide(V1, V2); - return V1; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR& operator*=(XMVECTOR& V, const float S) noexcept { - V = XMVectorScale(V, S); - return V; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR& operator/=(XMVECTOR& V, const float S) noexcept { - XMVECTOR vS = XMVectorReplicate(S); - V = XMVectorDivide(V, vS); - return V; -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator+(FXMVECTOR V1, FXMVECTOR V2) noexcept { - return XMVectorAdd(V1, V2); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator-(FXMVECTOR V1, FXMVECTOR V2) noexcept { - return XMVectorSubtract(V1, V2); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator*(FXMVECTOR V1, FXMVECTOR V2) noexcept { - return XMVectorMultiply(V1, V2); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator/(FXMVECTOR V1, FXMVECTOR V2) noexcept { - return XMVectorDivide(V1, V2); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator*(FXMVECTOR V, const float S) noexcept { - return XMVectorScale(V, S); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator/(FXMVECTOR V, const float S) noexcept { - XMVECTOR vS = XMVectorReplicate(S); - return XMVectorDivide(V, vS); -} - -//------------------------------------------------------------------------------ - -inline XMVECTOR XM_CALLCONV operator*(float S, FXMVECTOR V) noexcept { - return XMVectorScale(V, S); -} - -#endif /* !_XM_NO_XMVECTOR_OVERLOADS_ */ - -#if defined(_XM_NO_INTRINSICS_) -#undef XMISNAN -#undef XMISINF -#endif - -#if defined(_XM_SSE_INTRINSICS_) -#undef XM3UNPACK3INTO4 -#undef XM3PACK4INTO3 -#endif diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXPackedVector.h b/targets/app/linux/Stubs/DirectXMath/DirectXPackedVector.h deleted file mode 100644 index 4442fde21..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXPackedVector.h +++ /dev/null @@ -1,1329 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXPackedVector.h -- SIMD C++ Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -#include "DirectXMath.h" - -namespace DirectX { - -namespace PackedVector { -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4201 4365 4324 4996) -// C4201: nonstandard extension used -// C4365: Off by default noise -// C4324: alignment padding warnings -// C4996: deprecation warnings -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wgnu-anonymous-struct" -#pragma clang diagnostic ignored "-Wnested-anon-types" -#endif - -//------------------------------------------------------------------------------ -// ARGB Color; 8-8-8-8 bit unsigned normalized integer components packed into -// a 32 bit integer. The normalized color is packed into 32 bits using 8 bit -// unsigned, normalized integers for the alpha, red, green, and blue components. -// The alpha component is stored in the most significant bits and the blue -// component in the least significant bits (A8R8G8B8): -// [32] aaaaaaaa rrrrrrrr gggggggg bbbbbbbb [0] -struct XMCOLOR { - union { - struct { - uint8_t b; // Blue: 0/255 to 255/255 - uint8_t g; // Green: 0/255 to 255/255 - uint8_t r; // Red: 0/255 to 255/255 - uint8_t a; // Alpha: 0/255 to 255/255 - }; - uint32_t c; - }; - - XMCOLOR() = default; - - XMCOLOR(const XMCOLOR&) = default; - XMCOLOR& operator=(const XMCOLOR&) = default; - - XMCOLOR(XMCOLOR&&) = default; - XMCOLOR& operator=(XMCOLOR&&) = default; - - constexpr XMCOLOR(uint32_t Color) noexcept : c(Color) {} - XMCOLOR(float _r, float _g, float _b, float _a) noexcept; - explicit XMCOLOR(_In_reads_(4) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return c; } - - XMCOLOR& operator=(const uint32_t Color) noexcept { - c = Color; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 16 bit floating point number consisting of a sign bit, a 5 bit biased -// exponent, and a 10 bit mantissa -using HALF = uint16_t; - -//------------------------------------------------------------------------------ -// 2D Vector; 16 bit floating point components -struct XMHALF2 { - union { - struct { - HALF x; - HALF y; - }; - uint32_t v; - }; - - XMHALF2() = default; - - XMHALF2(const XMHALF2&) = default; - XMHALF2& operator=(const XMHALF2&) = default; - - XMHALF2(XMHALF2&&) = default; - XMHALF2& operator=(XMHALF2&&) = default; - - explicit constexpr XMHALF2(uint32_t Packed) noexcept : v(Packed) {} - constexpr XMHALF2(HALF _x, HALF _y) noexcept : x(_x), y(_y) {} - explicit XMHALF2(_In_reads_(2) const HALF* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMHALF2(float _x, float _y) noexcept; - explicit XMHALF2(_In_reads_(2) const float* pArray) noexcept; - - XMHALF2& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 2D Vector; 16 bit signed normalized integer components -struct XMSHORTN2 { - union { - struct { - int16_t x; - int16_t y; - }; - uint32_t v; - }; - - XMSHORTN2() = default; - - XMSHORTN2(const XMSHORTN2&) = default; - XMSHORTN2& operator=(const XMSHORTN2&) = default; - - XMSHORTN2(XMSHORTN2&&) = default; - XMSHORTN2& operator=(XMSHORTN2&&) = default; - - explicit constexpr XMSHORTN2(uint32_t Packed) noexcept : v(Packed) {} - constexpr XMSHORTN2(int16_t _x, int16_t _y) noexcept : x(_x), y(_y) {} - explicit XMSHORTN2(_In_reads_(2) const int16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMSHORTN2(float _x, float _y) noexcept; - explicit XMSHORTN2(_In_reads_(2) const float* pArray) noexcept; - - XMSHORTN2& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 2D Vector; 16 bit signed integer components -struct XMSHORT2 { - union { - struct { - int16_t x; - int16_t y; - }; - uint32_t v; - }; - - XMSHORT2() = default; - - XMSHORT2(const XMSHORT2&) = default; - XMSHORT2& operator=(const XMSHORT2&) = default; - - XMSHORT2(XMSHORT2&&) = default; - XMSHORT2& operator=(XMSHORT2&&) = default; - - explicit constexpr XMSHORT2(uint32_t Packed) noexcept : v(Packed) {} - constexpr XMSHORT2(int16_t _x, int16_t _y) noexcept : x(_x), y(_y) {} - explicit XMSHORT2(_In_reads_(2) const int16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMSHORT2(float _x, float _y) noexcept; - explicit XMSHORT2(_In_reads_(2) const float* pArray) noexcept; - - XMSHORT2& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 2D Vector; 16 bit unsigned normalized integer components -struct XMUSHORTN2 { - union { - struct { - uint16_t x; - uint16_t y; - }; - uint32_t v; - }; - - XMUSHORTN2() = default; - - XMUSHORTN2(const XMUSHORTN2&) = default; - XMUSHORTN2& operator=(const XMUSHORTN2&) = default; - - XMUSHORTN2(XMUSHORTN2&&) = default; - XMUSHORTN2& operator=(XMUSHORTN2&&) = default; - - explicit constexpr XMUSHORTN2(uint32_t Packed) noexcept : v(Packed) {} - constexpr XMUSHORTN2(uint16_t _x, uint16_t _y) noexcept : x(_x), y(_y) {} - explicit XMUSHORTN2(_In_reads_(2) const uint16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMUSHORTN2(float _x, float _y) noexcept; - explicit XMUSHORTN2(_In_reads_(2) const float* pArray) noexcept; - - XMUSHORTN2& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 2D Vector; 16 bit unsigned integer components -struct XMUSHORT2 { - union { - struct { - uint16_t x; - uint16_t y; - }; - uint32_t v; - }; - - XMUSHORT2() = default; - - XMUSHORT2(const XMUSHORT2&) = default; - XMUSHORT2& operator=(const XMUSHORT2&) = default; - - XMUSHORT2(XMUSHORT2&&) = default; - XMUSHORT2& operator=(XMUSHORT2&&) = default; - - explicit constexpr XMUSHORT2(uint32_t Packed) noexcept : v(Packed) {} - constexpr XMUSHORT2(uint16_t _x, uint16_t _y) noexcept : x(_x), y(_y) {} - explicit XMUSHORT2(_In_reads_(2) const uint16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMUSHORT2(float _x, float _y) noexcept; - explicit XMUSHORT2(_In_reads_(2) const float* pArray) noexcept; - - XMUSHORT2& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 2D Vector; 8 bit signed normalized integer components -struct XMBYTEN2 { - union { - struct { - int8_t x; - int8_t y; - }; - uint16_t v; - }; - - XMBYTEN2() = default; - - XMBYTEN2(const XMBYTEN2&) = default; - XMBYTEN2& operator=(const XMBYTEN2&) = default; - - XMBYTEN2(XMBYTEN2&&) = default; - XMBYTEN2& operator=(XMBYTEN2&&) = default; - - explicit constexpr XMBYTEN2(uint16_t Packed) noexcept : v(Packed) {} - constexpr XMBYTEN2(int8_t _x, int8_t _y) noexcept : x(_x), y(_y) {} - explicit XMBYTEN2(_In_reads_(2) const int8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMBYTEN2(float _x, float _y) noexcept; - explicit XMBYTEN2(_In_reads_(2) const float* pArray) noexcept; - - XMBYTEN2& operator=(uint16_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 2D Vector; 8 bit signed integer components -struct XMBYTE2 { - union { - struct { - int8_t x; - int8_t y; - }; - uint16_t v; - }; - - XMBYTE2() = default; - - XMBYTE2(const XMBYTE2&) = default; - XMBYTE2& operator=(const XMBYTE2&) = default; - - XMBYTE2(XMBYTE2&&) = default; - XMBYTE2& operator=(XMBYTE2&&) = default; - - explicit constexpr XMBYTE2(uint16_t Packed) noexcept : v(Packed) {} - constexpr XMBYTE2(int8_t _x, int8_t _y) noexcept : x(_x), y(_y) {} - explicit XMBYTE2(_In_reads_(2) const int8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMBYTE2(float _x, float _y) noexcept; - explicit XMBYTE2(_In_reads_(2) const float* pArray) noexcept; - - XMBYTE2& operator=(uint16_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 2D Vector; 8 bit unsigned normalized integer components -struct XMUBYTEN2 { - union { - struct { - uint8_t x; - uint8_t y; - }; - uint16_t v; - }; - - XMUBYTEN2() = default; - - XMUBYTEN2(const XMUBYTEN2&) = default; - XMUBYTEN2& operator=(const XMUBYTEN2&) = default; - - XMUBYTEN2(XMUBYTEN2&&) = default; - XMUBYTEN2& operator=(XMUBYTEN2&&) = default; - - explicit constexpr XMUBYTEN2(uint16_t Packed) noexcept : v(Packed) {} - constexpr XMUBYTEN2(uint8_t _x, uint8_t _y) noexcept : x(_x), y(_y) {} - explicit XMUBYTEN2(_In_reads_(2) const uint8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMUBYTEN2(float _x, float _y) noexcept; - explicit XMUBYTEN2(_In_reads_(2) const float* pArray) noexcept; - - XMUBYTEN2& operator=(uint16_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 2D Vector; 8 bit unsigned integer components -struct XMUBYTE2 { - union { - struct { - uint8_t x; - uint8_t y; - }; - uint16_t v; - }; - - XMUBYTE2() = default; - - XMUBYTE2(const XMUBYTE2&) = default; - XMUBYTE2& operator=(const XMUBYTE2&) = default; - - XMUBYTE2(XMUBYTE2&&) = default; - XMUBYTE2& operator=(XMUBYTE2&&) = default; - - explicit constexpr XMUBYTE2(uint16_t Packed) noexcept : v(Packed) {} - constexpr XMUBYTE2(uint8_t _x, uint8_t _y) noexcept : x(_x), y(_y) {} - explicit XMUBYTE2(_In_reads_(2) const uint8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]) {} - XMUBYTE2(float _x, float _y) noexcept; - explicit XMUBYTE2(_In_reads_(2) const float* pArray) noexcept; - - XMUBYTE2& operator=(uint16_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 3D vector: 5/6/5 unsigned integer components -struct XMU565 { - union { - struct { - uint16_t x : 5; // 0 to 31 - uint16_t y : 6; // 0 to 63 - uint16_t z : 5; // 0 to 31 - }; - uint16_t v; - }; - - XMU565() = default; - - XMU565(const XMU565&) = default; - XMU565& operator=(const XMU565&) = default; - - XMU565(XMU565&&) = default; - XMU565& operator=(XMU565&&) = default; - - explicit constexpr XMU565(uint16_t Packed) noexcept : v(Packed) {} - constexpr XMU565(uint8_t _x, uint8_t _y, uint8_t _z) noexcept - : x(_x), y(_y), z(_z) {} - explicit XMU565(_In_reads_(3) const uint8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]) {} - XMU565(float _x, float _y, float _z) noexcept; - explicit XMU565(_In_reads_(3) const float* pArray) noexcept; - - operator uint16_t() const noexcept { return v; } - - XMU565& operator=(uint16_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 3D vector: 11/11/10 floating-point components -// The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent -// and 6-bit mantissa for x component, a 5-bit biased exponent and -// 6-bit mantissa for y component, a 5-bit biased exponent and a 5-bit -// mantissa for z. The z component is stored in the most significant bits -// and the x component in the least significant bits. No sign bits so -// all partial-precision numbers are positive. -// (Z10Y11X11): [32] ZZZZZzzz zzzYYYYY yyyyyyXX XXXxxxxx [0] -struct XMFLOAT3PK { - union { - struct { - uint32_t xm : 6; // x-mantissa - uint32_t xe : 5; // x-exponent - uint32_t ym : 6; // y-mantissa - uint32_t ye : 5; // y-exponent - uint32_t zm : 5; // z-mantissa - uint32_t ze : 5; // z-exponent - }; - uint32_t v; - }; - - XMFLOAT3PK() = default; - - XMFLOAT3PK(const XMFLOAT3PK&) = default; - XMFLOAT3PK& operator=(const XMFLOAT3PK&) = default; - - XMFLOAT3PK(XMFLOAT3PK&&) = default; - XMFLOAT3PK& operator=(XMFLOAT3PK&&) = default; - - explicit constexpr XMFLOAT3PK(uint32_t Packed) noexcept : v(Packed) {} - XMFLOAT3PK(float _x, float _y, float _z) noexcept; - explicit XMFLOAT3PK(_In_reads_(3) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMFLOAT3PK& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 3D vector: 9/9/9 floating-point components with shared 5-bit exponent -// The 3D vector is packed into 32 bits as follows: a 5-bit biased exponent -// with 9-bit mantissa for the x, y, and z component. The shared exponent -// is stored in the most significant bits and the x component mantissa is in -// the least significant bits. No sign bits so all partial-precision numbers -// are positive. -// (E5Z9Y9X9): [32] EEEEEzzz zzzzzzyy yyyyyyyx xxxxxxxx [0] -struct XMFLOAT3SE { - union { - struct { - uint32_t xm : 9; // x-mantissa - uint32_t ym : 9; // y-mantissa - uint32_t zm : 9; // z-mantissa - uint32_t e : 5; // shared exponent - }; - uint32_t v; - }; - - XMFLOAT3SE() = default; - - XMFLOAT3SE(const XMFLOAT3SE&) = default; - XMFLOAT3SE& operator=(const XMFLOAT3SE&) = default; - - XMFLOAT3SE(XMFLOAT3SE&&) = default; - XMFLOAT3SE& operator=(XMFLOAT3SE&&) = default; - - explicit constexpr XMFLOAT3SE(uint32_t Packed) noexcept : v(Packed) {} - XMFLOAT3SE(float _x, float _y, float _z) noexcept; - explicit XMFLOAT3SE(_In_reads_(3) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMFLOAT3SE& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 4D Vector; 16 bit floating point components -struct XMHALF4 { - union { - struct { - HALF x; - HALF y; - HALF z; - HALF w; - }; - uint64_t v; - }; - - XMHALF4() = default; - - XMHALF4(const XMHALF4&) = default; - XMHALF4& operator=(const XMHALF4&) = default; - - XMHALF4(XMHALF4&&) = default; - XMHALF4& operator=(XMHALF4&&) = default; - - explicit constexpr XMHALF4(uint64_t Packed) noexcept : v(Packed) {} - constexpr XMHALF4(HALF _x, HALF _y, HALF _z, HALF _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMHALF4(_In_reads_(4) const HALF* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMHALF4(float _x, float _y, float _z, float _w) noexcept; - explicit XMHALF4(_In_reads_(4) const float* pArray) noexcept; - - XMHALF4& operator=(uint64_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 4D Vector; 16 bit signed normalized integer components -struct XMSHORTN4 { - union { - struct { - int16_t x; - int16_t y; - int16_t z; - int16_t w; - }; - uint64_t v; - }; - - XMSHORTN4() = default; - - XMSHORTN4(const XMSHORTN4&) = default; - XMSHORTN4& operator=(const XMSHORTN4&) = default; - - XMSHORTN4(XMSHORTN4&&) = default; - XMSHORTN4& operator=(XMSHORTN4&&) = default; - - explicit constexpr XMSHORTN4(uint64_t Packed) noexcept : v(Packed) {} - constexpr XMSHORTN4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMSHORTN4(_In_reads_(4) const int16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMSHORTN4(float _x, float _y, float _z, float _w) noexcept; - explicit XMSHORTN4(_In_reads_(4) const float* pArray) noexcept; - - XMSHORTN4& operator=(uint64_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 16 bit signed integer components -struct XMSHORT4 { - union { - struct { - int16_t x; - int16_t y; - int16_t z; - int16_t w; - }; - uint64_t v; - }; - - XMSHORT4() = default; - - XMSHORT4(const XMSHORT4&) = default; - XMSHORT4& operator=(const XMSHORT4&) = default; - - XMSHORT4(XMSHORT4&&) = default; - XMSHORT4& operator=(XMSHORT4&&) = default; - - explicit constexpr XMSHORT4(uint64_t Packed) noexcept : v(Packed) {} - constexpr XMSHORT4(int16_t _x, int16_t _y, int16_t _z, int16_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMSHORT4(_In_reads_(4) const int16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMSHORT4(float _x, float _y, float _z, float _w) noexcept; - explicit XMSHORT4(_In_reads_(4) const float* pArray) noexcept; - - XMSHORT4& operator=(uint64_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 16 bit unsigned normalized integer components -struct XMUSHORTN4 { - union { - struct { - uint16_t x; - uint16_t y; - uint16_t z; - uint16_t w; - }; - uint64_t v; - }; - - XMUSHORTN4() = default; - - XMUSHORTN4(const XMUSHORTN4&) = default; - XMUSHORTN4& operator=(const XMUSHORTN4&) = default; - - XMUSHORTN4(XMUSHORTN4&&) = default; - XMUSHORTN4& operator=(XMUSHORTN4&&) = default; - - explicit constexpr XMUSHORTN4(uint64_t Packed) noexcept : v(Packed) {} - constexpr XMUSHORTN4(uint16_t _x, uint16_t _y, uint16_t _z, - uint16_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMUSHORTN4(_In_reads_(4) const uint16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMUSHORTN4(float _x, float _y, float _z, float _w) noexcept; - explicit XMUSHORTN4(_In_reads_(4) const float* pArray) noexcept; - - XMUSHORTN4& operator=(uint64_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 16 bit unsigned integer components -struct XMUSHORT4 { - union { - struct { - uint16_t x; - uint16_t y; - uint16_t z; - uint16_t w; - }; - uint64_t v; - }; - - XMUSHORT4() = default; - - XMUSHORT4(const XMUSHORT4&) = default; - XMUSHORT4& operator=(const XMUSHORT4&) = default; - - XMUSHORT4(XMUSHORT4&&) = default; - XMUSHORT4& operator=(XMUSHORT4&&) = default; - - explicit constexpr XMUSHORT4(uint64_t Packed) noexcept : v(Packed) {} - constexpr XMUSHORT4(uint16_t _x, uint16_t _y, uint16_t _z, - uint16_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMUSHORT4(_In_reads_(4) const uint16_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMUSHORT4(float _x, float _y, float _z, float _w) noexcept; - explicit XMUSHORT4(_In_reads_(4) const float* pArray) noexcept; - - XMUSHORT4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer -// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, -// normalized integer for the w component and 10 bit signed, normalized -// integers for the z, y, and x components. The w component is stored in the -// most significant bits and the x component in the least significant bits -// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] -struct XMXDECN4 { - union { - struct { - int32_t x : 10; // -511/511 to 511/511 - int32_t y : 10; // -511/511 to 511/511 - int32_t z : 10; // -511/511 to 511/511 - uint32_t w : 2; // 0/3 to 3/3 - }; - uint32_t v; - }; - - XMXDECN4() = default; - - XMXDECN4(const XMXDECN4&) = default; - XMXDECN4& operator=(const XMXDECN4&) = default; - - XMXDECN4(XMXDECN4&&) = default; - XMXDECN4& operator=(XMXDECN4&&) = default; - - explicit constexpr XMXDECN4(uint32_t Packed) : v(Packed) {} - XMXDECN4(float _x, float _y, float _z, float _w) noexcept; - explicit XMXDECN4(_In_reads_(4) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMXDECN4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer -// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned -// integer for the w component and 10 bit signed integers for the -// z, y, and x components. The w component is stored in the -// most significant bits and the x component in the least significant bits -// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] -struct XM_DEPRECATED XMXDEC4 { - union { - struct { - int32_t x : 10; // -511 to 511 - int32_t y : 10; // -511 to 511 - int32_t z : 10; // -511 to 511 - uint32_t w : 2; // 0 to 3 - }; - uint32_t v; - }; - - XMXDEC4() = default; - - XMXDEC4(const XMXDEC4&) = default; - XMXDEC4& operator=(const XMXDEC4&) = default; - - XMXDEC4(XMXDEC4&&) = default; - XMXDEC4& operator=(XMXDEC4&&) = default; - - explicit constexpr XMXDEC4(uint32_t Packed) noexcept : v(Packed) {} - XMXDEC4(float _x, float _y, float _z, float _w) noexcept; - explicit XMXDEC4(_In_reads_(4) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMXDEC4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer -// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit signed, -// normalized integer for the w component and 10 bit signed, normalized -// integers for the z, y, and x components. The w component is stored in the -// most significant bits and the x component in the least significant bits -// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] -struct XM_DEPRECATED XMDECN4 { - union { - struct { - int32_t x : 10; // -511/511 to 511/511 - int32_t y : 10; // -511/511 to 511/511 - int32_t z : 10; // -511/511 to 511/511 - int32_t w : 2; // -1/1 to 1/1 - }; - uint32_t v; - }; - - XMDECN4() = default; - - XMDECN4(const XMDECN4&) = default; - XMDECN4& operator=(const XMDECN4&) = default; - - XMDECN4(XMDECN4&&) = default; - XMDECN4& operator=(XMDECN4&&) = default; - - explicit constexpr XMDECN4(uint32_t Packed) noexcept : v(Packed) {} - XMDECN4(float _x, float _y, float _z, float _w) noexcept; - explicit XMDECN4(_In_reads_(4) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMDECN4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer -// The 4D Vector is packed into 32 bits as follows: a 2 bit signed, -// integer for the w component and 10 bit signed integers for the -// z, y, and x components. The w component is stored in the -// most significant bits and the x component in the least significant bits -// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] -struct XM_DEPRECATED XMDEC4 { - union { - struct { - int32_t x : 10; // -511 to 511 - int32_t y : 10; // -511 to 511 - int32_t z : 10; // -511 to 511 - int32_t w : 2; // -1 to 1 - }; - uint32_t v; - }; - - XMDEC4() = default; - - XMDEC4(const XMDEC4&) = default; - XMDEC4& operator=(const XMDEC4&) = default; - - XMDEC4(XMDEC4&&) = default; - XMDEC4& operator=(XMDEC4&&) = default; - - explicit constexpr XMDEC4(uint32_t Packed) noexcept : v(Packed) {} - XMDEC4(float _x, float _y, float _z, float _w) noexcept; - explicit XMDEC4(_In_reads_(4) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMDEC4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 10-10-10-2 bit normalized components packed into a 32 bit integer -// The normalized 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, -// normalized integer for the w component and 10 bit unsigned, normalized -// integers for the z, y, and x components. The w component is stored in the -// most significant bits and the x component in the least significant bits -// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] -struct XMUDECN4 { - union { - struct { - uint32_t x : 10; // 0/1023 to 1023/1023 - uint32_t y : 10; // 0/1023 to 1023/1023 - uint32_t z : 10; // 0/1023 to 1023/1023 - uint32_t w : 2; // 0/3 to 3/3 - }; - uint32_t v; - }; - - XMUDECN4() = default; - - XMUDECN4(const XMUDECN4&) = default; - XMUDECN4& operator=(const XMUDECN4&) = default; - - XMUDECN4(XMUDECN4&&) = default; - XMUDECN4& operator=(XMUDECN4&&) = default; - - explicit constexpr XMUDECN4(uint32_t Packed) noexcept : v(Packed) {} - XMUDECN4(float _x, float _y, float _z, float _w) noexcept; - explicit XMUDECN4(_In_reads_(4) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMUDECN4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 10-10-10-2 bit components packed into a 32 bit integer -// The 4D Vector is packed into 32 bits as follows: a 2 bit unsigned, -// integer for the w component and 10 bit unsigned integers -// for the z, y, and x components. The w component is stored in the -// most significant bits and the x component in the least significant bits -// (W2Z10Y10X10): [32] wwzzzzzz zzzzyyyy yyyyyyxx xxxxxxxx [0] -struct XMUDEC4 { - union { - struct { - uint32_t x : 10; // 0 to 1023 - uint32_t y : 10; // 0 to 1023 - uint32_t z : 10; // 0 to 1023 - uint32_t w : 2; // 0 to 3 - }; - uint32_t v; - }; - - XMUDEC4() = default; - - XMUDEC4(const XMUDEC4&) = default; - XMUDEC4& operator=(const XMUDEC4&) = default; - - XMUDEC4(XMUDEC4&&) = default; - XMUDEC4& operator=(XMUDEC4&&) = default; - - explicit constexpr XMUDEC4(uint32_t Packed) noexcept : v(Packed) {} - XMUDEC4(float _x, float _y, float _z, float _w) noexcept; - explicit XMUDEC4(_In_reads_(4) const float* pArray) noexcept; - - operator uint32_t() const noexcept { return v; } - - XMUDEC4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 4D Vector; 8 bit signed normalized integer components -struct XMBYTEN4 { - union { - struct { - int8_t x; - int8_t y; - int8_t z; - int8_t w; - }; - uint32_t v; - }; - - XMBYTEN4() = default; - - XMBYTEN4(const XMBYTEN4&) = default; - XMBYTEN4& operator=(const XMBYTEN4&) = default; - - XMBYTEN4(XMBYTEN4&&) = default; - XMBYTEN4& operator=(XMBYTEN4&&) = default; - - constexpr XMBYTEN4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit constexpr XMBYTEN4(uint32_t Packed) noexcept : v(Packed) {} - explicit XMBYTEN4(_In_reads_(4) const int8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMBYTEN4(float _x, float _y, float _z, float _w) noexcept; - explicit XMBYTEN4(_In_reads_(4) const float* pArray) noexcept; - - XMBYTEN4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 8 bit signed integer components -struct XMBYTE4 { - union { - struct { - int8_t x; - int8_t y; - int8_t z; - int8_t w; - }; - uint32_t v; - }; - - XMBYTE4() = default; - - XMBYTE4(const XMBYTE4&) = default; - XMBYTE4& operator=(const XMBYTE4&) = default; - - XMBYTE4(XMBYTE4&&) = default; - XMBYTE4& operator=(XMBYTE4&&) = default; - - constexpr XMBYTE4(int8_t _x, int8_t _y, int8_t _z, int8_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit constexpr XMBYTE4(uint32_t Packed) noexcept : v(Packed) {} - explicit XMBYTE4(_In_reads_(4) const int8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMBYTE4(float _x, float _y, float _z, float _w) noexcept; - explicit XMBYTE4(_In_reads_(4) const float* pArray) noexcept; - - XMBYTE4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 8 bit unsigned normalized integer components -struct XMUBYTEN4 { - union { - struct { - uint8_t x; - uint8_t y; - uint8_t z; - uint8_t w; - }; - uint32_t v; - }; - - XMUBYTEN4() = default; - - XMUBYTEN4(const XMUBYTEN4&) = default; - XMUBYTEN4& operator=(const XMUBYTEN4&) = default; - - XMUBYTEN4(XMUBYTEN4&&) = default; - XMUBYTEN4& operator=(XMUBYTEN4&&) = default; - - constexpr XMUBYTEN4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit constexpr XMUBYTEN4(uint32_t Packed) noexcept : v(Packed) {} - explicit XMUBYTEN4(_In_reads_(4) const uint8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMUBYTEN4(float _x, float _y, float _z, float _w) noexcept; - explicit XMUBYTEN4(_In_reads_(4) const float* pArray) noexcept; - - XMUBYTEN4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -// 4D Vector; 8 bit unsigned integer components -struct XMUBYTE4 { - union { - struct { - uint8_t x; - uint8_t y; - uint8_t z; - uint8_t w; - }; - uint32_t v; - }; - - XMUBYTE4() = default; - - XMUBYTE4(const XMUBYTE4&) = default; - XMUBYTE4& operator=(const XMUBYTE4&) = default; - - XMUBYTE4(XMUBYTE4&&) = default; - XMUBYTE4& operator=(XMUBYTE4&&) = default; - - constexpr XMUBYTE4(uint8_t _x, uint8_t _y, uint8_t _z, uint8_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit constexpr XMUBYTE4(uint32_t Packed) noexcept : v(Packed) {} - explicit XMUBYTE4(_In_reads_(4) const uint8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMUBYTE4(float _x, float _y, float _z, float _w) noexcept; - explicit XMUBYTE4(_In_reads_(4) const float* pArray) noexcept; - - XMUBYTE4& operator=(uint32_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 4D vector; 4 bit unsigned integer components -struct XMUNIBBLE4 { - union { - struct { - uint16_t x : 4; // 0 to 15 - uint16_t y : 4; // 0 to 15 - uint16_t z : 4; // 0 to 15 - uint16_t w : 4; // 0 to 15 - }; - uint16_t v; - }; - - XMUNIBBLE4() = default; - - XMUNIBBLE4(const XMUNIBBLE4&) = default; - XMUNIBBLE4& operator=(const XMUNIBBLE4&) = default; - - XMUNIBBLE4(XMUNIBBLE4&&) = default; - XMUNIBBLE4& operator=(XMUNIBBLE4&&) = default; - - explicit constexpr XMUNIBBLE4(uint16_t Packed) noexcept : v(Packed) {} - constexpr XMUNIBBLE4(uint8_t _x, uint8_t _y, uint8_t _z, - uint8_t _w) noexcept - : x(_x), y(_y), z(_z), w(_w) {} - explicit XMUNIBBLE4(_In_reads_(4) const uint8_t* pArray) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(pArray[3]) {} - XMUNIBBLE4(float _x, float _y, float _z, float _w) noexcept; - explicit XMUNIBBLE4(_In_reads_(4) const float* pArray) noexcept; - - operator uint16_t() const noexcept { return v; } - - XMUNIBBLE4& operator=(uint16_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -//------------------------------------------------------------------------------ -// 4D vector: 5/5/5/1 unsigned integer components -struct XMU555 { - union { - struct { - uint16_t x : 5; // 0 to 31 - uint16_t y : 5; // 0 to 31 - uint16_t z : 5; // 0 to 31 - uint16_t w : 1; // 0 or 1 - }; - uint16_t v; - }; - - XMU555() = default; - - XMU555(const XMU555&) = default; - XMU555& operator=(const XMU555&) = default; - - XMU555(XMU555&&) = default; - XMU555& operator=(XMU555&&) = default; - - explicit constexpr XMU555(uint16_t Packed) noexcept : v(Packed) {} - constexpr XMU555(uint8_t _x, uint8_t _y, uint8_t _z, bool _w) noexcept - : x(_x), y(_y), z(_z), w(_w ? 0x1 : 0) {} - XMU555(_In_reads_(3) const uint8_t* pArray, _In_ bool _w) noexcept - : x(pArray[0]), y(pArray[1]), z(pArray[2]), w(_w ? 0x1 : 0) {} - XMU555(float _x, float _y, float _z, bool _w) noexcept; - XMU555(_In_reads_(3) const float* pArray, _In_ bool _w) noexcept; - - operator uint16_t() const noexcept { return v; } - - XMU555& operator=(uint16_t Packed) noexcept { - v = Packed; - return *this; - } -}; - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/**************************************************************************** - * - * Data conversion operations - * - ****************************************************************************/ - -float XMConvertHalfToFloat(HALF Value) noexcept; -float* XMConvertHalfToFloatStream( - _Out_writes_bytes_(sizeof(float) + - OutputStride * (HalfCount - 1)) float* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(HALF) + InputStride * (HalfCount - 1)) - const HALF* pInputStream, - _In_ size_t InputStride, _In_ size_t HalfCount) noexcept; -HALF XMConvertFloatToHalf(float Value) noexcept; -HALF* XMConvertFloatToHalfStream( - _Out_writes_bytes_(sizeof(HALF) + OutputStride * (FloatCount - 1)) - HALF* pOutputStream, - _In_ size_t OutputStride, - _In_reads_bytes_(sizeof(float) + InputStride * (FloatCount - 1)) - const float* pInputStream, - _In_ size_t InputStride, _In_ size_t FloatCount) noexcept; - -/**************************************************************************** - * - * Load operations - * - ****************************************************************************/ - -XMVECTOR XM_CALLCONV XMLoadColor(_In_ const XMCOLOR* pSource) noexcept; - -XMVECTOR XM_CALLCONV XMLoadHalf2(_In_ const XMHALF2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadShortN2(_In_ const XMSHORTN2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadShort2(_In_ const XMSHORT2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUShortN2(_In_ const XMUSHORTN2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUShort2(_In_ const XMUSHORT2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadByteN2(_In_ const XMBYTEN2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadByte2(_In_ const XMBYTE2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUByteN2(_In_ const XMUBYTEN2* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUByte2(_In_ const XMUBYTE2* pSource) noexcept; - -XMVECTOR XM_CALLCONV XMLoadU565(_In_ const XMU565* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat3PK(_In_ const XMFLOAT3PK* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadFloat3SE(_In_ const XMFLOAT3SE* pSource) noexcept; - -XMVECTOR XM_CALLCONV XMLoadHalf4(_In_ const XMHALF4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadShortN4(_In_ const XMSHORTN4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadShort4(_In_ const XMSHORT4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUShortN4(_In_ const XMUSHORTN4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUShort4(_In_ const XMUSHORT4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadXDecN4(_In_ const XMXDECN4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUDecN4(_In_ const XMUDECN4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUDecN4_XR(_In_ const XMUDECN4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUDec4(_In_ const XMUDEC4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadByteN4(_In_ const XMBYTEN4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadByte4(_In_ const XMBYTE4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUByteN4(_In_ const XMUBYTEN4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUByte4(_In_ const XMUBYTE4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadUNibble4(_In_ const XMUNIBBLE4* pSource) noexcept; -XMVECTOR XM_CALLCONV XMLoadU555(_In_ const XMU555* pSource) noexcept; - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -// C4996: ignore deprecation warning -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -XM_DEPRECATED -XMVECTOR XM_CALLCONV XMLoadDecN4(_In_ const XMDECN4* pSource) noexcept; - -XM_DEPRECATED -XMVECTOR XM_CALLCONV XMLoadDec4(_In_ const XMDEC4* pSource) noexcept; - -XM_DEPRECATED -XMVECTOR XM_CALLCONV XMLoadXDec4(_In_ const XMXDEC4* pSource) noexcept; - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/**************************************************************************** - * - * Store operations - * - ****************************************************************************/ - -void XM_CALLCONV XMStoreColor(_Out_ XMCOLOR* pDestination, - _In_ FXMVECTOR V) noexcept; - -void XM_CALLCONV XMStoreHalf2(_Out_ XMHALF2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreShortN2(_Out_ XMSHORTN2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreShort2(_Out_ XMSHORT2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUShortN2(_Out_ XMUSHORTN2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUShort2(_Out_ XMUSHORT2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreByteN2(_Out_ XMBYTEN2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreByte2(_Out_ XMBYTE2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUByteN2(_Out_ XMUBYTEN2* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUByte2(_Out_ XMUBYTE2* pDestination, - _In_ FXMVECTOR V) noexcept; - -void XM_CALLCONV XMStoreU565(_Out_ XMU565* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat3PK(_Out_ XMFLOAT3PK* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreFloat3SE(_Out_ XMFLOAT3SE* pDestination, - _In_ FXMVECTOR V) noexcept; - -void XM_CALLCONV XMStoreHalf4(_Out_ XMHALF4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreShortN4(_Out_ XMSHORTN4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreShort4(_Out_ XMSHORT4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUShortN4(_Out_ XMUSHORTN4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUShort4(_Out_ XMUSHORT4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreXDecN4(_Out_ XMXDECN4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUDecN4(_Out_ XMUDECN4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUDecN4_XR(_Out_ XMUDECN4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUDec4(_Out_ XMUDEC4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreByteN4(_Out_ XMBYTEN4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreByte4(_Out_ XMBYTE4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUByteN4(_Out_ XMUBYTEN4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUByte4(_Out_ XMUBYTE4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreUNibble4(_Out_ XMUNIBBLE4* pDestination, - _In_ FXMVECTOR V) noexcept; -void XM_CALLCONV XMStoreU555(_Out_ XMU555* pDestination, - _In_ FXMVECTOR V) noexcept; - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -// C4996: ignore deprecation warning -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -XM_DEPRECATED -void XM_CALLCONV XMStoreDecN4(_Out_ XMDECN4* pDestination, - _In_ FXMVECTOR V) noexcept; - -XM_DEPRECATED -void XM_CALLCONV XMStoreDec4(_Out_ XMDEC4* pDestination, - _In_ FXMVECTOR V) noexcept; - -XM_DEPRECATED -void XM_CALLCONV XMStoreXDec4(_Out_ XMXDEC4* pDestination, - _In_ FXMVECTOR V) noexcept; - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/**************************************************************************** - * - * Implementation - * - ****************************************************************************/ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4068 4214 4204 4365 4616 6001 6101) -// C4068/4616: ignore unknown pragmas -// C4214/4204: nonstandard extension used -// C4365: Off by default noise -// C6001/6101: False positives -#endif - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 25000, "FXMVECTOR is 16 bytes") -#pragma prefast(disable : 26495, "Union initialization confuses /analyze") -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" -#endif - -#include "DirectXPackedVector.inl" - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif -} // namespace PackedVector - -} // namespace DirectX diff --git a/targets/app/linux/Stubs/DirectXMath/DirectXPackedVector.inl b/targets/app/linux/Stubs/DirectXMath/DirectXPackedVector.inl deleted file mode 100644 index 2ed7774a0..000000000 --- a/targets/app/linux/Stubs/DirectXMath/DirectXPackedVector.inl +++ /dev/null @@ -1,4142 +0,0 @@ -//------------------------------------------------------------------------------------- -// DirectXPackedVector.inl -- SIMD C++ Math library -// -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. -// -// http://go.microsoft.com/fwlink/?LinkID=615560 -//------------------------------------------------------------------------------------- - -#pragma once - -/**************************************************************************** - * - * Data conversion - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline float XMConvertHalfToFloat(HALF Value) noexcept { -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - __m128i V1 = _mm_cvtsi32_si128(static_cast(Value)); - __m128 V2 = _mm_cvtph_ps(V1); - return _mm_cvtss_f32(V2); -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) - uint16x4_t vHalf = vdup_n_u16(Value); - float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); - return vgetq_lane_f32(vFloat, 0); -#else - auto Mantissa = static_cast(Value & 0x03FF); - - uint32_t Exponent = (Value & 0x7C00); - if (Exponent == 0x7C00) // INF/NAN - { - Exponent = 0x8f; - } else if (Exponent != 0) // The value is normalized - { - Exponent = - static_cast((static_cast(Value) >> 10) & 0x1F); - } else if (Mantissa != 0) // The value is denormalized - { - // Normalize the value in the resulting float - Exponent = 1; - - do { - Exponent--; - Mantissa <<= 1; - } while ((Mantissa & 0x0400) == 0); - - Mantissa &= 0x03FF; - } else // The value is zero - { - Exponent = static_cast(-112); - } - - uint32_t Result = ((static_cast(Value) & 0x8000) << 16) // Sign - | ((Exponent + 112) << 23) // Exponent - | (Mantissa << 13); // Mantissa - - return reinterpret_cast(&Result)[0]; -#endif // !_XM_F16C_INTRINSICS_ -} - -//------------------------------------------------------------------------------ -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 26015 26019, "PREfast noise: Esp:1307") -#endif - -_Use_decl_annotations_ inline float* XMConvertHalfToFloatStream( - float* pOutputStream, size_t OutputStride, const HALF* pInputStream, - size_t InputStride, size_t HalfCount) noexcept { - assert(pOutputStream); - assert(pInputStream); - - assert(InputStride >= sizeof(HALF)); - _Analysis_assume_(InputStride >= sizeof(HALF)); - - assert(OutputStride >= sizeof(float)); - _Analysis_assume_(OutputStride >= sizeof(float)); - -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - auto pHalf = reinterpret_cast(pInputStream); - auto pFloat = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = HalfCount >> 2; - if (four > 0) { - if (InputStride == sizeof(HALF)) { - if (OutputStride == sizeof(float)) { - if ((reinterpret_cast(pFloat) & 0xF) == 0) { - // Packed input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - __m128i HV = _mm_loadl_epi64( - reinterpret_cast(pHalf)); - pHalf += InputStride * 4; - - __m128 FV = _mm_cvtph_ps(HV); - - XM_STREAM_PS(reinterpret_cast(pFloat), FV); - pFloat += OutputStride * 4; - i += 4; - } - } else { - // Packed input, packed output - for (size_t j = 0; j < four; ++j) { - __m128i HV = _mm_loadl_epi64( - reinterpret_cast(pHalf)); - pHalf += InputStride * 4; - - __m128 FV = _mm_cvtph_ps(HV); - - _mm_storeu_ps(reinterpret_cast(pFloat), FV); - pFloat += OutputStride * 4; - i += 4; - } - } - } else { - // Packed input, scattered output - for (size_t j = 0; j < four; ++j) { - __m128i HV = _mm_loadl_epi64( - reinterpret_cast(pHalf)); - pHalf += InputStride * 4; - - __m128 FV = _mm_cvtph_ps(HV); - - _mm_store_ss(reinterpret_cast(pFloat), FV); - pFloat += OutputStride; - *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); - pFloat += OutputStride; - *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); - pFloat += OutputStride; - *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); - pFloat += OutputStride; - i += 4; - } - } - } else if (OutputStride == sizeof(float)) { - if ((reinterpret_cast(pFloat) & 0xF) == 0) { - // Scattered input, aligned & packed output - for (size_t j = 0; j < four; ++j) { - uint16_t H1 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H2 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H3 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H4 = *reinterpret_cast(pHalf); - pHalf += InputStride; - - __m128i HV = _mm_setzero_si128(); - HV = _mm_insert_epi16(HV, H1, 0); - HV = _mm_insert_epi16(HV, H2, 1); - HV = _mm_insert_epi16(HV, H3, 2); - HV = _mm_insert_epi16(HV, H4, 3); - __m128 FV = _mm_cvtph_ps(HV); - - XM_STREAM_PS(reinterpret_cast(pFloat), FV); - pFloat += OutputStride * 4; - i += 4; - } - } else { - // Scattered input, packed output - for (size_t j = 0; j < four; ++j) { - uint16_t H1 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H2 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H3 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H4 = *reinterpret_cast(pHalf); - pHalf += InputStride; - - __m128i HV = _mm_setzero_si128(); - HV = _mm_insert_epi16(HV, H1, 0); - HV = _mm_insert_epi16(HV, H2, 1); - HV = _mm_insert_epi16(HV, H3, 2); - HV = _mm_insert_epi16(HV, H4, 3); - __m128 FV = _mm_cvtph_ps(HV); - - _mm_storeu_ps(reinterpret_cast(pFloat), FV); - pFloat += OutputStride * 4; - i += 4; - } - } - } else { - // Scattered input, scattered output - for (size_t j = 0; j < four; ++j) { - uint16_t H1 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H2 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H3 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H4 = *reinterpret_cast(pHalf); - pHalf += InputStride; - - __m128i HV = _mm_setzero_si128(); - HV = _mm_insert_epi16(HV, H1, 0); - HV = _mm_insert_epi16(HV, H2, 1); - HV = _mm_insert_epi16(HV, H3, 2); - HV = _mm_insert_epi16(HV, H4, 3); - __m128 FV = _mm_cvtph_ps(HV); - - _mm_store_ss(reinterpret_cast(pFloat), FV); - pFloat += OutputStride; - *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 1); - pFloat += OutputStride; - *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 2); - pFloat += OutputStride; - *reinterpret_cast(pFloat) = _mm_extract_ps(FV, 3); - pFloat += OutputStride; - i += 4; - } - } - } - - for (; i < HalfCount; ++i) { - *reinterpret_cast(pFloat) = - XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); - pHalf += InputStride; - pFloat += OutputStride; - } - - XM_SFENCE(); - - return pOutputStream; -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) - auto pHalf = reinterpret_cast(pInputStream); - auto pFloat = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = HalfCount >> 2; - if (four > 0) { - if (InputStride == sizeof(HALF)) { - if (OutputStride == sizeof(float)) { - // Packed input, packed output - for (size_t j = 0; j < four; ++j) { - uint16x4_t vHalf = - vld1_u16(reinterpret_cast(pHalf)); - pHalf += InputStride * 4; - - float32x4_t vFloat = - vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); - - vst1q_f32(reinterpret_cast(pFloat), vFloat); - pFloat += OutputStride * 4; - i += 4; - } - } else { - // Packed input, scattered output - for (size_t j = 0; j < four; ++j) { - uint16x4_t vHalf = - vld1_u16(reinterpret_cast(pHalf)); - pHalf += InputStride * 4; - - float32x4_t vFloat = - vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); - - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 0); - pFloat += OutputStride; - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 1); - pFloat += OutputStride; - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 2); - pFloat += OutputStride; - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 3); - pFloat += OutputStride; - i += 4; - } - } - } else if (OutputStride == sizeof(float)) { - // Scattered input, packed output - for (size_t j = 0; j < four; ++j) { - uint16_t H1 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H2 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H3 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H4 = *reinterpret_cast(pHalf); - pHalf += InputStride; - - uint64_t iHalf = uint64_t(H1) | (uint64_t(H2) << 16) | - (uint64_t(H3) << 32) | (uint64_t(H4) << 48); - uint16x4_t vHalf = vcreate_u16(iHalf); - - float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); - - vst1q_f32(reinterpret_cast(pFloat), vFloat); - pFloat += OutputStride * 4; - i += 4; - } - } else { - // Scattered input, scattered output - for (size_t j = 0; j < four; ++j) { - uint16_t H1 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H2 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H3 = *reinterpret_cast(pHalf); - pHalf += InputStride; - uint16_t H4 = *reinterpret_cast(pHalf); - pHalf += InputStride; - - uint64_t iHalf = uint64_t(H1) | (uint64_t(H2) << 16) | - (uint64_t(H3) << 32) | (uint64_t(H4) << 48); - uint16x4_t vHalf = vcreate_u16(iHalf); - - float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf)); - - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 0); - pFloat += OutputStride; - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 1); - pFloat += OutputStride; - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 2); - pFloat += OutputStride; - vst1q_lane_f32(reinterpret_cast(pFloat), vFloat, 3); - pFloat += OutputStride; - i += 4; - } - } - } - - for (; i < HalfCount; ++i) { - *reinterpret_cast(pFloat) = - XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); - pHalf += InputStride; - pFloat += OutputStride; - } - - return pOutputStream; -#else - auto pHalf = reinterpret_cast(pInputStream); - auto pFloat = reinterpret_cast(pOutputStream); - - for (size_t i = 0; i < HalfCount; i++) { - *reinterpret_cast(pFloat) = - XMConvertHalfToFloat(reinterpret_cast(pHalf)[0]); - pHalf += InputStride; - pFloat += OutputStride; - } - - return pOutputStream; -#endif // !_XM_F16C_INTRINSICS_ -} - -//------------------------------------------------------------------------------ - -inline HALF XMConvertFloatToHalf(float Value) noexcept { -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - __m128 V1 = _mm_set_ss(Value); - __m128i V2 = _mm_cvtps_ph(V1, _MM_FROUND_TO_NEAREST_INT); - return static_cast(_mm_extract_epi16(V2, 0)); -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) - float32x4_t vFloat = vdupq_n_f32(Value); - float16x4_t vHalf = vcvt_f16_f32(vFloat); - return vget_lane_u16(vreinterpret_u16_f16(vHalf), 0); -#else - uint32_t Result; - - auto IValue = reinterpret_cast(&Value)[0]; - uint32_t Sign = (IValue & 0x80000000U) >> 16U; - IValue = IValue & 0x7FFFFFFFU; // Hack off the sign - if (IValue >= 0x47800000 /*e+16*/) { - // The number is too large to be represented as a half. Return infinity - // or NaN - Result = - 0x7C00U | - ((IValue > 0x7F800000) ? (0x200 | ((IValue >> 13U) & 0x3FFU)) : 0U); - } else if (IValue <= 0x33000000U /*e-25*/) { - Result = 0; - } else if (IValue < 0x38800000U /*e-14*/) { - // The number is too small to be represented as a normalized half. - // Convert it to a denormalized value. - uint32_t Shift = 125U - (IValue >> 23U); - IValue = 0x800000U | (IValue & 0x7FFFFFU); - Result = IValue >> (Shift + 1); - uint32_t s = (IValue & ((1U << Shift) - 1)) != 0; - Result += (Result | s) & ((IValue >> Shift) & 1U); - } else { - // Rebias the exponent to represent the value as a normalized half. - IValue += 0xC8000000U; - Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U) & 0x7FFFU; - } - return static_cast(Result | Sign); -#endif // !_XM_F16C_INTRINSICS_ -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline HALF* XMConvertFloatToHalfStream( - HALF* pOutputStream, size_t OutputStride, const float* pInputStream, - size_t InputStride, size_t FloatCount) noexcept { - assert(pOutputStream); - assert(pInputStream); - - assert(InputStride >= sizeof(float)); - _Analysis_assume_(InputStride >= sizeof(float)); - - assert(OutputStride >= sizeof(HALF)); - _Analysis_assume_(OutputStride >= sizeof(HALF)); - -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - auto pFloat = reinterpret_cast(pInputStream); - auto pHalf = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = FloatCount >> 2; - if (four > 0) { - if (InputStride == sizeof(float)) { - if (OutputStride == sizeof(HALF)) { - if ((reinterpret_cast(pFloat) & 0xF) == 0) { - // Aligned and packed input, packed output - for (size_t j = 0; j < four; ++j) { - __m128 FV = - _mm_load_ps(reinterpret_cast(pFloat)); - pFloat += InputStride * 4; - - __m128i HV = - _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); - - _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); - pHalf += OutputStride * 4; - i += 4; - } - } else { - // Packed input, packed output - for (size_t j = 0; j < four; ++j) { - __m128 FV = _mm_loadu_ps( - reinterpret_cast(pFloat)); - pFloat += InputStride * 4; - - __m128i HV = - _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); - - _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); - pHalf += OutputStride * 4; - i += 4; - } - } - } else { - if ((reinterpret_cast(pFloat) & 0xF) == 0) { - // Aligned & packed input, scattered output - for (size_t j = 0; j < four; ++j) { - __m128 FV = - _mm_load_ps(reinterpret_cast(pFloat)); - pFloat += InputStride * 4; - - __m128i HV = - _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); - - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 0)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 1)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 2)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 3)); - pHalf += OutputStride; - i += 4; - } - } else { - // Packed input, scattered output - for (size_t j = 0; j < four; ++j) { - __m128 FV = _mm_loadu_ps( - reinterpret_cast(pFloat)); - pFloat += InputStride * 4; - - __m128i HV = - _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); - - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 0)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 1)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 2)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 3)); - pHalf += OutputStride; - i += 4; - } - } - } - } else if (OutputStride == sizeof(HALF)) { - // Scattered input, packed output - for (size_t j = 0; j < four; ++j) { - __m128 FV1 = - _mm_load_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV2 = - _mm_broadcast_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV3 = - _mm_broadcast_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV4 = - _mm_broadcast_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); - __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); - FV = _mm_blend_ps(FV, FT, 0xC); - - __m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); - - _mm_storel_epi64(reinterpret_cast<__m128i*>(pHalf), HV); - pHalf += OutputStride * 4; - i += 4; - } - } else { - // Scattered input, scattered output - for (size_t j = 0; j < four; ++j) { - __m128 FV1 = - _mm_load_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV2 = - _mm_broadcast_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV3 = - _mm_broadcast_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV4 = - _mm_broadcast_ss(reinterpret_cast(pFloat)); - pFloat += InputStride; - - __m128 FV = _mm_blend_ps(FV1, FV2, 0x2); - __m128 FT = _mm_blend_ps(FV3, FV4, 0x8); - FV = _mm_blend_ps(FV, FT, 0xC); - - __m128i HV = _mm_cvtps_ph(FV, _MM_FROUND_TO_NEAREST_INT); - - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 0)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 1)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 2)); - pHalf += OutputStride; - *reinterpret_cast(pHalf) = - static_cast(_mm_extract_epi16(HV, 3)); - pHalf += OutputStride; - i += 4; - } - } - } - - for (; i < FloatCount; ++i) { - *reinterpret_cast(pHalf) = - XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); - pFloat += InputStride; - pHalf += OutputStride; - } - - return pOutputStream; -#elif defined(_XM_ARM_NEON_INTRINSICS_) && \ - (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || \ - defined(_M_ARM64EC) || __aarch64__) && \ - !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2)) - auto pFloat = reinterpret_cast(pInputStream); - auto pHalf = reinterpret_cast(pOutputStream); - - size_t i = 0; - size_t four = FloatCount >> 2; - if (four > 0) { - if (InputStride == sizeof(float)) { - if (OutputStride == sizeof(HALF)) { - // Packed input, packed output - for (size_t j = 0; j < four; ++j) { - float32x4_t vFloat = - vld1q_f32(reinterpret_cast(pFloat)); - pFloat += InputStride * 4; - - uint16x4_t vHalf = - vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); - - vst1_u16(reinterpret_cast(pHalf), vHalf); - pHalf += OutputStride * 4; - i += 4; - } - } else { - // Packed input, scattered output - for (size_t j = 0; j < four; ++j) { - float32x4_t vFloat = - vld1q_f32(reinterpret_cast(pFloat)); - pFloat += InputStride * 4; - - uint16x4_t vHalf = - vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); - - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 0); - pHalf += OutputStride; - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 1); - pHalf += OutputStride; - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 2); - pHalf += OutputStride; - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 3); - pHalf += OutputStride; - i += 4; - } - } - } else if (OutputStride == sizeof(HALF)) { - // Scattered input, packed output - for (size_t j = 0; j < four; ++j) { - float32x4_t vFloat = vdupq_n_f32(0); - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 0); - pFloat += InputStride; - - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 1); - pFloat += InputStride; - - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 2); - pFloat += InputStride; - - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 3); - pFloat += InputStride; - - uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); - - vst1_u16(reinterpret_cast(pHalf), vHalf); - pHalf += OutputStride * 4; - i += 4; - } - } else { - // Scattered input, scattered output - for (size_t j = 0; j < four; ++j) { - float32x4_t vFloat = vdupq_n_f32(0); - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 0); - pFloat += InputStride; - - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 1); - pFloat += InputStride; - - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 2); - pFloat += InputStride; - - vFloat = vld1q_lane_f32(reinterpret_cast(pFloat), - vFloat, 3); - pFloat += InputStride; - - uint16x4_t vHalf = vreinterpret_u16_f16(vcvt_f16_f32(vFloat)); - - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 0); - pHalf += OutputStride; - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 1); - pHalf += OutputStride; - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 2); - pHalf += OutputStride; - vst1_lane_u16(reinterpret_cast(pHalf), vHalf, 3); - pHalf += OutputStride; - i += 4; - } - } - } - - for (; i < FloatCount; ++i) { - *reinterpret_cast(pHalf) = - XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); - pFloat += InputStride; - pHalf += OutputStride; - } - - return pOutputStream; -#else - auto pFloat = reinterpret_cast(pInputStream); - auto pHalf = reinterpret_cast(pOutputStream); - - for (size_t i = 0; i < FloatCount; i++) { - *reinterpret_cast(pHalf) = - XMConvertFloatToHalf(reinterpret_cast(pFloat)[0]); - pFloat += InputStride; - pHalf += OutputStride; - } - return pOutputStream; -#endif // !_XM_F16C_INTRINSICS_ -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -/**************************************************************************** - * - * Vector and matrix load operations - * - ****************************************************************************/ - -#ifdef _PREFAST_ -#pragma prefast(push) -#pragma prefast(disable : 28931, "PREfast noise: Esp:1266") -#endif - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadColor(const XMCOLOR* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - // int32_t -> Float conversions are done in one instruction. - // uint32_t -> Float calls a runtime function. Keep in int32_t - auto iColor = static_cast(pSource->c); - XMVECTORF32 vColor = { - {{static_cast((iColor >> 16) & 0xFF) * (1.0f / 255.0f), - static_cast((iColor >> 8) & 0xFF) * (1.0f / 255.0f), - static_cast(iColor & 0xFF) * (1.0f / 255.0f), - static_cast((iColor >> 24) & 0xFF) * (1.0f / 255.0f)}}}; - return vColor.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32_t bgra = pSource->c; - uint32_t rgba = - (bgra & 0xFF00FF00) | ((bgra >> 16) & 0xFF) | ((bgra << 16) & 0xFF0000); - uint32x2_t vInt8 = vdup_n_u32(rgba); - uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u32(vInt8)); - uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_n_f32(R, 1.0f / 255.0f); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the color in all four entries - __m128i vInt = _mm_set1_epi32(static_cast(pSource->c)); - // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 - vInt = _mm_and_si128(vInt, g_XMMaskA8R8G8B8); - // a is unsigned! Flip the bit to convert the order to signed - vInt = _mm_xor_si128(vInt, g_XMFlipA8R8G8B8); - // Convert to floating point numbers - XMVECTOR vTemp = _mm_cvtepi32_ps(vInt); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMFixAA8R8G8B8); - // Convert 0-255 to 0.0f-1.0f - return _mm_mul_ps(vTemp, g_XMNormalizeA8R8G8B8); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadHalf2(const XMHALF2* pSource) noexcept { - assert(pSource); -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - __m128 V = _mm_load_ss(reinterpret_cast(pSource)); - return _mm_cvtph_ps(_mm_castps_si128(V)); -#else - XMVECTORF32 vResult = {{{XMConvertHalfToFloat(pSource->x), - XMConvertHalfToFloat(pSource->y), 0.0f, 0.0f}}}; - return vResult.v; -#endif // !_XM_F16C_INTRINSICS_ -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadShortN2(const XMSHORTN2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{(pSource->x == -32768) - ? -1.f - : (static_cast(pSource->x) * (1.0f / 32767.0f)), - (pSource->y == -32768) - ? -1.f - : (static_cast(pSource->y) * (1.0f / 32767.0f)), - 0.0f, 0.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt16 = - vld1_dup_u32(reinterpret_cast(pSource)); - int32x4_t vInt = vmovl_s16(vreinterpret_s16_u32(vInt16)); - vInt = vandq_s32(vInt, g_XMMaskXY); - float32x4_t R = vcvtq_f32_s32(vInt); - R = vmulq_n_f32(R, 1.0f / 32767.0f); - return vmaxq_f32(R, vdupq_n_f32(-1.f)); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the two shorts in all four entries (WORD alignment okay, - // DWORD alignment preferred) - __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); - // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 - vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); - // x needs to be sign extended - vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x - 0x8000 to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16); - // Convert -1.0f - 1.0f - vTemp = _mm_mul_ps(vTemp, g_XMNormalizeX16Y16); - // Clamp result (for case of -32768) - return _mm_max_ps(vTemp, g_XMNegativeOne); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadShort2(const XMSHORT2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{static_cast(pSource->x), - static_cast(pSource->y), 0.f, 0.f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt16 = - vld1_dup_u32(reinterpret_cast(pSource)); - int32x4_t vInt = vmovl_s16(vreinterpret_s16_u32(vInt16)); - vInt = vandq_s32(vInt, g_XMMaskXY); - return vcvtq_f32_s32(vInt); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the two shorts in all four entries (WORD alignment okay, - // DWORD alignment preferred) - __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); - // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 - vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); - // x needs to be sign extended - vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x - 0x8000 to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16); - // Y is 65536 too large - return _mm_mul_ps(vTemp, g_XMFixupY16); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUShortN2(const XMUSHORTN2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{static_cast(pSource->x) / 65535.0f, - static_cast(pSource->y) / 65535.0f, 0.f, 0.f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt16 = - vld1_dup_u32(reinterpret_cast(pSource)); - uint32x4_t vInt = vmovl_u16(vreinterpret_u16_u32(vInt16)); - vInt = vandq_u32(vInt, g_XMMaskXY); - float32x4_t R = vcvtq_f32_u32(vInt); - R = vmulq_n_f32(R, 1.0f / 65535.0f); - return vmaxq_f32(R, vdupq_n_f32(-1.f)); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 FixupY16 = { - {{1.0f / 65535.0f, 1.0f / (65535.0f * 65536.0f), 0.0f, 0.0f}}}; - static const XMVECTORF32 FixaddY16 = {{{0, 32768.0f * 65536.0f, 0, 0}}}; - // Splat the two shorts in all four entries (WORD alignment okay, - // DWORD alignment preferred) - __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); - // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 - vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); - // y needs to be sign flipped - vTemp = _mm_xor_ps(vTemp, g_XMFlipY); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // y + 0x8000 to undo the signed order. - vTemp = _mm_add_ps(vTemp, FixaddY16); - // Y is 65536 times too large - vTemp = _mm_mul_ps(vTemp, FixupY16); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUShort2(const XMUSHORT2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{static_cast(pSource->x), - static_cast(pSource->y), 0.f, 0.f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt16 = - vld1_dup_u32(reinterpret_cast(pSource)); - uint32x4_t vInt = vmovl_u16(vreinterpret_u16_u32(vInt16)); - vInt = vandq_u32(vInt, g_XMMaskXY); - return vcvtq_f32_u32(vInt); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 FixaddY16 = {{{0, 32768.0f, 0, 0}}}; - // Splat the two shorts in all four entries (WORD alignment okay, - // DWORD alignment preferred) - __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->x)); - // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 - vTemp = _mm_and_ps(vTemp, g_XMMaskX16Y16); - // y needs to be sign flipped - vTemp = _mm_xor_ps(vTemp, g_XMFlipY); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // Y is 65536 times too large - vTemp = _mm_mul_ps(vTemp, g_XMFixupY16); - // y + 0x8000 to undo the signed order. - vTemp = _mm_add_ps(vTemp, FixaddY16); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadByteN2(const XMBYTEN2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{(pSource->x == -128) - ? -1.f - : (static_cast(pSource->x) * (1.0f / 127.0f)), - (pSource->y == -128) - ? -1.f - : (static_cast(pSource->y) * (1.0f / 127.0f)), - 0.0f, 0.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); - int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u16(vInt8)); - int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); - vInt = vandq_s32(vInt, g_XMMaskXY); - float32x4_t R = vcvtq_f32_s32(vInt); - R = vmulq_n_f32(R, 1.0f / 127.0f); - return vmaxq_f32(R, vdupq_n_f32(-1.f)); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.0f / 127.0f, 1.0f / (127.0f * 256.0f), 0, 0}}}; - static const XMVECTORU32 Mask = {{{0xFF, 0xFF00, 0, 0}}}; - // Splat the color in all four entries (x,z,y,w) - __m128i vInt = XM_LOADU_SI16(&pSource->v); - XMVECTOR vTemp = - XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0)); - // Mask - vTemp = _mm_and_ps(vTemp, Mask); - // x,y and z are unsigned! Flip the bits to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x, y and z - 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddByte4); - // Fix y, z and w because they are too large - vTemp = _mm_mul_ps(vTemp, Scale); - // Clamp result (for case of -128) - return _mm_max_ps(vTemp, g_XMNegativeOne); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadByte2(const XMBYTE2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{static_cast(pSource->x), - static_cast(pSource->y), 0.0f, 0.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); - int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u16(vInt8)); - int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); - vInt = vandq_s32(vInt, g_XMMaskXY); - return vcvtq_f32_s32(vInt); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f)}}}; - static const XMVECTORU32 Mask = {{{0xFF, 0xFF00, 0, 0}}}; - // Splat the color in all four entries (x,z,y,w) - __m128i vInt = XM_LOADU_SI16(&pSource->v); - XMVECTOR vTemp = - XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0)); - // Mask - vTemp = _mm_and_ps(vTemp, Mask); - // x,y and z are unsigned! Flip the bits to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x, y and z - 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddByte4); - // Fix y, z and w because they are too large - return _mm_mul_ps(vTemp, Scale); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUByteN2(const XMUBYTEN2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{static_cast(pSource->x) * (1.0f / 255.0f), - static_cast(pSource->y) * (1.0f / 255.0f), 0.0f, 0.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); - uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u16(vInt8)); - uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); - vInt = vandq_u32(vInt, g_XMMaskXY); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_n_f32(R, 1.0f / 255.0f); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.0f / 255.0f, 1.0f / (255.0f * 256.0f), 0, 0}}}; - static const XMVECTORU32 Mask = {{{0xFF, 0xFF00, 0, 0}}}; - // Splat the color in all four entries (x,z,y,w) - __m128i vInt = XM_LOADU_SI16(&pSource->v); - XMVECTOR vTemp = - XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0)); - // Mask - vTemp = _mm_and_ps(vTemp, Mask); - // w is signed! Flip the bits to convert the order to unsigned - vTemp = _mm_xor_ps(vTemp, g_XMFlipW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // w + 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); - // Fix y, z and w because they are too large - return _mm_mul_ps(vTemp, Scale); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUByte2(const XMUBYTE2* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{static_cast(pSource->x), - static_cast(pSource->y), 0.0f, 0.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint16x4_t vInt8 = vld1_dup_u16(reinterpret_cast(pSource)); - uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u16(vInt8)); - uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); - vInt = vandq_u32(vInt, g_XMMaskXY); - return vcvtq_f32_u32(vInt); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = {{{1.0f, 1.0f / 256.0f, 0, 0}}}; - static const XMVECTORU32 Mask = {{{0xFF, 0xFF00, 0, 0}}}; - // Splat the color in all four entries (x,z,y,w) - __m128i vInt = XM_LOADU_SI16(&pSource->v); - XMVECTOR vTemp = - XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0)); - // Mask - vTemp = _mm_and_ps(vTemp, Mask); - // w is signed! Flip the bits to convert the order to unsigned - vTemp = _mm_xor_ps(vTemp, g_XMFlipW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // w + 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); - // Fix y, z and w because they are too large - return _mm_mul_ps(vTemp, Scale); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadU565(const XMU565* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{ - float(pSource->v & 0x1F), - float((pSource->v >> 5) & 0x3F), - float((pSource->v >> 11) & 0x1F), - 0.f, - }}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORI32 U565And = {{{0x1F, 0x3F << 5, 0x1F << 11, 0}}}; - static const XMVECTORF32 U565Mul = { - {{1.0f, 1.0f / 32.0f, 1.0f / 2048.f, 0}}}; - uint16x4_t vInt16 = - vld1_dup_u16(reinterpret_cast(pSource)); - uint32x4_t vInt = vmovl_u16(vInt16); - vInt = vandq_u32(vInt, U565And); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_f32(R, U565Mul); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORI32 U565And = {{{0x1F, 0x3F << 5, 0x1F << 11, 0}}}; - static const XMVECTORF32 U565Mul = { - {{1.0f, 1.0f / 32.0f, 1.0f / 2048.f, 0}}}; - // Get the 16 bit value and splat it - __m128i vInt = XM_LOADU_SI16(&pSource->v); - XMVECTOR vResult = - XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0)); - // Mask off x, y and z - vResult = _mm_and_ps(vResult, U565And); - // Convert to float - vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); - // Normalize x, y, and z - vResult = _mm_mul_ps(vResult, U565Mul); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat3PK(const XMFLOAT3PK* pSource) noexcept { - assert(pSource); - - XM_ALIGNED_DATA(16) uint32_t Result[4]; - uint32_t Mantissa; - uint32_t Exponent; - - // X Channel (6-bit mantissa) - Mantissa = pSource->xm; - - if (pSource->xe == 0x1f) // INF or NAN - { - Result[0] = static_cast( - 0x7f800000 | (static_cast(pSource->xm) << 17)); - } else { - if (pSource->xe != 0) // The value is normalized - { - Exponent = pSource->xe; - } else if (Mantissa != 0) // The value is denormalized - { - // Normalize the value in the resulting float - Exponent = 1; - - do { - Exponent--; - Mantissa <<= 1; - } while ((Mantissa & 0x40) == 0); - - Mantissa &= 0x3F; - } else // The value is zero - { - Exponent = static_cast(-112); - } - - Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17); - } - - // Y Channel (6-bit mantissa) - Mantissa = pSource->ym; - - if (pSource->ye == 0x1f) // INF or NAN - { - Result[1] = static_cast( - 0x7f800000 | (static_cast(pSource->ym) << 17)); - } else { - if (pSource->ye != 0) // The value is normalized - { - Exponent = pSource->ye; - } else if (Mantissa != 0) // The value is denormalized - { - // Normalize the value in the resulting float - Exponent = 1; - - do { - Exponent--; - Mantissa <<= 1; - } while ((Mantissa & 0x40) == 0); - - Mantissa &= 0x3F; - } else // The value is zero - { - Exponent = static_cast(-112); - } - - Result[1] = ((Exponent + 112) << 23) | (Mantissa << 17); - } - - // Z Channel (5-bit mantissa) - Mantissa = pSource->zm; - - if (pSource->ze == 0x1f) // INF or NAN - { - Result[2] = static_cast( - 0x7f800000 | (static_cast(pSource->zm) << 17)); - } else { - if (pSource->ze != 0) // The value is normalized - { - Exponent = pSource->ze; - } else if (Mantissa != 0) // The value is denormalized - { - // Normalize the value in the resulting float - Exponent = 1; - - do { - Exponent--; - Mantissa <<= 1; - } while ((Mantissa & 0x20) == 0); - - Mantissa &= 0x1F; - } else // The value is zero - { - Exponent = static_cast(-112); - } - - Result[2] = ((Exponent + 112) << 23) | (Mantissa << 18); - } - - return XMLoadFloat3A(reinterpret_cast(&Result)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadFloat3SE(const XMFLOAT3SE* pSource) noexcept { - assert(pSource); - - union { - float f; - int32_t i; - } fi; - fi.i = 0x33800000 + (pSource->e << 23); - float Scale = fi.f; - - XMVECTORF32 v = {{{Scale * float(pSource->xm), Scale * float(pSource->ym), - Scale * float(pSource->zm), 1.0f}}}; - return v; -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadHalf4(const XMHALF4* pSource) noexcept { - assert(pSource); -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - __m128i V = _mm_loadl_epi64(reinterpret_cast(pSource)); - return _mm_cvtph_ps(V); -#else - XMVECTORF32 vResult = { - {{XMConvertHalfToFloat(pSource->x), XMConvertHalfToFloat(pSource->y), - XMConvertHalfToFloat(pSource->z), XMConvertHalfToFloat(pSource->w)}}}; - return vResult.v; -#endif // !_XM_F16C_INTRINSICS_ -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadShortN4(const XMSHORTN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{(pSource->x == -32768) - ? -1.f - : (static_cast(pSource->x) * (1.0f / 32767.0f)), - (pSource->y == -32768) - ? -1.f - : (static_cast(pSource->y) * (1.0f / 32767.0f)), - (pSource->z == -32768) - ? -1.f - : (static_cast(pSource->z) * (1.0f / 32767.0f)), - (pSource->w == -32768) - ? -1.f - : (static_cast(pSource->w) * (1.0f / 32767.0f))}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int16x4_t vInt = vld1_s16(reinterpret_cast(pSource)); - int32x4_t V = vmovl_s16(vInt); - float32x4_t vResult = vcvtq_f32_s32(V); - vResult = vmulq_n_f32(vResult, 1.0f / 32767.0f); - return vmaxq_f32(vResult, vdupq_n_f32(-1.f)); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the color in all four entries (x,z,y,w) - __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); - // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 - __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); - // x and z are unsigned! Flip the bits to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16Z16W16); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x and z - 0x8000 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16Z16W16); - // Convert to -1.0f - 1.0f - vTemp = _mm_mul_ps(vTemp, g_XMNormalizeX16Y16Z16W16); - // Very important! The entries are x,z,y,w, flip it to x,y,z,w - vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); - // Clamp result (for case of -32768) - return _mm_max_ps(vTemp, g_XMNegativeOne); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadShort4(const XMSHORT4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{static_cast(pSource->x), static_cast(pSource->y), - static_cast(pSource->z), static_cast(pSource->w)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - int16x4_t vInt = vld1_s16(reinterpret_cast(pSource)); - int32x4_t V = vmovl_s16(vInt); - return vcvtq_f32_s32(V); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the color in all four entries (x,z,y,w) - __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); - // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 - __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); - // x and z are unsigned! Flip the bits to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMFlipX16Y16Z16W16); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x and z - 0x8000 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMFixX16Y16Z16W16); - // Fix y and w because they are 65536 too large - vTemp = _mm_mul_ps(vTemp, g_XMFixupY16W16); - // Very important! The entries are x,z,y,w, flip it to x,y,z,w - return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUShortN4(const XMUSHORTN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{static_cast(pSource->x) / 65535.0f, - static_cast(pSource->y) / 65535.0f, - static_cast(pSource->z) / 65535.0f, - static_cast(pSource->w) / 65535.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint16x4_t vInt = vld1_u16(reinterpret_cast(pSource)); - uint32x4_t V = vmovl_u16(vInt); - float32x4_t vResult = vcvtq_f32_u32(V); - return vmulq_n_f32(vResult, 1.0f / 65535.0f); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 FixupY16W16 = { - {{1.0f / 65535.0f, 1.0f / 65535.0f, 1.0f / (65535.0f * 65536.0f), - 1.0f / (65535.0f * 65536.0f)}}}; - static const XMVECTORF32 FixaddY16W16 = { - {{0, 0, 32768.0f * 65536.0f, 32768.0f * 65536.0f}}}; - // Splat the color in all four entries (x,z,y,w) - __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); - // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 - __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); - // y and w are signed! Flip the bits to convert the order to unsigned - vTemp = _mm_xor_ps(vTemp, g_XMFlipZW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // y and w + 0x8000 to complete the conversion - vTemp = _mm_add_ps(vTemp, FixaddY16W16); - // Fix y and w because they are 65536 too large - vTemp = _mm_mul_ps(vTemp, FixupY16W16); - // Very important! The entries are x,z,y,w, flip it to x,y,z,w - return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUShort4(const XMUSHORT4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{static_cast(pSource->x), static_cast(pSource->y), - static_cast(pSource->z), static_cast(pSource->w)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint16x4_t vInt = vld1_u16(reinterpret_cast(pSource)); - uint32x4_t V = vmovl_u16(vInt); - return vcvtq_f32_u32(V); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 FixaddY16W16 = {{{0, 0, 32768.0f, 32768.0f}}}; - // Splat the color in all four entries (x,z,y,w) - __m128d vIntd = _mm_load1_pd(reinterpret_cast(&pSource->x)); - // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 - __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd), g_XMMaskX16Y16Z16W16); - // y and w are signed! Flip the bits to convert the order to unsigned - vTemp = _mm_xor_ps(vTemp, g_XMFlipZW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // Fix y and w because they are 65536 too large - vTemp = _mm_mul_ps(vTemp, g_XMFixupY16W16); - // y and w + 0x8000 to complete the conversion - vTemp = _mm_add_ps(vTemp, FixaddY16W16); - // Very important! The entries are x,z,y,w, flip it to x,y,z,w - return XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(3, 1, 2, 0)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadXDecN4(const XMXDECN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00}; - - uint32_t ElementX = pSource->v & 0x3FF; - uint32_t ElementY = (pSource->v >> 10) & 0x3FF; - uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; - - XMVECTORF32 vResult = { - {{(ElementX == 0x200) ? -1.f - : (static_cast(static_cast( - ElementX | SignExtend[ElementX >> 9])) / - 511.0f), - (ElementY == 0x200) ? -1.f - : (static_cast(static_cast( - ElementY | SignExtend[ElementY >> 9])) / - 511.0f), - (ElementZ == 0x200) ? -1.f - : (static_cast(static_cast( - ElementZ | SignExtend[ElementZ >> 9])) / - 511.0f), - static_cast(pSource->v >> 30) / 3.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); - vInt = vandq_u32(vInt, g_XMMaskA2B10G10R10); - vInt = veorq_u32(vInt, g_XMFlipA2B10G10R10); - float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); - R = vaddq_f32(R, g_XMFixAA2B10G10R10); - R = vmulq_f32(R, g_XMNormalizeA2B10G10R10); - return vmaxq_f32(R, vdupq_n_f32(-1.0f)); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the color in all four entries - __m128 vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); - // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskA2B10G10R10); - // a is unsigned! Flip the bit to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMFlipA2B10G10R10); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMFixAA2B10G10R10); - // Convert 0-255 to 0.0f-1.0f - vTemp = _mm_mul_ps(vTemp, g_XMNormalizeA2B10G10R10); - // Clamp result (for case of -512) - return _mm_max_ps(vTemp, g_XMNegativeOne); -#endif -} - -//------------------------------------------------------------------------------ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -// C4996: ignore deprecation warning -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadXDec4(const XMXDEC4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00}; - - uint32_t ElementX = pSource->v & 0x3FF; - uint32_t ElementY = (pSource->v >> 10) & 0x3FF; - uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; - - XMVECTORF32 vResult = {{{static_cast(static_cast( - ElementX | SignExtend[ElementX >> 9])), - static_cast(static_cast( - ElementY | SignExtend[ElementY >> 9])), - static_cast(static_cast( - ElementZ | SignExtend[ElementZ >> 9])), - static_cast(pSource->v >> 30)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORU32 XDec4Xor = { - {{0x200, 0x200 << 10, 0x200 << 20, 0x80000000}}}; - static const XMVECTORF32 XDec4Add = { - {{-512.0f, -512.0f * 1024.0f, -512.0f * 1024.0f * 1024.0f, - 32768 * 65536.0f}}}; - uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); - vInt = vandq_u32(vInt, g_XMMaskDec4); - vInt = veorq_u32(vInt, XDec4Xor); - float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); - R = vaddq_f32(R, XDec4Add); - return vmulq_f32(R, g_XMMulDec4); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORU32 XDec4Xor = { - {{0x200, 0x200 << 10, 0x200 << 20, 0x80000000}}}; - static const XMVECTORF32 XDec4Add = { - {{-512.0f, -512.0f * 1024.0f, -512.0f * 1024.0f * 1024.0f, - 32768 * 65536.0f}}}; - // Splat the color in all four entries - XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); - // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); - // a is unsigned! Flip the bit to convert the order to signed - vTemp = _mm_xor_ps(vTemp, XDec4Xor); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, XDec4Add); - // Convert 0-255 to 0.0f-1.0f - vTemp = _mm_mul_ps(vTemp, g_XMMulDec4); - return vTemp; -#endif -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUDecN4(const XMUDECN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - uint32_t ElementX = pSource->v & 0x3FF; - uint32_t ElementY = (pSource->v >> 10) & 0x3FF; - uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; - - XMVECTORF32 vResult = {{{static_cast(ElementX) / 1023.0f, - static_cast(ElementY) / 1023.0f, - static_cast(ElementZ) / 1023.0f, - static_cast(pSource->v >> 30) / 3.0f}}}; - return vResult.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 UDecN4Mul = { - {{1.0f / 1023.0f, 1.0f / (1023.0f * 1024.0f), - 1.0f / (1023.0f * 1024.0f * 1024.0f), - 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f)}}}; - uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); - vInt = vandq_u32(vInt, g_XMMaskDec4); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_f32(R, UDecN4Mul); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 UDecN4Mul = { - {{1.0f / 1023.0f, 1.0f / (1023.0f * 1024.0f), - 1.0f / (1023.0f * 1024.0f * 1024.0f), - 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f)}}}; - // Splat the color in all four entries - XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); - // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); - // a is unsigned! Flip the bit to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMFlipW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); - // Convert 0-255 to 0.0f-1.0f - vTemp = _mm_mul_ps(vTemp, UDecN4Mul); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUDecN4_XR(const XMUDECN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - - int32_t ElementX = pSource->v & 0x3FF; - int32_t ElementY = (pSource->v >> 10) & 0x3FF; - int32_t ElementZ = (pSource->v >> 20) & 0x3FF; - - XMVECTORF32 vResult = {{{static_cast(ElementX - 0x180) / 510.0f, - static_cast(ElementY - 0x180) / 510.0f, - static_cast(ElementZ - 0x180) / 510.0f, - static_cast(pSource->v >> 30) / 3.0f}}}; - - return vResult.v; - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 XRMul = { - {{1.0f / 510.0f, 1.0f / (510.0f * 1024.0f), - 1.0f / (510.0f * 1024.0f * 1024.0f), - 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f)}}}; - static const XMVECTORI32 XRBias = { - {{0x180, 0x180 * 1024, 0x180 * 1024 * 1024, 0}}}; - uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); - vInt = vandq_u32(vInt, g_XMMaskDec4); - int32x4_t vTemp = vsubq_s32(vreinterpretq_s32_u32(vInt), XRBias); - vTemp = veorq_s32(vTemp, g_XMFlipW); - float32x4_t R = vcvtq_f32_s32(vTemp); - R = vaddq_f32(R, g_XMAddUDec4); - return vmulq_f32(R, XRMul); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 XRMul = { - {{1.0f / 510.0f, 1.0f / (510.0f * 1024.0f), - 1.0f / (510.0f * 1024.0f * 1024.0f), - 1.0f / (3.0f * 1024.0f * 1024.0f * 1024.0f)}}}; - static const XMVECTORI32 XRBias = { - {{0x180, 0x180 * 1024, 0x180 * 1024 * 1024, 0}}}; - // Splat the color in all four entries - XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); - // Mask channels - vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); - // Subtract bias - vTemp = _mm_castsi128_ps(_mm_sub_epi32(_mm_castps_si128(vTemp), XRBias)); - // a is unsigned! Flip the bit to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMFlipW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); - // Convert to 0.0f-1.0f - return _mm_mul_ps(vTemp, XRMul); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUDec4(const XMUDEC4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - uint32_t ElementX = pSource->v & 0x3FF; - uint32_t ElementY = (pSource->v >> 10) & 0x3FF; - uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; - - XMVECTORF32 vResult = { - {{static_cast(ElementX), static_cast(ElementY), - static_cast(ElementZ), static_cast(pSource->v >> 30)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); - vInt = vandq_u32(vInt, g_XMMaskDec4); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_f32(R, g_XMMulDec4); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the color in all four entries - XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); - // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); - // a is unsigned! Flip the bit to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMFlipW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); - // Convert 0-255 to 0.0f-1.0f - vTemp = _mm_mul_ps(vTemp, g_XMMulDec4); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -// C4996: ignore deprecation warning -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadDecN4(const XMDECN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00}; - static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC}; - - uint32_t ElementX = pSource->v & 0x3FF; - uint32_t ElementY = (pSource->v >> 10) & 0x3FF; - uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; - uint32_t ElementW = pSource->v >> 30; - - XMVECTORF32 vResult = { - {{(ElementX == 0x200) ? -1.f - : (static_cast(static_cast( - ElementX | SignExtend[ElementX >> 9])) / - 511.0f), - (ElementY == 0x200) ? -1.f - : (static_cast(static_cast( - ElementY | SignExtend[ElementY >> 9])) / - 511.0f), - (ElementZ == 0x200) ? -1.f - : (static_cast(static_cast( - ElementZ | SignExtend[ElementZ >> 9])) / - 511.0f), - (ElementW == 0x2) - ? -1.f - : static_cast(static_cast( - ElementW | SignExtendW[(ElementW >> 1) & 1]))}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 DecN4Mul = { - {{1.0f / 511.0f, 1.0f / (511.0f * 1024.0f), - 1.0f / (511.0f * 1024.0f * 1024.0f), - 1.0f / (1024.0f * 1024.0f * 1024.0f)}}}; - uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); - vInt = vandq_u32(vInt, g_XMMaskDec4); - vInt = veorq_u32(vInt, g_XMXorDec4); - float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); - R = vaddq_f32(R, g_XMAddDec4); - R = vmulq_f32(R, DecN4Mul); - return vmaxq_f32(R, vdupq_n_f32(-1.0f)); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 DecN4Mul = { - {{1.0f / 511.0f, 1.0f / (511.0f * 1024.0f), - 1.0f / (511.0f * 1024.0f * 1024.0f), - 1.0f / (1024.0f * 1024.0f * 1024.0f)}}}; - // Splat the color in all four entries - XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); - // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); - // a is unsigned! Flip the bit to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMXorDec4); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMAddDec4); - // Convert 0-255 to 0.0f-1.0f - vTemp = _mm_mul_ps(vTemp, DecN4Mul); - // Clamp result (for case of -512/-1) - return _mm_max_ps(vTemp, g_XMNegativeOne); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadDec4(const XMDEC4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00}; - static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC}; - - uint32_t ElementX = pSource->v & 0x3FF; - uint32_t ElementY = (pSource->v >> 10) & 0x3FF; - uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; - uint32_t ElementW = pSource->v >> 30; - - XMVECTORF32 vResult = { - {{static_cast( - static_cast(ElementX | SignExtend[ElementX >> 9])), - static_cast( - static_cast(ElementY | SignExtend[ElementY >> 9])), - static_cast( - static_cast(ElementZ | SignExtend[ElementZ >> 9])), - static_cast( - static_cast(ElementW | SignExtendW[ElementW >> 1]))}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x4_t vInt = vld1q_dup_u32(reinterpret_cast(pSource)); - vInt = vandq_u32(vInt, g_XMMaskDec4); - vInt = veorq_u32(vInt, g_XMXorDec4); - float32x4_t R = vcvtq_f32_s32(vreinterpretq_s32_u32(vInt)); - R = vaddq_f32(R, g_XMAddDec4); - return vmulq_f32(R, g_XMMulDec4); -#elif defined(_XM_SSE_INTRINSICS_) - // Splat the color in all four entries - XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast(&pSource->v)); - // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskDec4); - // a is unsigned! Flip the bit to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMXorDec4); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // RGB + 0, A + 0x80000000.f to undo the signed order. - vTemp = _mm_add_ps(vTemp, g_XMAddDec4); - // Convert 0-255 to 0.0f-1.0f - vTemp = _mm_mul_ps(vTemp, g_XMMulDec4); - return vTemp; -#endif -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUByteN4(const XMUBYTEN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = {{{static_cast(pSource->x) / 255.0f, - static_cast(pSource->y) / 255.0f, - static_cast(pSource->z) / 255.0f, - static_cast(pSource->w) / 255.0f}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); - uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u32(vInt8)); - uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_n_f32(R, 1.0f / 255.0f); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 LoadUByteN4Mul = { - {{1.0f / 255.0f, 1.0f / (255.0f * 256.0f), 1.0f / (255.0f * 65536.0f), - 1.0f / (255.0f * 65536.0f * 256.0f)}}}; - // Splat the color in all four entries (x,z,y,w) - XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); - // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); - // w is signed! Flip the bits to convert the order to unsigned - vTemp = _mm_xor_ps(vTemp, g_XMFlipW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // w + 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); - // Fix y, z and w because they are too large - vTemp = _mm_mul_ps(vTemp, LoadUByteN4Mul); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUByte4(const XMUBYTE4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{static_cast(pSource->x), static_cast(pSource->y), - static_cast(pSource->z), static_cast(pSource->w)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); - uint16x8_t vInt16 = vmovl_u8(vreinterpret_u8_u32(vInt8)); - uint32x4_t vInt = vmovl_u16(vget_low_u16(vInt16)); - return vcvtq_f32_u32(vInt); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 LoadUByte4Mul = { - {{1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f)}}}; - // Splat the color in all four entries (x,z,y,w) - XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); - // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); - // w is signed! Flip the bits to convert the order to unsigned - vTemp = _mm_xor_ps(vTemp, g_XMFlipW); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // w + 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddUDec4); - // Fix y, z and w because they are too large - vTemp = _mm_mul_ps(vTemp, LoadUByte4Mul); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadByteN4(const XMBYTEN4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{(pSource->x == -128) ? -1.f - : (static_cast(pSource->x) / 127.0f), - (pSource->y == -128) ? -1.f - : (static_cast(pSource->y) / 127.0f), - (pSource->z == -128) ? -1.f - : (static_cast(pSource->z) / 127.0f), - (pSource->w == -128) ? -1.f - : (static_cast(pSource->w) / 127.0f)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); - int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u32(vInt8)); - int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); - float32x4_t R = vcvtq_f32_s32(vInt); - R = vmulq_n_f32(R, 1.0f / 127.0f); - return vmaxq_f32(R, vdupq_n_f32(-1.f)); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 LoadByteN4Mul = { - {{1.0f / 127.0f, 1.0f / (127.0f * 256.0f), 1.0f / (127.0f * 65536.0f), - 1.0f / (127.0f * 65536.0f * 256.0f)}}}; - // Splat the color in all four entries (x,z,y,w) - XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); - // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); - // x,y and z are unsigned! Flip the bits to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x, y and z - 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddByte4); - // Fix y, z and w because they are too large - vTemp = _mm_mul_ps(vTemp, LoadByteN4Mul); - // Clamp result (for case of -128) - return _mm_max_ps(vTemp, g_XMNegativeOne); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadByte4(const XMBYTE4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{static_cast(pSource->x), static_cast(pSource->y), - static_cast(pSource->z), static_cast(pSource->w)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - uint32x2_t vInt8 = vld1_dup_u32(reinterpret_cast(pSource)); - int16x8_t vInt16 = vmovl_s8(vreinterpret_s8_u32(vInt8)); - int32x4_t vInt = vmovl_s16(vget_low_s16(vInt16)); - return vcvtq_f32_s32(vInt); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 LoadByte4Mul = { - {{1.0f, 1.0f / 256.0f, 1.0f / 65536.0f, 1.0f / (65536.0f * 256.0f)}}}; - // Splat the color in all four entries (x,z,y,w) - XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast(&pSource->x)); - // Mask x&0ff,y&0xff00,z&0xff0000,w&0xff000000 - vTemp = _mm_and_ps(vTemp, g_XMMaskByte4); - // x,y and z are unsigned! Flip the bits to convert the order to signed - vTemp = _mm_xor_ps(vTemp, g_XMXorByte4); - // Convert to floating point numbers - vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); - // x, y and z - 0x80 to complete the conversion - vTemp = _mm_add_ps(vTemp, g_XMAddByte4); - // Fix y, z and w because they are too large - vTemp = _mm_mul_ps(vTemp, LoadByte4Mul); - return vTemp; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadUNibble4(const XMUNIBBLE4* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{float(pSource->v & 0xF), float((pSource->v >> 4) & 0xF), - float((pSource->v >> 8) & 0xF), float((pSource->v >> 12) & 0xF)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORI32 UNibble4And = {{{0xF, 0xF0, 0xF00, 0xF000}}}; - static const XMVECTORF32 UNibble4Mul = { - {{1.0f, 1.0f / 16.f, 1.0f / 256.f, 1.0f / 4096.f}}}; - uint16x4_t vInt16 = - vld1_dup_u16(reinterpret_cast(pSource)); - uint32x4_t vInt = vmovl_u16(vInt16); - vInt = vandq_u32(vInt, UNibble4And); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_f32(R, UNibble4Mul); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORI32 UNibble4And = {{{0xF, 0xF0, 0xF00, 0xF000}}}; - static const XMVECTORF32 UNibble4Mul = { - {{1.0f, 1.0f / 16.f, 1.0f / 256.f, 1.0f / 4096.f}}}; - // Get the 16 bit value and splat it - __m128i vInt = XM_LOADU_SI16(&pSource->v); - XMVECTOR vResult = - XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0)); - // Mask off x, y and z - vResult = _mm_and_ps(vResult, UNibble4And); - // Convert to float - vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); - // Normalize x, y, and z - vResult = _mm_mul_ps(vResult, UNibble4Mul); - return vResult; -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMVECTOR XM_CALLCONV -XMLoadU555(const XMU555* pSource) noexcept { - assert(pSource); -#if defined(_XM_NO_INTRINSICS_) - XMVECTORF32 vResult = { - {{float(pSource->v & 0x1F), float((pSource->v >> 5) & 0x1F), - float((pSource->v >> 10) & 0x1F), float((pSource->v >> 15) & 0x1)}}}; - return vResult.v; -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORI32 U555And = { - {{0x1F, 0x1F << 5, 0x1F << 10, 0x8000}}}; - static const XMVECTORF32 U555Mul = { - {{1.0f, 1.0f / 32.f, 1.0f / 1024.f, 1.0f / 32768.f}}}; - uint16x4_t vInt16 = - vld1_dup_u16(reinterpret_cast(pSource)); - uint32x4_t vInt = vmovl_u16(vInt16); - vInt = vandq_u32(vInt, U555And); - float32x4_t R = vcvtq_f32_u32(vInt); - return vmulq_f32(R, U555Mul); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORI32 U555And = { - {{0x1F, 0x1F << 5, 0x1F << 10, 0x8000}}}; - static const XMVECTORF32 U555Mul = { - {{1.0f, 1.0f / 32.f, 1.0f / 1024.f, 1.0f / 32768.f}}}; - // Get the 16bit value and splat it - __m128i vInt = XM_LOADU_SI16(&pSource->v); - XMVECTOR vResult = - XM_PERMUTE_PS(_mm_castsi128_ps(vInt), _MM_SHUFFLE(0, 0, 0, 0)); - // Mask off x, y and z - vResult = _mm_and_ps(vResult, U555And); - // Convert to float - vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); - // Normalize x, y, and z - vResult = _mm_mul_ps(vResult, U555Mul); - return vResult; -#endif -} - -#ifdef _PREFAST_ -#pragma prefast(pop) -#endif - -/**************************************************************************** - * - * Vector and matrix store operations - * - ****************************************************************************/ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreColor(XMCOLOR* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorSaturate(V); - N = XMVectorMultiply(N, g_UByteMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->c = (static_cast(tmp.w) << 24) | - (static_cast(tmp.x) << 16) | - (static_cast(tmp.y) << 8) | - static_cast(tmp.z); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0)); - R = vminq_f32(R, vdupq_n_f32(1.0f)); - R = vmulq_n_f32(R, 255.0f); - R = XMVectorRound(R); - uint32x4_t vInt32 = vcvtq_u32_f32(R); - uint16x4_t vInt16 = vqmovn_u32(vInt32); - uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); - uint32_t rgba = vget_lane_u32(vreinterpret_u32_u8(vInt8), 0); - pDestination->c = - (rgba & 0xFF00FF00) | ((rgba >> 16) & 0xFF) | ((rgba << 16) & 0xFF0000); -#elif defined(_XM_SSE_INTRINSICS_) - // Set <0 to 0 - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - // Set>1 to 1 - vResult = _mm_min_ps(vResult, g_XMOne); - // Convert to 0-255 - vResult = _mm_mul_ps(vResult, g_UByteMax); - // Shuffle RGBA to ARGB - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 0, 1, 2)); - // Convert to int - __m128i vInt = _mm_cvtps_epi32(vResult); - // Mash to shorts - vInt = _mm_packs_epi32(vInt, vInt); - // Mash to bytes - vInt = _mm_packus_epi16(vInt, vInt); - // Store the color - _mm_store_ss(reinterpret_cast(&pDestination->c), - _mm_castsi128_ps(vInt)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreHalf2(XMHALF2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - __m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT); - _mm_store_ss(reinterpret_cast(pDestination), _mm_castsi128_ps(V1)); -#else - pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V)); - pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V)); -#endif // !_XM_F16C_INTRINSICS_ -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreShortN2(XMSHORTN2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); - N = XMVectorMultiply(N, g_ShortMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-1.f)); - R = vminq_f32(R, vdupq_n_f32(1.0f)); - R = vmulq_n_f32(R, 32767.0f); - int32x4_t vInt32 = vcvtq_s32_f32(R); - int16x4_t vInt16 = vqmovn_s32(vInt32); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_s16(vInt16), 0); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); - vResult = _mm_min_ps(vResult, g_XMOne); - vResult = _mm_mul_ps(vResult, g_ShortMax); - __m128i vResulti = _mm_cvtps_epi32(vResult); - vResulti = _mm_packs_epi32(vResulti, vResulti); - _mm_store_ss(reinterpret_cast(&pDestination->x), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreShort2(XMSHORT2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_ShortMin, g_ShortMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-32767.f)); - R = vminq_f32(R, vdupq_n_f32(32767.0f)); - int32x4_t vInt32 = vcvtq_s32_f32(R); - int16x4_t vInt16 = vqmovn_s32(vInt32); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_s16(vInt16), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_ShortMin); - vResult = _mm_min_ps(vResult, g_ShortMax); - // Convert to int with rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // Pack the ints into shorts - vInt = _mm_packs_epi32(vInt, vInt); - _mm_store_ss(reinterpret_cast(&pDestination->x), - _mm_castsi128_ps(vInt)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUShortN2(XMUSHORTN2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorSaturate(V); - N = XMVectorMultiplyAdd(N, g_UShortMax, g_XMOneHalf.v); - N = XMVectorTruncate(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); - R = vminq_f32(R, vdupq_n_f32(1.0f)); - R = vmulq_n_f32(R, 65535.0f); - R = vaddq_f32(R, g_XMOneHalf); - uint32x4_t vInt32 = vcvtq_u32_f32(R); - uint16x4_t vInt16 = vqmovn_u32(vInt32); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_u16(vInt16), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_XMOne); - vResult = _mm_mul_ps(vResult, g_UShortMax); - vResult = _mm_add_ps(vResult, g_XMOneHalf); - // Convert to int - __m128i vInt = _mm_cvttps_epi32(vResult); - // Since the SSE pack instruction clamps using signed rules, - // manually extract the values to store them to memory - pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); - pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUShort2(XMUSHORT2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UShortMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); - R = vminq_f32(R, vdupq_n_f32(65535.0f)); - uint32x4_t vInt32 = vcvtq_u32_f32(R); - uint16x4_t vInt16 = vqmovn_u32(vInt32); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_u16(vInt16), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_UShortMax); - // Convert to int with rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // Since the SSE pack instruction clamps using signed rules, - // manually extract the values to store them to memory - pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); - pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreByteN2(XMBYTEN2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); - N = XMVectorMultiply(N, g_ByteMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-1.f)); - R = vminq_f32(R, vdupq_n_f32(1.0f)); - R = vmulq_n_f32(R, 127.0f); - int32x4_t vInt32 = vcvtq_s32_f32(R); - int16x4_t vInt16 = vqmovn_s32(vInt32); - int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); - vst1_lane_u16(reinterpret_cast(pDestination), - vreinterpret_u16_s8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); - vResult = _mm_min_ps(vResult, g_XMOne); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, g_ByteMax); - // Convert to int by rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // No SSE operations will write to 16-bit values, so we have to extract them - // manually - auto x = static_cast(_mm_extract_epi16(vInt, 0)); - auto y = static_cast(_mm_extract_epi16(vInt, 2)); - pDestination->v = static_cast( - ((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreByte2(XMBYTE2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_ByteMin, g_ByteMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-127.f)); - R = vminq_f32(R, vdupq_n_f32(127.0f)); - int32x4_t vInt32 = vcvtq_s32_f32(R); - int16x4_t vInt16 = vqmovn_s32(vInt32); - int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); - vst1_lane_u16(reinterpret_cast(pDestination), - vreinterpret_u16_s8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_ByteMin); - vResult = _mm_min_ps(vResult, g_ByteMax); - // Convert to int by rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // No SSE operations will write to 16-bit values, so we have to extract them - // manually - auto x = static_cast(_mm_extract_epi16(vInt, 0)); - auto y = static_cast(_mm_extract_epi16(vInt, 2)); - pDestination->v = static_cast( - ((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUByteN2(XMUBYTEN2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorSaturate(V); - N = XMVectorMultiplyAdd(N, g_UByteMax, g_XMOneHalf.v); - N = XMVectorTruncate(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); - R = vminq_f32(R, vdupq_n_f32(1.0f)); - R = vmulq_n_f32(R, 255.0f); - R = vaddq_f32(R, g_XMOneHalf); - uint32x4_t vInt32 = vcvtq_u32_f32(R); - uint16x4_t vInt16 = vqmovn_u32(vInt32); - uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); - vst1_lane_u16(reinterpret_cast(pDestination), - vreinterpret_u16_u8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_XMOne); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, g_UByteMax); - vResult = _mm_add_ps(vResult, g_XMOneHalf); - // Convert to int - __m128i vInt = _mm_cvttps_epi32(vResult); - // No SSE operations will write to 16-bit values, so we have to extract them - // manually - auto x = static_cast(_mm_extract_epi16(vInt, 0)); - auto y = static_cast(_mm_extract_epi16(vInt, 2)); - pDestination->v = static_cast( - ((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUByte2(XMUBYTE2* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UByteMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0.f)); - R = vminq_f32(R, vdupq_n_f32(255.0f)); - uint32x4_t vInt32 = vcvtq_u32_f32(R); - uint16x4_t vInt16 = vqmovn_u32(vInt32); - uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); - vst1_lane_u16(reinterpret_cast(pDestination), - vreinterpret_u16_u8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_UByteMax); - // Convert to int by rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // No SSE operations will write to 16-bit values, so we have to extract them - // manually - auto x = static_cast(_mm_extract_epi16(vInt, 0)); - auto y = static_cast(_mm_extract_epi16(vInt, 2)); - pDestination->v = static_cast( - ((static_cast(y) & 0xFF) << 8) | (static_cast(x) & 0xFF)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreU565(XMU565* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 Max = {{{31.0f, 63.0f, 31.0f, 0.0f}}}; - -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast(((static_cast(tmp.z) & 0x1F) << 11) | - ((static_cast(tmp.y) & 0x3F) << 5) | - ((static_cast(tmp.x) & 0x1F))); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Scale = {{{1.0f, 32.f, 32.f * 64.f, 0.f}}}; - static const XMVECTORU32 Mask = {{{0x1F, 0x3F << 5, 0x1F << 11, 0}}}; - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); - vResult = vminq_f32(vResult, Max); - vResult = vmulq_f32(vResult, Scale); - uint32x4_t vResulti = vcvtq_u32_f32(vResult); - vResulti = vandq_u32(vResulti, Mask); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vResulti); - uint32x2_t vhi = vget_high_u32(vResulti); - vTemp = vorr_u32(vTemp, vhi); - vTemp = vpadd_u32(vTemp, vTemp); - vst1_lane_u16(&pDestination->v, vreinterpret_u16_u32(vTemp), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, Max); - // Convert to int with rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // No SSE operations will write to 16-bit values, so we have to extract them - // manually - auto x = static_cast(_mm_extract_epi16(vInt, 0)); - auto y = static_cast(_mm_extract_epi16(vInt, 2)); - auto z = static_cast(_mm_extract_epi16(vInt, 4)); - pDestination->v = static_cast( - ((static_cast(z) & 0x1F) << 11) | - ((static_cast(y) & 0x3F) << 5) | ((static_cast(x) & 0x1F))); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat3PK(XMFLOAT3PK* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - - XM_ALIGNED_DATA(16) uint32_t IValue[4]; - XMStoreFloat3A(reinterpret_cast(&IValue), V); - - uint32_t Result[3]; - - // X & Y Channels (5-bit exponent, 6-bit mantissa) - for (uint32_t j = 0; j < 2; ++j) { - uint32_t Sign = IValue[j] & 0x80000000; - uint32_t I = IValue[j] & 0x7FFFFFFF; - - if ((I & 0x7F800000) == 0x7F800000) { - // INF or NAN - Result[j] = 0x7C0U; - if ((I & 0x7FFFFF) != 0) { - Result[j] = 0x7FFU; - } else if (Sign) { - // -INF is clamped to 0 since 3PK is positive only - Result[j] = 0; - } - } else if (Sign || I < 0x35800000) { - // 3PK is positive only, so clamp to zero - Result[j] = 0; - } else if (I > 0x477E0000U) { - // The number is too large to be represented as a float11, set to - // max - Result[j] = 0x7BFU; - } else { - if (I < 0x38800000U) { - // The number is too small to be represented as a normalized - // float11 Convert it to a denormalized value. - uint32_t Shift = 113U - (I >> 23U); - I = (0x800000U | (I & 0x7FFFFFU)) >> Shift; - } else { - // Rebias the exponent to represent the value as a normalized - // float11 - I += 0xC8000000U; - } - - Result[j] = ((I + 0xFFFFU + ((I >> 17U) & 1U)) >> 17U) & 0x7ffU; - } - } - - // Z Channel (5-bit exponent, 5-bit mantissa) - uint32_t Sign = IValue[2] & 0x80000000; - uint32_t I = IValue[2] & 0x7FFFFFFF; - - if ((I & 0x7F800000) == 0x7F800000) { - // INF or NAN - Result[2] = 0x3E0U; - if (I & 0x7FFFFF) { - Result[2] = 0x3FFU; - } else if (Sign || I < 0x36000000) { - // -INF is clamped to 0 since 3PK is positive only - Result[2] = 0; - } - } else if (Sign) { - // 3PK is positive only, so clamp to zero - Result[2] = 0; - } else if (I > 0x477C0000U) { - // The number is too large to be represented as a float10, set to max - Result[2] = 0x3DFU; - } else { - if (I < 0x38800000U) { - // The number is too small to be represented as a normalized float10 - // Convert it to a denormalized value. - uint32_t Shift = 113U - (I >> 23U); - I = (0x800000U | (I & 0x7FFFFFU)) >> Shift; - } else { - // Rebias the exponent to represent the value as a normalized - // float10 - I += 0xC8000000U; - } - - Result[2] = ((I + 0x1FFFFU + ((I >> 18U) & 1U)) >> 18U) & 0x3ffU; - } - - // Pack Result into memory - pDestination->v = (Result[0] & 0x7ff) | ((Result[1] & 0x7ff) << 11) | - ((Result[2] & 0x3ff) << 22); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreFloat3SE(XMFLOAT3SE* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - - XMFLOAT3A tmp; - XMStoreFloat3A(&tmp, V); - - static constexpr float maxf9 = float(0x1FF << 7); - static constexpr float minf9 = float(1.f / (1 << 16)); - - float x = (tmp.x >= 0.f) ? ((tmp.x > maxf9) ? maxf9 : tmp.x) : 0.f; - float y = (tmp.y >= 0.f) ? ((tmp.y > maxf9) ? maxf9 : tmp.y) : 0.f; - float z = (tmp.z >= 0.f) ? ((tmp.z > maxf9) ? maxf9 : tmp.z) : 0.f; - - const float max_xy = (x > y) ? x : y; - const float max_xyz = (max_xy > z) ? max_xy : z; - - const float maxColor = (max_xyz > minf9) ? max_xyz : minf9; - - union { - float f; - int32_t i; - } fi; - fi.f = maxColor; - fi.i += 0x00004000; // round up leaving 9 bits in fraction (including - // assumed 1) - - auto exp = static_cast(fi.i) >> 23; - pDestination->e = exp - 0x6f; - - fi.i = static_cast(0x83000000 - (exp << 23)); - float ScaleR = fi.f; - - pDestination->xm = - static_cast(MathInternal::round_to_nearest(x * ScaleR)); - pDestination->ym = - static_cast(MathInternal::round_to_nearest(y * ScaleR)); - pDestination->zm = - static_cast(MathInternal::round_to_nearest(z * ScaleR)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreHalf4(XMHALF4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_F16C_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) - __m128i V1 = _mm_cvtps_ph(V, _MM_FROUND_TO_NEAREST_INT); - _mm_storel_epi64(reinterpret_cast<__m128i*>(pDestination), V1); -#else - XMFLOAT4A t; - XMStoreFloat4A(&t, V); - - pDestination->x = XMConvertFloatToHalf(t.x); - pDestination->y = XMConvertFloatToHalf(t.y); - pDestination->z = XMConvertFloatToHalf(t.z); - pDestination->w = XMConvertFloatToHalf(t.w); -#endif // !_XM_F16C_INTRINSICS_ -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreShortN4(XMSHORTN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); - N = XMVectorMultiply(N, g_ShortMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(-1.f)); - vResult = vminq_f32(vResult, vdupq_n_f32(1.0f)); - vResult = vmulq_n_f32(vResult, 32767.0f); - int16x4_t vInt = vmovn_s32(vcvtq_s32_f32(vResult)); - vst1_s16(reinterpret_cast(pDestination), vInt); -#elif defined(_XM_SSE_INTRINSICS_) - XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); - vResult = _mm_min_ps(vResult, g_XMOne); - vResult = _mm_mul_ps(vResult, g_ShortMax); - __m128i vResulti = _mm_cvtps_epi32(vResult); - vResulti = _mm_packs_epi32(vResulti, vResulti); - _mm_store_sd(reinterpret_cast(&pDestination->x), - _mm_castsi128_pd(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreShort4(XMSHORT4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_ShortMin, g_ShortMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vResult = vmaxq_f32(V, g_ShortMin); - vResult = vminq_f32(vResult, g_ShortMax); - int16x4_t vInt = vmovn_s32(vcvtq_s32_f32(vResult)); - vst1_s16(reinterpret_cast(pDestination), vInt); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_ShortMin); - vResult = _mm_min_ps(vResult, g_ShortMax); - // Convert to int with rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // Pack the ints into shorts - vInt = _mm_packs_epi32(vInt, vInt); - _mm_store_sd(reinterpret_cast(&pDestination->x), - _mm_castsi128_pd(vInt)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUShortN4(XMUSHORTN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorSaturate(V); - N = XMVectorMultiplyAdd(N, g_UShortMax, g_XMOneHalf.v); - N = XMVectorTruncate(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); - vResult = vminq_f32(vResult, vdupq_n_f32(1.0f)); - vResult = vmulq_n_f32(vResult, 65535.0f); - vResult = vaddq_f32(vResult, g_XMOneHalf); - uint16x4_t vInt = vmovn_u32(vcvtq_u32_f32(vResult)); - vst1_u16(reinterpret_cast(pDestination), vInt); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_XMOne); - vResult = _mm_mul_ps(vResult, g_UShortMax); - vResult = _mm_add_ps(vResult, g_XMOneHalf); - // Convert to int - __m128i vInt = _mm_cvttps_epi32(vResult); - // Since the SSE pack instruction clamps using signed rules, - // manually extract the values to store them to memory - pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); - pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); - pDestination->z = static_cast(_mm_extract_epi16(vInt, 4)); - pDestination->w = static_cast(_mm_extract_epi16(vInt, 6)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUShort4(XMUSHORT4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UShortMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); - vResult = vminq_f32(vResult, g_UShortMax); - uint16x4_t vInt = vmovn_u32(vcvtq_u32_f32(vResult)); - vst1_u16(reinterpret_cast(pDestination), vInt); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_UShortMax); - // Convert to int with rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // Since the SSE pack instruction clamps using signed rules, - // manually extract the values to store them to memory - pDestination->x = static_cast(_mm_extract_epi16(vInt, 0)); - pDestination->y = static_cast(_mm_extract_epi16(vInt, 2)); - pDestination->z = static_cast(_mm_extract_epi16(vInt, 4)); - pDestination->w = static_cast(_mm_extract_epi16(vInt, 6)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreXDecN4(XMXDECN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 Min = {{{-1.0f, -1.0f, -1.0f, 0.0f}}}; - -#if defined(_XM_NO_INTRINSICS_) - - static const XMVECTORF32 Scale = {{{511.0f, 511.0f, 511.0f, 3.0f}}}; - - XMVECTOR N = XMVectorClamp(V, Min.v, g_XMOne.v); - N = XMVectorMultiply(N, Scale.v); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast((static_cast(tmp.w) << 30) | - ((static_cast(tmp.z) & 0x3FF) << 20) | - ((static_cast(tmp.y) & 0x3FF) << 10) | - (static_cast(tmp.x) & 0x3FF)); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{511.0f, 511.0f * 1024.0f, 511.0f * 1048576.0f, 3.0f * 536870912.0f}}}; - static const XMVECTORI32 ScaleMask = { - {{0x3FF, 0x3FF << 10, 0x3FF << 20, 0x3 << 29}}}; - float32x4_t vResult = vmaxq_f32(V, Min); - vResult = vminq_f32(vResult, vdupq_n_f32(1.0f)); - vResult = vmulq_f32(vResult, Scale); - int32x4_t vResulti = vcvtq_s32_f32(vResult); - vResulti = vandq_s32(vResulti, ScaleMask); - int32x4_t vResultw = vandq_s32(vResulti, g_XMMaskW); - vResulti = vaddq_s32(vResulti, vResultw); - // Do a horizontal or of all 4 entries - uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); - uint32x2_t vhi = vget_high_u32(vreinterpretq_u32_s32(vResulti)); - vTemp = vorr_u32(vTemp, vhi); - vTemp = vpadd_u32(vTemp, vTemp); - vst1_lane_u32(&pDestination->v, vTemp, 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{511.0f, 511.0f * 1024.0f, 511.0f * 1048576.0f, 3.0f * 536870912.0f}}}; - static const XMVECTORI32 ScaleMask = { - {{0x3FF, 0x3FF << 10, 0x3FF << 20, 0x3 << 29}}}; - XMVECTOR vResult = _mm_max_ps(V, Min); - vResult = _mm_min_ps(vResult, g_XMOne); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, Scale); - // Convert to int (W is unsigned) - __m128i vResulti = _mm_cvtps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, ScaleMask); - // To fix W, add itself to shift it up to <<30 instead of <<29 - __m128i vResultw = _mm_and_si128(vResulti, g_XMMaskW); - vResulti = _mm_add_epi32(vResulti, vResultw); - // Do a horizontal or of all 4 entries - vResult = - XM_PERMUTE_PS(_mm_castsi128_ps(vResulti), _MM_SHUFFLE(0, 3, 2, 1)); - vResulti = _mm_or_si128(vResulti, _mm_castps_si128(vResult)); - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 3, 2, 1)); - vResulti = _mm_or_si128(vResulti, _mm_castps_si128(vResult)); - vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(0, 3, 2, 1)); - vResulti = _mm_or_si128(vResulti, _mm_castps_si128(vResult)); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -// C4996: ignore deprecation warning -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreXDec4(XMXDEC4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 MinXDec4 = {{{-511.0f, -511.0f, -511.0f, 0.0f}}}; - static const XMVECTORF32 MaxXDec4 = {{{511.0f, 511.0f, 511.0f, 3.0f}}}; - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, MinXDec4, MaxXDec4); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast((static_cast(tmp.w) << 30) | - ((static_cast(tmp.z) & 0x3FF) << 20) | - ((static_cast(tmp.y) & 0x3FF) << 10) | - ((static_cast(tmp.x) & 0x3FF))); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 ScaleXDec4 = { - {{1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, - 1024.0f * 1024.0f * 1024.0f / 2.0f}}}; - static const XMVECTORI32 MaskXDec4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - float32x4_t vResult = vmaxq_f32(V, MinXDec4); - vResult = vminq_f32(vResult, MaxXDec4); - vResult = vmulq_f32(vResult, ScaleXDec4); - int32x4_t vResulti = vcvtq_s32_f32(vResult); - vResulti = vandq_s32(vResulti, MaskXDec4); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); - uint32x2_t vTemp2 = vget_high_u32(vreinterpretq_u32_s32(vResulti)); - vTemp = vorr_u32(vTemp, vTemp2); - // Perform a single bit left shift on y|w - vTemp2 = vdup_lane_u32(vTemp, 1); - vTemp2 = vadd_u32(vTemp2, vTemp2); - vTemp = vorr_u32(vTemp, vTemp2); - vst1_lane_u32(&pDestination->v, vTemp, 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleXDec4 = { - {{1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, - 1024.0f * 1024.0f * 1024.0f / 2.0f}}}; - static const XMVECTORI32 MaskXDec4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, MinXDec4); - vResult = _mm_min_ps(vResult, MaxXDec4); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleXDec4); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskXDec4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // Perform a single bit left shift on y|w - vResulti2 = _mm_add_epi32(vResulti2, vResulti2); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUDecN4(XMUDECN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - static const XMVECTORF32 Scale = {{{1023.0f, 1023.0f, 1023.0f, 3.0f}}}; - - XMVECTOR N = XMVectorSaturate(V); - N = XMVectorMultiply(N, Scale.v); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast((static_cast(tmp.w) << 30) | - ((static_cast(tmp.z) & 0x3FF) << 20) | - ((static_cast(tmp.y) & 0x3FF) << 10) | - ((static_cast(tmp.x) & 0x3FF))); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 ScaleUDecN4 = { - {{1023.0f, 1023.0f * 1024.0f * 0.5f, 1023.0f * 1024.0f * 1024.0f, - 3.0f * 1024.0f * 1024.0f * 1024.0f * 0.5f}}}; - static const XMVECTORI32 MaskUDecN4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0.f)); - vResult = vminq_f32(vResult, vdupq_n_f32(1.f)); - vResult = vmulq_f32(vResult, ScaleUDecN4); - uint32x4_t vResulti = vcvtq_u32_f32(vResult); - vResulti = vandq_u32(vResulti, MaskUDecN4); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vResulti); - uint32x2_t vTemp2 = vget_high_u32(vResulti); - vTemp = vorr_u32(vTemp, vTemp2); - // Perform a single bit left shift on y|w - vTemp2 = vdup_lane_u32(vTemp, 1); - vTemp2 = vadd_u32(vTemp2, vTemp2); - vTemp = vorr_u32(vTemp, vTemp2); - vst1_lane_u32(&pDestination->v, vTemp, 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleUDecN4 = { - {{1023.0f, 1023.0f * 1024.0f * 0.5f, 1023.0f * 1024.0f * 1024.0f, - 3.0f * 1024.0f * 1024.0f * 1024.0f * 0.5f}}}; - static const XMVECTORI32 MaskUDecN4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_XMOne); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleUDecN4); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskUDecN4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // Perform a left shift by one bit on y|w - vResulti2 = _mm_add_epi32(vResulti2, vResulti2); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUDecN4_XR(XMUDECN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 Scale = {{{510.0f, 510.0f, 510.0f, 3.0f}}}; - static const XMVECTORF32 Bias = {{{384.0f, 384.0f, 384.0f, 0.0f}}}; - static const XMVECTORF32 C = {{{1023.f, 1023.f, 1023.f, 3.f}}}; - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorMultiplyAdd(V, Scale, Bias); - N = XMVectorClamp(N, g_XMZero, C); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast((static_cast(tmp.w) << 30) | - ((static_cast(tmp.z) & 0x3FF) << 20) | - ((static_cast(tmp.y) & 0x3FF) << 10) | - ((static_cast(tmp.x) & 0x3FF))); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Shift = {{{1.0f, 1024.0f * 0.5f, 1024.0f * 1024.0f, - 1024.0f * 1024.0f * 1024.0f * 0.5f}}}; - static const XMVECTORU32 MaskUDecN4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - float32x4_t vResult = vmlaq_f32(Bias, V, Scale); - vResult = vmaxq_f32(vResult, vdupq_n_f32(0.f)); - vResult = vminq_f32(vResult, C); - vResult = vmulq_f32(vResult, Shift); - uint32x4_t vResulti = vcvtq_u32_f32(vResult); - vResulti = vandq_u32(vResulti, MaskUDecN4); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vResulti); - uint32x2_t vTemp2 = vget_high_u32(vResulti); - vTemp = vorr_u32(vTemp, vTemp2); - // Perform a single bit left shift on y|w - vTemp2 = vdup_lane_u32(vTemp, 1); - vTemp2 = vadd_u32(vTemp2, vTemp2); - vTemp = vorr_u32(vTemp, vTemp2); - vst1_lane_u32(&pDestination->v, vTemp, 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 Shift = {{{1.0f, 1024.0f * 0.5f, 1024.0f * 1024.0f, - 1024.0f * 1024.0f * 1024.0f * 0.5f}}}; - static const XMVECTORU32 MaskUDecN4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - // Scale & bias - XMVECTOR vResult = XM_FMADD_PS(V, Scale, Bias); - // Clamp to bounds - vResult = _mm_max_ps(vResult, g_XMZero); - vResult = _mm_min_ps(vResult, C); - // Scale by shift values - vResult = _mm_mul_ps(vResult, Shift); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskUDecN4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // Perform a left shift by one bit on y|w - vResulti2 = _mm_add_epi32(vResulti2, vResulti2); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUDec4(XMUDEC4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 MaxUDec4 = {{{1023.0f, 1023.0f, 1023.0f, 3.0f}}}; - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), MaxUDec4); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast((static_cast(tmp.w) << 30) | - ((static_cast(tmp.z) & 0x3FF) << 20) | - ((static_cast(tmp.y) & 0x3FF) << 10) | - ((static_cast(tmp.x) & 0x3FF))); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 ScaleUDec4 = { - {{1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, - 1024.0f * 1024.0f * 1024.0f / 2.0f}}}; - static const XMVECTORI32 MaskUDec4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0.f)); - vResult = vminq_f32(vResult, MaxUDec4); - vResult = vmulq_f32(vResult, ScaleUDec4); - uint32x4_t vResulti = vcvtq_u32_f32(vResult); - vResulti = vandq_u32(vResulti, MaskUDec4); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vResulti); - uint32x2_t vTemp2 = vget_high_u32(vResulti); - vTemp = vorr_u32(vTemp, vTemp2); - // Perform a single bit left shift on y|w - vTemp2 = vdup_lane_u32(vTemp, 1); - vTemp2 = vadd_u32(vTemp2, vTemp2); - vTemp = vorr_u32(vTemp, vTemp2); - vst1_lane_u32(&pDestination->v, vTemp, 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleUDec4 = { - {{1.0f, 1024.0f / 2.0f, 1024.0f * 1024.0f, - 1024.0f * 1024.0f * 1024.0f / 2.0f}}}; - static const XMVECTORI32 MaskUDec4 = { - {{0x3FF, 0x3FF << (10 - 1), 0x3FF << 20, 0x3 << (30 - 1)}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, MaxUDec4); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleUDec4); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskUDec4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // Perform a left shift by one bit on y|w - vResulti2 = _mm_add_epi32(vResulti2, vResulti2); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -// C4996: ignore deprecation warning -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreDecN4(XMDECN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - static const XMVECTORF32 Scale = {{{511.0f, 511.0f, 511.0f, 1.0f}}}; - - XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); - N = XMVectorMultiply(N, Scale.v); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast((static_cast(tmp.w) << 30) | - ((static_cast(tmp.z) & 0x3FF) << 20) | - ((static_cast(tmp.y) & 0x3FF) << 10) | - ((static_cast(tmp.x) & 0x3FF))); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 ScaleDecN4 = { - {{511.0f, 511.0f * 1024.0f, 511.0f * 1024.0f * 1024.0f, - 1.0f * 1024.0f * 1024.0f * 1024.0f}}}; - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(-1.f)); - vResult = vminq_f32(vResult, vdupq_n_f32(1.f)); - vResult = vmulq_f32(vResult, ScaleDecN4); - int32x4_t vResulti = vcvtq_s32_f32(vResult); - vResulti = vandq_s32(vResulti, g_XMMaskDec4); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); - uint32x2_t vhi = vget_high_u32(vreinterpretq_u32_s32(vResulti)); - vTemp = vorr_u32(vTemp, vhi); - vTemp = vpadd_u32(vTemp, vTemp); - vst1_lane_u32(&pDestination->v, vTemp, 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleDecN4 = { - {{511.0f, 511.0f * 1024.0f, 511.0f * 1024.0f * 1024.0f, - 1.0f * 1024.0f * 1024.0f * 1024.0f}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); - vResult = _mm_min_ps(vResult, g_XMOne); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleDecN4); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, g_XMMaskDec4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreDec4(XMDEC4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 MinDec4 = {{{-511.0f, -511.0f, -511.0f, -1.0f}}}; - static const XMVECTORF32 MaxDec4 = {{{511.0f, 511.0f, 511.0f, 1.0f}}}; - -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, MinDec4, MaxDec4); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast((static_cast(tmp.w) << 30) | - ((static_cast(tmp.z) & 0x3FF) << 20) | - ((static_cast(tmp.y) & 0x3FF) << 10) | - ((static_cast(tmp.x) & 0x3FF))); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 ScaleDec4 = { - {{1.0f, 1024.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f}}}; - float32x4_t vResult = vmaxq_f32(V, MinDec4); - vResult = vminq_f32(vResult, MaxDec4); - vResult = vmulq_f32(vResult, ScaleDec4); - int32x4_t vResulti = vcvtq_s32_f32(vResult); - vResulti = vandq_s32(vResulti, g_XMMaskDec4); - // Do a horizontal or of all 4 entries - uint32x2_t vTemp = vget_low_u32(vreinterpretq_u32_s32(vResulti)); - uint32x2_t vhi = vget_high_u32(vreinterpretq_u32_s32(vResulti)); - vTemp = vorr_u32(vTemp, vhi); - vTemp = vpadd_u32(vTemp, vTemp); - vst1_lane_u32(&pDestination->v, vTemp, 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleDec4 = { - {{1.0f, 1024.0f, 1024.0f * 1024.0f, 1024.0f * 1024.0f * 1024.0f}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, MinDec4); - vResult = _mm_min_ps(vResult, MaxDec4); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleDec4); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, g_XMMaskDec4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUByteN4(XMUBYTEN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorSaturate(V); - N = XMVectorMultiply(N, g_UByteMax); - N = XMVectorTruncate(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0)); - R = vminq_f32(R, vdupq_n_f32(1.0f)); - R = vmulq_n_f32(R, 255.0f); - uint32x4_t vInt32 = vcvtq_u32_f32(R); - uint16x4_t vInt16 = vqmovn_u32(vInt32); - uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_u8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleUByteN4 = { - {{255.0f, 255.0f * 256.0f * 0.5f, 255.0f * 256.0f * 256.0f, - 255.0f * 256.0f * 256.0f * 256.0f * 0.5f}}}; - static const XMVECTORI32 MaskUByteN4 = { - {{0xFF, 0xFF << (8 - 1), 0xFF << 16, 0xFF << (24 - 1)}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_XMOne); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleUByteN4); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskUByteN4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // Perform a single bit left shift to fix y|w - vResulti2 = _mm_add_epi32(vResulti2, vResulti2); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUByte4(XMUBYTE4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), g_UByteMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(0)); - R = vminq_f32(R, vdupq_n_f32(255.0f)); - uint32x4_t vInt32 = vcvtq_u32_f32(R); - uint16x4_t vInt16 = vqmovn_u32(vInt32); - uint8x8_t vInt8 = vqmovn_u16(vcombine_u16(vInt16, vInt16)); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_u8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleUByte4 = { - {{1.0f, 256.0f * 0.5f, 256.0f * 256.0f, - 256.0f * 256.0f * 256.0f * 0.5f}}}; - static const XMVECTORI32 MaskUByte4 = { - {{0xFF, 0xFF << (8 - 1), 0xFF << 16, 0xFF << (24 - 1)}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, g_UByteMax); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleUByte4); - // Convert to int by rounding - __m128i vResulti = _mm_cvtps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskUByte4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // Perform a single bit left shift to fix y|w - vResulti2 = _mm_add_epi32(vResulti2, vResulti2); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreByteN4(XMBYTEN4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); - N = XMVectorMultiply(N, g_ByteMax); - N = XMVectorTruncate(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-1.f)); - R = vminq_f32(R, vdupq_n_f32(1.0f)); - R = vmulq_n_f32(R, 127.0f); - int32x4_t vInt32 = vcvtq_s32_f32(R); - int16x4_t vInt16 = vqmovn_s32(vInt32); - int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_s8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleByteN4 = { - {{127.0f, 127.0f * 256.0f, 127.0f * 256.0f * 256.0f, - 127.0f * 256.0f * 256.0f * 256.0f}}}; - static const XMVECTORI32 MaskByteN4 = { - {{0xFF, 0xFF << 8, 0xFF << 16, static_cast(0xFF000000)}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_XMNegativeOne); - vResult = _mm_min_ps(vResult, g_XMOne); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleByteN4); - // Convert to int - __m128i vResulti = _mm_cvttps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskByteN4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreByte4(XMBYTE4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, g_ByteMin, g_ByteMax); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->x = static_cast(tmp.x); - pDestination->y = static_cast(tmp.y); - pDestination->z = static_cast(tmp.z); - pDestination->w = static_cast(tmp.w); - -#elif defined(_XM_ARM_NEON_INTRINSICS_) - float32x4_t R = vmaxq_f32(V, vdupq_n_f32(-127.f)); - R = vminq_f32(R, vdupq_n_f32(127.f)); - int32x4_t vInt32 = vcvtq_s32_f32(R); - int16x4_t vInt16 = vqmovn_s32(vInt32); - int8x8_t vInt8 = vqmovn_s16(vcombine_s16(vInt16, vInt16)); - vst1_lane_u32(&pDestination->v, vreinterpret_u32_s8(vInt8), 0); -#elif defined(_XM_SSE_INTRINSICS_) - static const XMVECTORF32 ScaleByte4 = { - {{1.0f, 256.0f, 256.0f * 256.0f, 256.0f * 256.0f * 256.0f}}}; - static const XMVECTORI32 MaskByte4 = { - {{0xFF, 0xFF << 8, 0xFF << 16, static_cast(0xFF000000)}}}; - // Clamp to bounds - XMVECTOR vResult = _mm_max_ps(V, g_ByteMin); - vResult = _mm_min_ps(vResult, g_ByteMax); - // Scale by multiplication - vResult = _mm_mul_ps(vResult, ScaleByte4); - // Convert to int by rounding - __m128i vResulti = _mm_cvtps_epi32(vResult); - // Mask off any fraction - vResulti = _mm_and_si128(vResulti, MaskByte4); - // Do a horizontal or of 4 entries - __m128i vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(3, 2, 3, 2)); - // x = x|z, y = y|w - vResulti = _mm_or_si128(vResulti, vResulti2); - // Move Z to the x position - vResulti2 = _mm_shuffle_epi32(vResulti, _MM_SHUFFLE(1, 1, 1, 1)); - // i = x|y|z|w - vResulti = _mm_or_si128(vResulti, vResulti2); - _mm_store_ss(reinterpret_cast(&pDestination->v), - _mm_castsi128_ps(vResulti)); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreUNibble4(XMUNIBBLE4* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 Max = {{{15.0f, 15.0f, 15.0f, 15.0f}}}; -#if defined(_XM_NO_INTRINSICS_) - - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast(((static_cast(tmp.w) & 0xF) << 12) | - ((static_cast(tmp.z) & 0xF) << 8) | - ((static_cast(tmp.y) & 0xF) << 4) | - (static_cast(tmp.x) & 0xF)); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.0f, 16.f, 16.f * 16.f, 16.f * 16.f * 16.f}}}; - static const XMVECTORU32 Mask = {{{0xF, 0xF << 4, 0xF << 8, 0xF << 12}}}; - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); - vResult = vminq_f32(vResult, Max); - vResult = vmulq_f32(vResult, Scale); - uint32x4_t vResulti = vcvtq_u32_f32(vResult); - vResulti = vandq_u32(vResulti, Mask); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vResulti); - uint32x2_t vhi = vget_high_u32(vResulti); - vTemp = vorr_u32(vTemp, vhi); - vTemp = vpadd_u32(vTemp, vTemp); - vst1_lane_u16(&pDestination->v, vreinterpret_u16_u32(vTemp), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, Max); - // Convert to int with rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // No SSE operations will write to 16-bit values, so we have to extract them - // manually - auto x = static_cast(_mm_extract_epi16(vInt, 0)); - auto y = static_cast(_mm_extract_epi16(vInt, 2)); - auto z = static_cast(_mm_extract_epi16(vInt, 4)); - auto w = static_cast(_mm_extract_epi16(vInt, 6)); - pDestination->v = static_cast( - ((static_cast(w) & 0xF) << 12) | - ((static_cast(z) & 0xF) << 8) | - ((static_cast(y) & 0xF) << 4) | ((static_cast(x) & 0xF))); -#endif -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline void XM_CALLCONV -XMStoreU555(XMU555* pDestination, FXMVECTOR V) noexcept { - assert(pDestination); - static const XMVECTORF32 Max = {{{31.0f, 31.0f, 31.0f, 1.0f}}}; - -#if defined(_XM_NO_INTRINSICS_) - XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); - N = XMVectorRound(N); - - XMFLOAT4A tmp; - XMStoreFloat4A(&tmp, N); - - pDestination->v = - static_cast(((tmp.w > 0.f) ? 0x8000 : 0) | - ((static_cast(tmp.z) & 0x1F) << 10) | - ((static_cast(tmp.y) & 0x1F) << 5) | - (static_cast(tmp.x) & 0x1F)); -#elif defined(_XM_ARM_NEON_INTRINSICS_) - static const XMVECTORF32 Scale = { - {{1.0f, 32.f / 2.f, 32.f * 32.f, 32.f * 32.f * 32.f / 2.f}}}; - static const XMVECTORU32 Mask = { - {{0x1F, 0x1F << (5 - 1), 0x1F << 10, 0x1 << (15 - 1)}}}; - float32x4_t vResult = vmaxq_f32(V, vdupq_n_f32(0)); - vResult = vminq_f32(vResult, Max); - vResult = vmulq_f32(vResult, Scale); - uint32x4_t vResulti = vcvtq_u32_f32(vResult); - vResulti = vandq_u32(vResulti, Mask); - // Do a horizontal or of 4 entries - uint32x2_t vTemp = vget_low_u32(vResulti); - uint32x2_t vTemp2 = vget_high_u32(vResulti); - vTemp = vorr_u32(vTemp, vTemp2); - // Perform a single bit left shift on y|w - vTemp2 = vdup_lane_u32(vTemp, 1); - vTemp2 = vadd_u32(vTemp2, vTemp2); - vTemp = vorr_u32(vTemp, vTemp2); - vst1_lane_u16(&pDestination->v, vreinterpret_u16_u32(vTemp), 0); -#elif defined(_XM_SSE_INTRINSICS_) - // Bounds check - XMVECTOR vResult = _mm_max_ps(V, g_XMZero); - vResult = _mm_min_ps(vResult, Max); - // Convert to int with rounding - __m128i vInt = _mm_cvtps_epi32(vResult); - // No SSE operations will write to 16-bit values, so we have to extract them - // manually - auto x = static_cast(_mm_extract_epi16(vInt, 0)); - auto y = static_cast(_mm_extract_epi16(vInt, 2)); - auto z = static_cast(_mm_extract_epi16(vInt, 4)); - auto w = static_cast(_mm_extract_epi16(vInt, 6)); - pDestination->v = static_cast( - (static_cast(w) ? 0x8000 : 0) | - ((static_cast(z) & 0x1F) << 10) | - ((static_cast(y) & 0x1F) << 5) | ((static_cast(x) & 0x1F))); -#endif -} - -/**************************************************************************** - * - * XMCOLOR operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMCOLOR::XMCOLOR(float _r, float _g, float _b, float _a) noexcept { - XMStoreColor(this, XMVectorSet(_r, _g, _b, _a)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMCOLOR::XMCOLOR(const float* pArray) noexcept { - XMStoreColor(this, XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMHALF2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMHALF2::XMHALF2(float _x, float _y) noexcept { - x = XMConvertFloatToHalf(_x); - y = XMConvertFloatToHalf(_y); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMHALF2::XMHALF2(const float* pArray) noexcept { - assert(pArray != nullptr); - x = XMConvertFloatToHalf(pArray[0]); - y = XMConvertFloatToHalf(pArray[1]); -} - -/**************************************************************************** - * - * XMSHORTN2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMSHORTN2::XMSHORTN2(float _x, float _y) noexcept { - XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMSHORTN2::XMSHORTN2( - const float* pArray) noexcept { - XMStoreShortN2(this, - XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMSHORT2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMSHORT2::XMSHORT2(float _x, float _y) noexcept { - XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMSHORT2::XMSHORT2(const float* pArray) noexcept { - XMStoreShort2(this, - XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUSHORTN2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUSHORTN2::XMUSHORTN2(float _x, float _y) noexcept { - XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUSHORTN2::XMUSHORTN2( - const float* pArray) noexcept { - XMStoreUShortN2(this, - XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUSHORT2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUSHORT2::XMUSHORT2(float _x, float _y) noexcept { - XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUSHORT2::XMUSHORT2( - const float* pArray) noexcept { - XMStoreUShort2(this, - XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMBYTEN2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMBYTEN2::XMBYTEN2(float _x, float _y) noexcept { - XMStoreByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMBYTEN2::XMBYTEN2(const float* pArray) noexcept { - XMStoreByteN2(this, - XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMBYTE2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMBYTE2::XMBYTE2(float _x, float _y) noexcept { - XMStoreByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMBYTE2::XMBYTE2(const float* pArray) noexcept { - XMStoreByte2(this, XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUBYTEN2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUBYTEN2::XMUBYTEN2(float _x, float _y) noexcept { - XMStoreUByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUBYTEN2::XMUBYTEN2( - const float* pArray) noexcept { - XMStoreUByteN2(this, - XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUBYTE2 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUBYTE2::XMUBYTE2(float _x, float _y) noexcept { - XMStoreUByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUBYTE2::XMUBYTE2(const float* pArray) noexcept { - XMStoreUByte2(this, - XMLoadFloat2(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMU565 operators - * - ****************************************************************************/ - -inline XMU565::XMU565(float _x, float _y, float _z) noexcept { - XMStoreU565(this, XMVectorSet(_x, _y, _z, 0.0f)); -} - -_Use_decl_annotations_ inline XMU565::XMU565(const float* pArray) noexcept { - XMStoreU565(this, XMLoadFloat3(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMFLOAT3PK operators - * - ****************************************************************************/ - -inline XMFLOAT3PK::XMFLOAT3PK(float _x, float _y, float _z) noexcept { - XMStoreFloat3PK(this, XMVectorSet(_x, _y, _z, 0.0f)); -} - -_Use_decl_annotations_ inline XMFLOAT3PK::XMFLOAT3PK( - const float* pArray) noexcept { - XMStoreFloat3PK(this, - XMLoadFloat3(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMFLOAT3SE operators - * - ****************************************************************************/ - -inline XMFLOAT3SE::XMFLOAT3SE(float _x, float _y, float _z) noexcept { - XMStoreFloat3SE(this, XMVectorSet(_x, _y, _z, 0.0f)); -} - -_Use_decl_annotations_ inline XMFLOAT3SE::XMFLOAT3SE( - const float* pArray) noexcept { - XMStoreFloat3SE(this, - XMLoadFloat3(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMHALF4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMHALF4::XMHALF4(float _x, float _y, float _z, float _w) noexcept { - x = XMConvertFloatToHalf(_x); - y = XMConvertFloatToHalf(_y); - z = XMConvertFloatToHalf(_z); - w = XMConvertFloatToHalf(_w); -} - -//------------------------------------------------------------------------------ - -_Use_decl_annotations_ inline XMHALF4::XMHALF4(const float* pArray) noexcept { - XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(float), 4); -} - -/**************************************************************************** - * - * XMSHORTN4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMSHORTN4::XMSHORTN4(float _x, float _y, float _z, float _w) noexcept { - XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMSHORTN4::XMSHORTN4( - const float* pArray) noexcept { - XMStoreShortN4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMSHORT4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMSHORT4::XMSHORT4(float _x, float _y, float _z, float _w) noexcept { - XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMSHORT4::XMSHORT4(const float* pArray) noexcept { - XMStoreShort4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUSHORTN4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUSHORTN4::XMUSHORTN4(float _x, float _y, float _z, float _w) noexcept { - XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUSHORTN4::XMUSHORTN4( - const float* pArray) noexcept { - XMStoreUShortN4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUSHORT4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUSHORT4::XMUSHORT4(float _x, float _y, float _z, float _w) noexcept { - XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUSHORT4::XMUSHORT4( - const float* pArray) noexcept { - XMStoreUShort4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMXDECN4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMXDECN4::XMXDECN4(float _x, float _y, float _z, float _w) noexcept { - XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMXDECN4::XMXDECN4(const float* pArray) noexcept { - XMStoreXDecN4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMXDEC4 operators - * - ****************************************************************************/ -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4996) -// C4996: ignore deprecation warning -#endif - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - -//------------------------------------------------------------------------------ - -inline XMXDEC4::XMXDEC4(float _x, float _y, float _z, float _w) noexcept { - XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMXDEC4::XMXDEC4(const float* pArray) noexcept { - XMStoreXDec4(this, XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMDECN4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMDECN4::XMDECN4(float _x, float _y, float _z, float _w) noexcept { - XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMDECN4::XMDECN4(const float* pArray) noexcept { - XMStoreDecN4(this, XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMDEC4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMDEC4::XMDEC4(float _x, float _y, float _z, float _w) noexcept { - XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMDEC4::XMDEC4(const float* pArray) noexcept { - XMStoreDec4(this, XMLoadFloat4(reinterpret_cast(pArray))); -} - -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -/**************************************************************************** - * - * XMUDECN4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUDECN4::XMUDECN4(float _x, float _y, float _z, float _w) noexcept { - XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUDECN4::XMUDECN4(const float* pArray) noexcept { - XMStoreUDecN4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUDEC4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUDEC4::XMUDEC4(float _x, float _y, float _z, float _w) noexcept { - XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUDEC4::XMUDEC4(const float* pArray) noexcept { - XMStoreUDec4(this, XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMBYTEN4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMBYTEN4::XMBYTEN4(float _x, float _y, float _z, float _w) noexcept { - XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMBYTEN4::XMBYTEN4(const float* pArray) noexcept { - XMStoreByteN4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMBYTE4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMBYTE4::XMBYTE4(float _x, float _y, float _z, float _w) noexcept { - XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMBYTE4::XMBYTE4(const float* pArray) noexcept { - XMStoreByte4(this, XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUBYTEN4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUBYTEN4::XMUBYTEN4(float _x, float _y, float _z, float _w) noexcept { - XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUBYTEN4::XMUBYTEN4( - const float* pArray) noexcept { - XMStoreUByteN4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUBYTE4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUBYTE4::XMUBYTE4(float _x, float _y, float _z, float _w) noexcept { - XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUBYTE4::XMUBYTE4(const float* pArray) noexcept { - XMStoreUByte4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMUNIBBLE4 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMUNIBBLE4::XMUNIBBLE4(float _x, float _y, float _z, float _w) noexcept { - XMStoreUNibble4(this, XMVectorSet(_x, _y, _z, _w)); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMUNIBBLE4::XMUNIBBLE4( - const float* pArray) noexcept { - XMStoreUNibble4(this, - XMLoadFloat4(reinterpret_cast(pArray))); -} - -/**************************************************************************** - * - * XMU555 operators - * - ****************************************************************************/ - -//------------------------------------------------------------------------------ - -inline XMU555::XMU555(float _x, float _y, float _z, bool _w) noexcept { - XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f))); -} - -//------------------------------------------------------------------------------ -_Use_decl_annotations_ inline XMU555::XMU555(const float* pArray, - bool _w) noexcept { - XMVECTOR V = XMLoadFloat3(reinterpret_cast(pArray)); - XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f))); -} diff --git a/targets/app/linux/Stubs/DirectXMath/sal.h b/targets/app/linux/Stubs/DirectXMath/sal.h deleted file mode 100644 index 2f40e716e..000000000 --- a/targets/app/linux/Stubs/DirectXMath/sal.h +++ /dev/null @@ -1,4244 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/*** -*sal.h - markers for documenting the semantics of APIs -* - -* -*Purpose: -* sal.h provides a set of annotations to describe how a function uses its -* parameters - the assumptions it makes about them, and the guarantees it makes -* upon finishing. -****/ -#pragma once - -/*========================================================================== - - The comments in this file are intended to give basic understanding of - the usage of SAL, the Microsoft Source Code Annotation Language. - For more details, please see http://go.microsoft.com/fwlink/?LinkID=242134 - - The macros are defined in 3 layers, plus the structural set: - - _In_/_Out_/_Ret_ Layer: - ---------------------- - This layer provides the highest abstraction and its macros should be used - in most cases. These macros typically start with: - _In_ : input parameter to a function, unmodified by called function - _Out_ : output parameter, written to by called function, pointed-to - location not expected to be initialized prior to call - _Outptr_ : like _Out_ when returned variable is a pointer type - (so param is pointer-to-pointer type). Called function - provides/allocated space. - _Outref_ : like _Outptr_, except param is reference-to-pointer type. - _Inout_ : inout parameter, read from and potentially modified by - called function. - _Ret_ : for return values - _Field_ : class/struct field invariants - For common usage, this class of SAL provides the most concise annotations. - Note that _In_/_Out_/_Inout_/_Outptr_ annotations are designed to be used - with a parameter target. Using them with _At_ to specify non-parameter - targets may yield unexpected results. - - This layer also includes a number of other properties that can be specified - to extend the ability of code analysis, most notably: - -- Designating parameters as format strings for printf/scanf/scanf_s - -- Requesting stricter type checking for C enum parameters - - _Pre_/_Post_ Layer: - ------------------ - The macros of this layer only should be used when there is no suitable macro - in the _In_/_Out_ layer. Its macros start with _Pre_ or _Post_. - This layer provides the most flexibility for annotations. - - Implementation Abstraction Layer: - -------------------------------- - Macros from this layer should never be used directly. The layer only exists - to hide the implementation of the annotation macros. - - Structural Layer: - ---------------- - These annotations, like _At_ and _When_, are used with annotations from - any of the other layers as modifiers, indicating exactly when and where - the annotations apply. - - - Common syntactic conventions: - ---------------------------- - - Usage: - ----- - _In_, _Out_, _Inout_, _Pre_, _Post_, are for formal parameters. - _Ret_, _Deref_ret_ must be used for return values. - - Nullness: - -------- - If the parameter can be NULL as a precondition to the function, the - annotation contains _opt. If the macro does not contain '_opt' the - parameter cannot be NULL. - - If an out/inout parameter returns a null pointer as a postcondition, this is - indicated by _Ret_maybenull_ or _result_maybenull_. If the macro is not - of this form, then the result will not be NULL as a postcondition. - _Outptr_ - output value is not NULL - _Outptr_result_maybenull_ - output value might be NULL - - String Type: - ----------- - _z: NullTerminated string - for _In_ parameters the buffer must have the specified stringtype before the call - for _Out_ parameters the buffer must have the specified stringtype after the call - for _Inout_ parameters both conditions apply - - Extent Syntax: - ------------- - Buffer sizes are expressed as element counts, unless the macro explicitly - contains _byte_ or _bytes_. Some annotations specify two buffer sizes, in - which case the second is used to indicate how much of the buffer is valid - as a postcondition. This table outlines the precondition buffer allocation - size, precondition number of valid elements, postcondition allocation size, - and postcondition number of valid elements for representative buffer size - annotations: - Pre | Pre | Post | Post - alloc | valid | alloc | valid - Annotation elems | elems | elems | elems - ---------- ------------------------------------ - _In_reads_(s) s | s | s | s - _Inout_updates_(s) s | s | s | s - _Inout_updates_to_(s,c) s | s | s | c - _Out_writes_(s) s | 0 | s | s - _Out_writes_to_(s,c) s | 0 | s | c - _Outptr_result_buffer_(s) ? | ? | s | s - _Outptr_result_buffer_to_(s,c) ? | ? | s | c - - For the _Outptr_ annotations, the buffer in question is at one level of - dereference. The called function is responsible for supplying the buffer. - - Success and failure: - ------------------- - The SAL concept of success allows functions to define expressions that can - be tested by the caller, which if it evaluates to non-zero, indicates the - function succeeded, which means that its postconditions are guaranteed to - hold. Otherwise, if the expression evaluates to zero, the function is - considered to have failed, and the postconditions are not guaranteed. - - The success criteria can be specified with the _Success_(expr) annotation: - _Success_(return != FALSE) BOOL - PathCanonicalizeA(_Out_writes_(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath) : - pszBuf is only guaranteed to be NULL-terminated when TRUE is returned, - and FALSE indicates failure. In common practice, callers check for zero - vs. non-zero returns, so it is preferable to express the success - criteria in terms of zero/non-zero, not checked for exactly TRUE. - - Functions can specify that some postconditions will still hold, even when - the function fails, using _On_failure_(anno-list), or postconditions that - hold regardless of success or failure using _Always_(anno-list). - - The annotation _Return_type_success_(expr) may be used with a typedef to - give a default _Success_ criteria to all functions returning that type. - This is the case for common Windows API status types, including - HRESULT and NTSTATUS. This may be overridden on a per-function basis by - specifying a _Success_ annotation locally. - -============================================================================*/ - -#define __ATTR_SAL - -#ifndef _SAL_VERSION /*IFSTRIP=IGN*/ -#define _SAL_VERSION 20 -#endif - -#ifdef _PREFAST_ // [ - -// choose attribute or __declspec implementation -#ifndef _USE_DECLSPECS_FOR_SAL // [ -#define _USE_DECLSPECS_FOR_SAL 1 -#endif // ] - -#if _USE_DECLSPECS_FOR_SAL // [ -#undef _USE_ATTRIBUTES_FOR_SAL -#define _USE_ATTRIBUTES_FOR_SAL 0 -#elif !defined(_USE_ATTRIBUTES_FOR_SAL) // ][ -#if _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // [ -#define _USE_ATTRIBUTES_FOR_SAL 1 -#else // ][ -#define _USE_ATTRIBUTES_FOR_SAL 0 -#endif // ] -#endif // ] - -#if !_USE_DECLSPECS_FOR_SAL // [ -#if !_USE_ATTRIBUTES_FOR_SAL // [ -#if _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // [ -#undef _USE_ATTRIBUTES_FOR_SAL -#define _USE_ATTRIBUTES_FOR_SAL 1 -#else // ][ -#undef _USE_DECLSPECS_FOR_SAL -#define _USE_DECLSPECS_FOR_SAL 1 -#endif // ] -#endif // ] -#endif // ] - -#else - -// Disable expansion of SAL macros in non-Prefast mode to -// improve compiler throughput. -#ifndef _USE_DECLSPECS_FOR_SAL // [ -#define _USE_DECLSPECS_FOR_SAL 0 -#endif // ] -#ifndef _USE_ATTRIBUTES_FOR_SAL // [ -#define _USE_ATTRIBUTES_FOR_SAL 0 -#endif // ] - -#endif // ] - -// safeguard for MIDL and RC builds -#if _USE_DECLSPECS_FOR_SAL && (defined(MIDL_PASS) || defined(__midl) || defined(RC_INVOKED) || \ - !defined(_PREFAST_)) /*IFSTRIP=IGN*/ // [ -#undef _USE_DECLSPECS_FOR_SAL -#define _USE_DECLSPECS_FOR_SAL 0 -#endif // ] -#if _USE_ATTRIBUTES_FOR_SAL && (!defined(_MSC_EXTENSIONS) || defined(MIDL_PASS) || \ - defined(__midl) || defined(RC_INVOKED)) /*IFSTRIP=IGN*/ // [ -#undef _USE_ATTRIBUTES_FOR_SAL -#define _USE_ATTRIBUTES_FOR_SAL 0 -#endif // ] - -#if _USE_DECLSPECS_FOR_SAL || _USE_ATTRIBUTES_FOR_SAL - -// Special enum type for Y/N/M -enum __SAL_YesNo { _SAL_notpresent, _SAL_no, _SAL_maybe, _SAL_yes, _SAL_default }; - -#endif - -#if defined(BUILD_WINDOWS) && !_USE_ATTRIBUTES_FOR_SAL /*IFSTRIP=IGN*/ -#define _SAL1_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "1") _GrouP_(annotes _SAL_nop_impl_) -#define _SAL1_1_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "1.1") _GrouP_(annotes _SAL_nop_impl_) -#define _SAL1_2_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "1.2") _GrouP_(annotes _SAL_nop_impl_) -#define _SAL2_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "2") _GrouP_(annotes _SAL_nop_impl_) -#else -#define _SAL1_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "1") _Group_(annotes _SAL_nop_impl_) -#define _SAL1_1_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "1.1") _Group_(annotes _SAL_nop_impl_) -#define _SAL1_2_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "1.2") _Group_(annotes _SAL_nop_impl_) -#define _SAL2_Source_(Name, args, annotes) \ - _SA_annotes3(SAL_name, #Name, "", "2") _Group_(annotes _SAL_nop_impl_) -#endif - -//============================================================================ -// Structural SAL: -// These annotations modify the use of other annotations. They may -// express the annotation target (i.e. what parameter/field the annotation -// applies to) or the condition under which the annotation is applicable. -//============================================================================ - -// _At_(target, annos) specifies that the annotations listed in 'annos' is to -// be applied to 'target' rather than to the identifier which is the current -// lexical target. -#define _At_(target, annos) _At_impl_(target, annos _SAL_nop_impl_) - -// _At_buffer_(target, iter, bound, annos) is similar to _At_, except that -// target names a buffer, and each annotation in annos is applied to each -// element of target up to bound, with the variable named in iter usable -// by the annotations to refer to relevant offsets within target. -#define _At_buffer_(target, iter, bound, annos) \ - _At_buffer_impl_(target, iter, bound, annos _SAL_nop_impl_) - -// _When_(expr, annos) specifies that the annotations listed in 'annos' only -// apply when 'expr' evaluates to non-zero. -#define _When_(expr, annos) _When_impl_(expr, annos _SAL_nop_impl_) -#define _Group_(annos) _Group_impl_(annos _SAL_nop_impl_) -#define _GrouP_(annos) _GrouP_impl_(annos _SAL_nop_impl_) - -// indicates whether normal post conditions apply to a function -#define _Success_(expr) _SAL2_Source_(_Success_, (expr), _Success_impl_(expr)) - -// indicates whether post conditions apply to a function returning -// the type that this annotation is applied to -#define _Return_type_success_(expr) \ - _SAL2_Source_(_Return_type_success_, (expr), _Success_impl_(expr)) - -// Establish postconditions that apply only if the function does not succeed -#define _On_failure_(annos) _On_failure_impl_(annos _SAL_nop_impl_) - -// Establish postconditions that apply in both success and failure cases. -// Only applicable with functions that have _Success_ or _Return_type_succss_. -#define _Always_(annos) _Always_impl_(annos _SAL_nop_impl_) - -// Usable on a function definition. Asserts that a function declaration is -// in scope, and its annotations are to be used. There are no other annotations -// allowed on the function definition. -#define _Use_decl_annotations_ _Use_decl_anno_impl_ - -// _Notref_ may precede a _Deref_ or "real" annotation, and removes one -// level of dereference if the parameter is a C++ reference (&). If the -// net deref on a "real" annotation is negative, it is simply discarded. -#define _Notref_ _Notref_impl_ - -// Annotations for defensive programming styles. -#define _Pre_defensive_ _SA_annotes0(SAL_pre_defensive) -#define _Post_defensive_ _SA_annotes0(SAL_post_defensive) - -#define _In_defensive_(annotes) _Pre_defensive_ _Group_(annotes) -#define _Out_defensive_(annotes) _Post_defensive_ _Group_(annotes) -#define _Inout_defensive_(annotes) _Pre_defensive_ _Post_defensive_ _Group_(annotes) - -//============================================================================ -// _In_\_Out_ Layer: -//============================================================================ - -// Reserved pointer parameters, must always be NULL. -#define _Reserved_ _SAL2_Source_(_Reserved_, (), _Pre1_impl_(__null_impl)) - -// _Const_ allows specification that any namable memory location is considered -// readonly for a given call. -#define _Const_ _SAL2_Source_(_Const_, (), _Pre1_impl_(__readaccess_impl_notref)) - -// Input parameters -------------------------- - -// _In_ - Annotations for parameters where data is passed into the function, but not modified. -// _In_ by itself can be used with non-pointer types (although it is redundant). - -// e.g. void SetPoint( _In_ const POINT* pPT ); -#define _In_ \ - _SAL2_Source_(_In_, (), \ - _Pre1_impl_(__notnull_impl_notref) \ - _Pre_valid_impl_ _Deref_pre1_impl_(__readaccess_impl_notref)) -#define _In_opt_ \ - _SAL2_Source_(_In_opt_, (), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_ _Deref_pre_readonly_) - -// nullterminated 'in' parameters. -// e.g. void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); -#define _In_z_ _SAL2_Source_(_In_z_, (), _In_ _Pre1_impl_(__zterm_impl)) -#define _In_opt_z_ _SAL2_Source_(_In_opt_z_, (), _In_opt_ _Pre1_impl_(__zterm_impl)) - -// 'input' buffers with given size - -#define _In_reads_(size) _SAL2_Source_(_In_reads_, (size), _Pre_count_(size) _Deref_pre_readonly_) -#define _In_reads_opt_(size) \ - _SAL2_Source_(_In_reads_opt_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_) -#define _In_reads_bytes_(size) \ - _SAL2_Source_(_In_reads_bytes_, (size), _Pre_bytecount_(size) _Deref_pre_readonly_) -#define _In_reads_bytes_opt_(size) \ - _SAL2_Source_(_In_reads_bytes_opt_, (size), _Pre_opt_bytecount_(size) _Deref_pre_readonly_) -#define _In_reads_z_(size) _SAL2_Source_(_In_reads_z_, (size), _In_reads_(size) _Pre_z_) -#define _In_reads_opt_z_(size) \ - _SAL2_Source_(_In_reads_opt_z_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_ _Pre_opt_z_) -#define _In_reads_or_z_(size) \ - _SAL2_Source_(_In_reads_or_z_, (size), \ - _In_ _When_(_String_length_(_Curr_) < (size), _Pre_z_) \ - _When_(_String_length_(_Curr_) >= (size), _Pre1_impl_(__count_impl(size)))) -#define _In_reads_or_z_opt_(size) \ - _SAL2_Source_(_In_reads_or_z_opt_, (size), \ - _In_opt_ _When_(_String_length_(_Curr_) < (size), _Pre_z_) \ - _When_(_String_length_(_Curr_) >= (size), _Pre1_impl_(__count_impl(size)))) - -// 'input' buffers valid to the given end pointer - -#define _In_reads_to_ptr_(ptr) \ - _SAL2_Source_(_In_reads_to_ptr_, (ptr), _Pre_ptrdiff_count_(ptr) _Deref_pre_readonly_) -#define _In_reads_to_ptr_opt_(ptr) \ - _SAL2_Source_(_In_reads_to_ptr_opt_, (ptr), _Pre_opt_ptrdiff_count_(ptr) _Deref_pre_readonly_) -#define _In_reads_to_ptr_z_(ptr) \ - _SAL2_Source_(_In_reads_to_ptr_z_, (ptr), _In_reads_to_ptr_(ptr) _Pre_z_) -#define _In_reads_to_ptr_opt_z_(ptr) \ - _SAL2_Source_(_In_reads_to_ptr_opt_z_, (ptr), \ - _Pre_opt_ptrdiff_count_(ptr) _Deref_pre_readonly_ _Pre_opt_z_) - -// Output parameters -------------------------- - -// _Out_ - Annotations for pointer or reference parameters where data passed back to the caller. -// These are mostly used where the pointer/reference is to a non-pointer type. -// _Outptr_/_Outref) (see below) are typically used to return pointers via parameters. - -// e.g. void GetPoint( _Out_ POINT* pPT ); -#define _Out_ _SAL2_Source_(_Out_, (), _Out_impl_) -#define _Out_opt_ _SAL2_Source_(_Out_opt_, (), _Out_opt_impl_) - -#define _Out_writes_(size) _SAL2_Source_(_Out_writes_, (size), _Pre_cap_(size) _Post_valid_impl_) -#define _Out_writes_opt_(size) \ - _SAL2_Source_(_Out_writes_opt_, (size), _Pre_opt_cap_(size) _Post_valid_impl_) -#define _Out_writes_bytes_(size) \ - _SAL2_Source_(_Out_writes_bytes_, (size), _Pre_bytecap_(size) _Post_valid_impl_) -#define _Out_writes_bytes_opt_(size) \ - _SAL2_Source_(_Out_writes_bytes_opt_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_) -#define _Out_writes_z_(size) \ - _SAL2_Source_(_Out_writes_z_, (size), _Pre_cap_(size) _Post_valid_impl_ _Post_z_) -#define _Out_writes_opt_z_(size) \ - _SAL2_Source_(_Out_writes_opt_z_, (size), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_z_) - -#define _Out_writes_to_(size, count) \ - _SAL2_Source_(_Out_writes_to_, (size, count), \ - _Pre_cap_(size) _Post_valid_impl_ _Post_count_(count)) -#define _Out_writes_to_opt_(size, count) \ - _SAL2_Source_(_Out_writes_to_opt_, (size, count), \ - _Pre_opt_cap_(size) _Post_valid_impl_ _Post_count_(count)) -#define _Out_writes_all_(size) \ - _SAL2_Source_(_Out_writes_all_, (size), _Out_writes_to_(_Old_(size), _Old_(size))) -#define _Out_writes_all_opt_(size) \ - _SAL2_Source_(_Out_writes_all_opt_, (size), _Out_writes_to_opt_(_Old_(size), _Old_(size))) - -#define _Out_writes_bytes_to_(size, count) \ - _SAL2_Source_(_Out_writes_bytes_to_, (size, count), \ - _Pre_bytecap_(size) _Post_valid_impl_ _Post_bytecount_(count)) -#define _Out_writes_bytes_to_opt_(size, count) \ - _SAL2_Source_(_Out_writes_bytes_to_opt_, (size, count), \ - _Pre_opt_bytecap_(size) _Post_valid_impl_ _Post_bytecount_(count)) -#define _Out_writes_bytes_all_(size) \ - _SAL2_Source_(_Out_writes_bytes_all_, (size), _Out_writes_bytes_to_(_Old_(size), _Old_(size))) -#define _Out_writes_bytes_all_opt_(size) \ - _SAL2_Source_(_Out_writes_bytes_all_opt_, (size), \ - _Out_writes_bytes_to_opt_(_Old_(size), _Old_(size))) - -#define _Out_writes_to_ptr_(ptr) \ - _SAL2_Source_(_Out_writes_to_ptr_, (ptr), _Pre_ptrdiff_cap_(ptr) _Post_valid_impl_) -#define _Out_writes_to_ptr_opt_(ptr) \ - _SAL2_Source_(_Out_writes_to_ptr_opt_, (ptr), _Pre_opt_ptrdiff_cap_(ptr) _Post_valid_impl_) -#define _Out_writes_to_ptr_z_(ptr) \ - _SAL2_Source_(_Out_writes_to_ptr_z_, (ptr), _Pre_ptrdiff_cap_(ptr) _Post_valid_impl_ Post_z_) -#define _Out_writes_to_ptr_opt_z_(ptr) \ - _SAL2_Source_(_Out_writes_to_ptr_opt_z_, (ptr), \ - _Pre_opt_ptrdiff_cap_(ptr) _Post_valid_impl_ Post_z_) - -// Inout parameters ---------------------------- - -// _Inout_ - Annotations for pointer or reference parameters where data is passed in and -// potentially modified. -// void ModifyPoint( _Inout_ POINT* pPT ); -// void ModifyPointByRef( _Inout_ POINT& pPT ); - -#define _Inout_ _SAL2_Source_(_Inout_, (), _Prepost_valid_) -#define _Inout_opt_ _SAL2_Source_(_Inout_opt_, (), _Prepost_opt_valid_) - -// For modifying string buffers -// void toupper( _Inout_z_ char* sz ); -#define _Inout_z_ _SAL2_Source_(_Inout_z_, (), _Prepost_z_) -#define _Inout_opt_z_ _SAL2_Source_(_Inout_opt_z_, (), _Prepost_opt_z_) - -// For modifying buffers with explicit element size -#define _Inout_updates_(size) \ - _SAL2_Source_(_Inout_updates_, (size), _Pre_cap_(size) _Pre_valid_impl_ _Post_valid_impl_) -#define _Inout_updates_opt_(size) \ - _SAL2_Source_(_Inout_updates_opt_, (size), \ - _Pre_opt_cap_(size) _Pre_valid_impl_ _Post_valid_impl_) -#define _Inout_updates_z_(size) \ - _SAL2_Source_(_Inout_updates_z_, (size), \ - _Pre_cap_(size) _Pre_valid_impl_ _Post_valid_impl_ _Pre1_impl_(__zterm_impl) \ - _Post1_impl_(__zterm_impl)) -#define _Inout_updates_opt_z_(size) \ - _SAL2_Source_(_Inout_updates_opt_z_, (size), \ - _Pre_opt_cap_(size) _Pre_valid_impl_ _Post_valid_impl_ _Pre1_impl_(__zterm_impl) \ - _Post1_impl_(__zterm_impl)) - -#define _Inout_updates_to_(size, count) \ - _SAL2_Source_(_Inout_updates_to_, (size, count), \ - _Out_writes_to_(size, count) _Pre_valid_impl_ _Pre1_impl_(__count_impl(count))) -#define _Inout_updates_to_opt_(size, count) \ - _SAL2_Source_(_Inout_updates_to_opt_, (size, count), \ - _Out_writes_to_opt_(size, count) \ - _Pre_valid_impl_ _Pre1_impl_(__count_impl(count))) - -#define _Inout_updates_all_(size) \ - _SAL2_Source_(_Inout_updates_all_, (size), _Inout_updates_to_(_Old_(size), _Old_(size))) -#define _Inout_updates_all_opt_(size) \ - _SAL2_Source_(_Inout_updates_all_opt_, (size), _Inout_updates_to_opt_(_Old_(size), _Old_(size))) - -// For modifying buffers with explicit byte size -#define _Inout_updates_bytes_(size) \ - _SAL2_Source_(_Inout_updates_bytes_, (size), \ - _Pre_bytecap_(size) _Pre_valid_impl_ _Post_valid_impl_) -#define _Inout_updates_bytes_opt_(size) \ - _SAL2_Source_(_Inout_updates_bytes_opt_, (size), \ - _Pre_opt_bytecap_(size) _Pre_valid_impl_ _Post_valid_impl_) - -#define _Inout_updates_bytes_to_(size, count) \ - _SAL2_Source_(_Inout_updates_bytes_to_, (size, count), \ - _Out_writes_bytes_to_(size, count) \ - _Pre_valid_impl_ _Pre1_impl_(__bytecount_impl(count))) -#define _Inout_updates_bytes_to_opt_(size, count) \ - _SAL2_Source_(_Inout_updates_bytes_to_opt_, (size, count), \ - _Out_writes_bytes_to_opt_(size, count) \ - _Pre_valid_impl_ _Pre1_impl_(__bytecount_impl(count))) - -#define _Inout_updates_bytes_all_(size) \ - _SAL2_Source_(_Inout_updates_bytes_all_, (size), \ - _Inout_updates_bytes_to_(_Old_(size), _Old_(size))) -#define _Inout_updates_bytes_all_opt_(size) \ - _SAL2_Source_(_Inout_updates_bytes_all_opt_, (size), \ - _Inout_updates_bytes_to_opt_(_Old_(size), _Old_(size))) - -// Pointer to pointer parameters ------------------------- - -// _Outptr_ - Annotations for output params returning pointers -// These describe parameters where the called function provides the buffer: -// HRESULT SHStrDupW(_In_ LPCWSTR psz, _Outptr_ LPWSTR *ppwsz); -// The caller passes the address of an LPWSTR variable as ppwsz, and SHStrDupW allocates -// and initializes memory and returns the pointer to the new LPWSTR in *ppwsz. -// -// _Outptr_opt_ - describes parameters that are allowed to be NULL. -// _Outptr_*_result_maybenull_ - describes parameters where the called function might return NULL -// to the caller. -// -// Example: -// void MyFunc(_Outptr_opt_ int **ppData1, _Outptr_result_maybenull_ int **ppData2); -// Callers: -// MyFunc(NULL, NULL); // error: parameter 2, ppData2, should not be NULL -// MyFunc(&pData1, &pData2); // ok: both non-NULL -// if (*pData1 == *pData2) ... // error: pData2 might be NULL after call - -#define _Outptr_ \ - _SAL2_Source_(_Outptr_, (), \ - _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(1))) -#define _Outptr_result_maybenull_ \ - _SAL2_Source_(_Outptr_result_maybenull_, (), \ - _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(1))) -#define _Outptr_opt_ \ - _SAL2_Source_(_Outptr_opt_, (), \ - _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(1))) -#define _Outptr_opt_result_maybenull_ \ - _SAL2_Source_(_Outptr_opt_result_maybenull_, (), \ - _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(1))) - -// Annotations for _Outptr_ parameters returning pointers to null terminated strings. - -#define _Outptr_result_z_ _SAL2_Source_(_Outptr_result_z_, (), _Out_impl_ _Deref_post_z_) -#define _Outptr_opt_result_z_ \ - _SAL2_Source_(_Outptr_opt_result_z_, (), _Out_opt_impl_ _Deref_post_z_) -#define _Outptr_result_maybenull_z_ \ - _SAL2_Source_(_Outptr_result_maybenull_z_, (), _Out_impl_ _Deref_post_opt_z_) -#define _Outptr_opt_result_maybenull_z_ \ - _SAL2_Source_(_Outptr_opt_result_maybenull_z_, (), _Out_opt_impl_ _Deref_post_opt_z_) - -// Annotations for _Outptr_ parameters where the output pointer is set to NULL if the function -// fails. - -#define _Outptr_result_nullonfailure_ \ - _SAL2_Source_(_Outptr_result_nullonfailure_, (), _Outptr_ _On_failure_(_Deref_post_null_)) -#define _Outptr_opt_result_nullonfailure_ \ - _SAL2_Source_(_Outptr_opt_result_nullonfailure_, (), \ - _Outptr_opt_ _On_failure_(_Deref_post_null_)) - -// Annotations for _Outptr_ parameters which return a pointer to a ref-counted COM object, -// following the COM convention of setting the output to NULL on failure. -// The current implementation is identical to _Outptr_result_nullonfailure_. -// For pointers to types that are not COM objects, _Outptr_result_nullonfailure_ is preferred. - -#define _COM_Outptr_ _SAL2_Source_(_COM_Outptr_, (), _Outptr_ _On_failure_(_Deref_post_null_)) -#define _COM_Outptr_result_maybenull_ \ - _SAL2_Source_(_COM_Outptr_result_maybenull_, (), \ - _Outptr_result_maybenull_ _On_failure_(_Deref_post_null_)) -#define _COM_Outptr_opt_ \ - _SAL2_Source_(_COM_Outptr_opt_, (), _Outptr_opt_ _On_failure_(_Deref_post_null_)) -#define _COM_Outptr_opt_result_maybenull_ \ - _SAL2_Source_(_COM_Outptr_opt_result_maybenull_, (), \ - _Outptr_opt_result_maybenull_ _On_failure_(_Deref_post_null_)) - -// Annotations for _Outptr_ parameters returning a pointer to buffer with a specified number of -// elements/bytes - -#define _Outptr_result_buffer_(size) \ - _SAL2_Source_(_Outptr_result_buffer_, (size), \ - _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __cap_impl(size))) -#define _Outptr_opt_result_buffer_(size) \ - _SAL2_Source_(_Outptr_opt_result_buffer_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __cap_impl(size))) -#define _Outptr_result_buffer_to_(size, count) \ - _SAL2_Source_(_Outptr_result_buffer_to_, (size, count), \ - _Out_impl_ _Deref_post3_impl_(__notnull_impl_notref, __cap_impl(size), \ - __count_impl(count))) -#define _Outptr_opt_result_buffer_to_(size, count) \ - _SAL2_Source_(_Outptr_opt_result_buffer_to_, (size, count), \ - _Out_opt_impl_ _Deref_post3_impl_(__notnull_impl_notref, __cap_impl(size), \ - __count_impl(count))) - -#define _Outptr_result_buffer_all_(size) \ - _SAL2_Source_(_Outptr_result_buffer_all_, (size), \ - _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(size))) -#define _Outptr_opt_result_buffer_all_(size) \ - _SAL2_Source_(_Outptr_opt_result_buffer_all_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __count_impl(size))) - -#define _Outptr_result_buffer_maybenull_(size) \ - _SAL2_Source_(_Outptr_result_buffer_maybenull_, (size), \ - _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __cap_impl(size))) -#define _Outptr_opt_result_buffer_maybenull_(size) \ - _SAL2_Source_(_Outptr_opt_result_buffer_maybenull_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __cap_impl(size))) -#define _Outptr_result_buffer_to_maybenull_(size, count) \ - _SAL2_Source_(_Outptr_result_buffer_to_maybenull_, (size, count), \ - _Out_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __cap_impl(size), \ - __count_impl(count))) -#define _Outptr_opt_result_buffer_to_maybenull_(size, count) \ - _SAL2_Source_(_Outptr_opt_result_buffer_to_maybenull_, (size, count), \ - _Out_opt_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __cap_impl(size), \ - __count_impl(count))) - -#define _Outptr_result_buffer_all_maybenull_(size) \ - _SAL2_Source_(_Outptr_result_buffer_all_maybenull_, (size), \ - _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(size))) -#define _Outptr_opt_result_buffer_all_maybenull_(size) \ - _SAL2_Source_(_Outptr_opt_result_buffer_all_maybenull_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __count_impl(size))) - -#define _Outptr_result_bytebuffer_(size) \ - _SAL2_Source_(_Outptr_result_bytebuffer_, (size), \ - _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecap_impl(size))) -#define _Outptr_opt_result_bytebuffer_(size) \ - _SAL2_Source_(_Outptr_opt_result_bytebuffer_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecap_impl(size))) -#define _Outptr_result_bytebuffer_to_(size, count) \ - _SAL2_Source_(_Outptr_result_bytebuffer_to_, (size, count), \ - _Out_impl_ _Deref_post3_impl_(__notnull_impl_notref, __bytecap_impl(size), \ - __bytecount_impl(count))) -#define _Outptr_opt_result_bytebuffer_to_(size, count) \ - _SAL2_Source_(_Outptr_opt_result_bytebuffer_to_, (size, count), \ - _Out_opt_impl_ _Deref_post3_impl_(__notnull_impl_notref, __bytecap_impl(size), \ - __bytecount_impl(count))) - -#define _Outptr_result_bytebuffer_all_(size) \ - _SAL2_Source_(_Outptr_result_bytebuffer_all_, (size), \ - _Out_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecount_impl(size))) -#define _Outptr_opt_result_bytebuffer_all_(size) \ - _SAL2_Source_( \ - _Outptr_opt_result_bytebuffer_all_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__notnull_impl_notref, __bytecount_impl(size))) - -#define _Outptr_result_bytebuffer_maybenull_(size) \ - _SAL2_Source_(_Outptr_result_bytebuffer_maybenull_, (size), \ - _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecap_impl(size))) -#define _Outptr_opt_result_bytebuffer_maybenull_(size) \ - _SAL2_Source_( \ - _Outptr_opt_result_bytebuffer_maybenull_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecap_impl(size))) -#define _Outptr_result_bytebuffer_to_maybenull_(size, count) \ - _SAL2_Source_(_Outptr_result_bytebuffer_to_maybenull_, (size, count), \ - _Out_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __bytecap_impl(size), \ - __bytecount_impl(count))) -#define _Outptr_opt_result_bytebuffer_to_maybenull_(size, count) \ - _SAL2_Source_(_Outptr_opt_result_bytebuffer_to_maybenull_, (size, count), \ - _Out_opt_impl_ _Deref_post3_impl_(__maybenull_impl_notref, __bytecap_impl(size), \ - __bytecount_impl(count))) - -#define _Outptr_result_bytebuffer_all_maybenull_(size) \ - _SAL2_Source_(_Outptr_result_bytebuffer_all_maybenull_, (size), \ - _Out_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecount_impl(size))) -#define _Outptr_opt_result_bytebuffer_all_maybenull_(size) \ - _SAL2_Source_( \ - _Outptr_opt_result_bytebuffer_all_maybenull_, (size), \ - _Out_opt_impl_ _Deref_post2_impl_(__maybenull_impl_notref, __bytecount_impl(size))) - -// Annotations for output reference to pointer parameters. - -#define _Outref_ _SAL2_Source_(_Outref_, (), _Out_impl_ _Post_notnull_) -#define _Outref_result_maybenull_ \ - _SAL2_Source_(_Outref_result_maybenull_, (), \ - _Pre2_impl_(__notnull_impl_notref, __cap_c_one_notref_impl) \ - _Post_maybenull_ _Post_valid_impl_) - -#define _Outref_result_buffer_(size) \ - _SAL2_Source_(_Outref_result_buffer_, (size), _Outref_ _Post1_impl_(__cap_impl(size))) -#define _Outref_result_bytebuffer_(size) \ - _SAL2_Source_(_Outref_result_bytebuffer_, (size), _Outref_ _Post1_impl_(__bytecap_impl(size))) -#define _Outref_result_buffer_to_(size, count) \ - _SAL2_Source_(_Outref_result_buffer_to_, (size, count), \ - _Outref_result_buffer_(size) _Post1_impl_(__count_impl(count))) -#define _Outref_result_bytebuffer_to_(size, count) \ - _SAL2_Source_(_Outref_result_bytebuffer_to_, (size, count), \ - _Outref_result_bytebuffer_(size) _Post1_impl_(__bytecount_impl(count))) -#define _Outref_result_buffer_all_(size) \ - _SAL2_Source_(_Outref_result_buffer_all_, (size), _Outref_result_buffer_to_(size, _Old_(size))) -#define _Outref_result_bytebuffer_all_(size) \ - _SAL2_Source_(_Outref_result_bytebuffer_all_, (size), \ - _Outref_result_bytebuffer_to_(size, _Old_(size))) - -#define _Outref_result_buffer_maybenull_(size) \ - _SAL2_Source_(_Outref_result_buffer_maybenull_, (size), \ - _Outref_result_maybenull_ _Post1_impl_(__cap_impl(size))) -#define _Outref_result_bytebuffer_maybenull_(size) \ - _SAL2_Source_(_Outref_result_bytebuffer_maybenull_, (size), \ - _Outref_result_maybenull_ _Post1_impl_(__bytecap_impl(size))) -#define _Outref_result_buffer_to_maybenull_(size, count) \ - _SAL2_Source_(_Outref_result_buffer_to_maybenull_, (size, count), \ - _Outref_result_buffer_maybenull_(size) _Post1_impl_(__count_impl(count))) -#define _Outref_result_bytebuffer_to_maybenull_(size, count) \ - _SAL2_Source_(_Outref_result_bytebuffer_to_maybenull_, (size, count), \ - _Outref_result_bytebuffer_maybenull_(size) \ - _Post1_impl_(__bytecount_impl(count))) -#define _Outref_result_buffer_all_maybenull_(size) \ - _SAL2_Source_(_Outref_result_buffer_all_maybenull_, (size), \ - _Outref_result_buffer_to_maybenull_(size, _Old_(size))) -#define _Outref_result_bytebuffer_all_maybenull_(size) \ - _SAL2_Source_(_Outref_result_bytebuffer_all_maybenull_, (size), \ - _Outref_result_bytebuffer_to_maybenull_(size, _Old_(size))) - -// Annotations for output reference to pointer parameters that guarantee -// that the pointer is set to NULL on failure. -#define _Outref_result_nullonfailure_ \ - _SAL2_Source_(_Outref_result_nullonfailure_, (), _Outref_ _On_failure_(_Post_null_)) - -// Generic annotations to set output value of a by-pointer or by-reference parameter to null/zero on -// failure. -#define _Result_nullonfailure_ \ - _SAL2_Source_(_Result_nullonfailure_, (), _On_failure_(_Notref_impl_ _Deref_impl_ _Post_null_)) -#define _Result_zeroonfailure_ \ - _SAL2_Source_(_Result_zeroonfailure_, (), \ - _On_failure_(_Notref_impl_ _Deref_impl_ _Out_range_(==, 0))) - -// return values ------------------------------- - -// -// _Ret_ annotations -// -// describing conditions that hold for return values after the call - -// e.g. _Ret_z_ CString::operator const WCHAR*() const throw(); -#define _Ret_z_ \ - _SAL2_Source_(_Ret_z_, (), _Ret2_impl_(__notnull_impl, __zterm_impl) _Ret_valid_impl_) -#define _Ret_maybenull_z_ \ - _SAL2_Source_(_Ret_maybenull_z_, (), \ - _Ret2_impl_(__maybenull_impl, __zterm_impl) _Ret_valid_impl_) - -// used with allocated but not yet initialized objects -#define _Ret_notnull_ _SAL2_Source_(_Ret_notnull_, (), _Ret1_impl_(__notnull_impl)) -#define _Ret_maybenull_ _SAL2_Source_(_Ret_maybenull_, (), _Ret1_impl_(__maybenull_impl)) -#define _Ret_null_ _SAL2_Source_(_Ret_null_, (), _Ret1_impl_(__null_impl)) - -// used with allocated and initialized objects -// returns single valid object -#define _Ret_valid_ \ - _SAL2_Source_(_Ret_valid_, (), _Ret1_impl_(__notnull_impl_notref) _Ret_valid_impl_) - -// returns pointer to initialized buffer of specified size -#define _Ret_writes_(size) \ - _SAL2_Source_(_Ret_writes_, (size), \ - _Ret2_impl_(__notnull_impl, __count_impl(size)) _Ret_valid_impl_) -#define _Ret_writes_z_(size) \ - _SAL2_Source_(_Ret_writes_z_, (size), \ - _Ret3_impl_(__notnull_impl, __count_impl(size), __zterm_impl) _Ret_valid_impl_) -#define _Ret_writes_bytes_(size) \ - _SAL2_Source_(_Ret_writes_bytes_, (size), \ - _Ret2_impl_(__notnull_impl, __bytecount_impl(size)) _Ret_valid_impl_) -#define _Ret_writes_maybenull_(size) \ - _SAL2_Source_(_Ret_writes_maybenull_, (size), \ - _Ret2_impl_(__maybenull_impl, __count_impl(size)) _Ret_valid_impl_) -#define _Ret_writes_maybenull_z_(size) \ - _SAL2_Source_(_Ret_writes_maybenull_z_, (size), \ - _Ret3_impl_(__maybenull_impl, __count_impl(size), __zterm_impl) \ - _Ret_valid_impl_) -#define _Ret_writes_bytes_maybenull_(size) \ - _SAL2_Source_(_Ret_writes_bytes_maybenull_, (size), \ - _Ret2_impl_(__maybenull_impl, __bytecount_impl(size)) _Ret_valid_impl_) - -// returns pointer to partially initialized buffer, with total size 'size' and initialized size -// 'count' -#define _Ret_writes_to_(size, count) \ - _SAL2_Source_(_Ret_writes_to_, (size, count), \ - _Ret3_impl_(__notnull_impl, __cap_impl(size), __count_impl(count)) \ - _Ret_valid_impl_) -#define _Ret_writes_bytes_to_(size, count) \ - _SAL2_Source_(_Ret_writes_bytes_to_, (size, count), \ - _Ret3_impl_(__notnull_impl, __bytecap_impl(size), __bytecount_impl(count)) \ - _Ret_valid_impl_) -#define _Ret_writes_to_maybenull_(size, count) \ - _SAL2_Source_(_Ret_writes_to_maybenull_, (size, count), \ - _Ret3_impl_(__maybenull_impl, __cap_impl(size), __count_impl(count)) \ - _Ret_valid_impl_) -#define _Ret_writes_bytes_to_maybenull_(size, count) \ - _SAL2_Source_(_Ret_writes_bytes_to_maybenull_, (size, count), \ - _Ret3_impl_(__maybenull_impl, __bytecap_impl(size), __bytecount_impl(count)) \ - _Ret_valid_impl_) - -// Annotations for strict type checking -#define _Points_to_data_ _SAL2_Source_(_Points_to_data_, (), _Pre_ _Points_to_data_impl_) -#define _Literal_ _SAL2_Source_(_Literal_, (), _Pre_ _Literal_impl_) -#define _Notliteral_ _SAL2_Source_(_Notliteral_, (), _Pre_ _Notliteral_impl_) - -// Check the return value of a function e.g. _Check_return_ ErrorCode Foo(); -#define _Check_return_ _SAL2_Source_(_Check_return_, (), _Check_return_impl_) -#define _Must_inspect_result_ \ - _SAL2_Source_(_Must_inspect_result_, (), _Must_inspect_impl_ _Check_return_impl_) - -// e.g. MyPrintF( _Printf_format_string_ const WCHAR* wzFormat, ... ); -#define _Printf_format_string_ \ - _SAL2_Source_(_Printf_format_string_, (), _Printf_format_string_impl_) -#define _Scanf_format_string_ _SAL2_Source_(_Scanf_format_string_, (), _Scanf_format_string_impl_) -#define _Scanf_s_format_string_ \ - _SAL2_Source_(_Scanf_s_format_string_, (), _Scanf_s_format_string_impl_) - -#define _Format_string_impl_(kind, where) _SA_annotes2(SAL_IsFormatString2, kind, where) -#define _Printf_format_string_params_(x) \ - _SAL2_Source_(_Printf_format_string_params_, (x), _Format_string_impl_("printf", x)) -#define _Scanf_format_string_params_(x) \ - _SAL2_Source_(_Scanf_format_string_params_, (x), _Format_string_impl_("scanf", x)) -#define _Scanf_s_format_string_params_(x) \ - _SAL2_Source_(_Scanf_s_format_string_params_, (x), _Format_string_impl_("scanf_s", x)) - -// annotations to express value of integral or pointer parameter -#define _In_range_(lb, ub) _SAL2_Source_(_In_range_, (lb, ub), _In_range_impl_(lb, ub)) -#define _Out_range_(lb, ub) _SAL2_Source_(_Out_range_, (lb, ub), _Out_range_impl_(lb, ub)) -#define _Ret_range_(lb, ub) _SAL2_Source_(_Ret_range_, (lb, ub), _Ret_range_impl_(lb, ub)) -#define _Deref_in_range_(lb, ub) \ - _SAL2_Source_(_Deref_in_range_, (lb, ub), _Deref_in_range_impl_(lb, ub)) -#define _Deref_out_range_(lb, ub) \ - _SAL2_Source_(_Deref_out_range_, (lb, ub), _Deref_out_range_impl_(lb, ub)) -#define _Deref_ret_range_(lb, ub) \ - _SAL2_Source_(_Deref_ret_range_, (lb, ub), _Deref_ret_range_impl_(lb, ub)) -#define _Pre_equal_to_(expr) _SAL2_Source_(_Pre_equal_to_, (expr), _In_range_(==, expr)) -#define _Post_equal_to_(expr) _SAL2_Source_(_Post_equal_to_, (expr), _Out_range_(==, expr)) - -// annotation to express that a value (usually a field of a mutable class) -// is not changed by a function call -#define _Unchanged_(e) _SAL2_Source_(_Unchanged_, (e), _At_(e, _Post_equal_to_(_Old_(e)) _Const_)) - -// Annotations to allow expressing generalized pre and post conditions. -// 'cond' may be any valid SAL expression that is considered to be true as a precondition -// or postcondition (respsectively). -#define _Pre_satisfies_(cond) _SAL2_Source_(_Pre_satisfies_, (cond), _Pre_satisfies_impl_(cond)) -#define _Post_satisfies_(cond) _SAL2_Source_(_Post_satisfies_, (cond), _Post_satisfies_impl_(cond)) - -// Annotations to express struct, class and field invariants -#define _Struct_size_bytes_(size) _SAL2_Source_(_Struct_size_bytes_, (size), _Writable_bytes_(size)) - -#define _Field_size_(size) _SAL2_Source_(_Field_size_, (size), _Notnull_ _Writable_elements_(size)) -#define _Field_size_opt_(size) \ - _SAL2_Source_(_Field_size_opt_, (size), _Maybenull_ _Writable_elements_(size)) -#define _Field_size_part_(size, count) \ - _SAL2_Source_(_Field_size_part_, (size, count), \ - _Notnull_ _Writable_elements_(size) _Readable_elements_(count)) -#define _Field_size_part_opt_(size, count) \ - _SAL2_Source_(_Field_size_part_opt_, (size, count), \ - _Maybenull_ _Writable_elements_(size) _Readable_elements_(count)) -#define _Field_size_full_(size) \ - _SAL2_Source_(_Field_size_full_, (size), _Field_size_part_(size, size)) -#define _Field_size_full_opt_(size) \ - _SAL2_Source_(_Field_size_full_opt_, (size), _Field_size_part_opt_(size, size)) - -#define _Field_size_bytes_(size) \ - _SAL2_Source_(_Field_size_bytes_, (size), _Notnull_ _Writable_bytes_(size)) -#define _Field_size_bytes_opt_(size) \ - _SAL2_Source_(_Field_size_bytes_opt_, (size), _Maybenull_ _Writable_bytes_(size)) -#define _Field_size_bytes_part_(size, count) \ - _SAL2_Source_(_Field_size_bytes_part_, (size, count), \ - _Notnull_ _Writable_bytes_(size) _Readable_bytes_(count)) -#define _Field_size_bytes_part_opt_(size, count) \ - _SAL2_Source_(_Field_size_bytes_part_opt_, (size, count), \ - _Maybenull_ _Writable_bytes_(size) _Readable_bytes_(count)) -#define _Field_size_bytes_full_(size) \ - _SAL2_Source_(_Field_size_bytes_full_, (size), _Field_size_bytes_part_(size, size)) -#define _Field_size_bytes_full_opt_(size) \ - _SAL2_Source_(_Field_size_bytes_full_opt_, (size), _Field_size_bytes_part_opt_(size, size)) - -#define _Field_z_ _SAL2_Source_(_Field_z_, (), _Null_terminated_) - -#define _Field_range_(min, max) \ - _SAL2_Source_(_Field_range_, (min, max), _Field_range_impl_(min, max)) - -//============================================================================ -// _Pre_\_Post_ Layer: -//============================================================================ - -// -// Raw Pre/Post for declaring custom pre/post conditions -// - -#define _Pre_ _Pre_impl_ -#define _Post_ _Post_impl_ - -// -// Validity property -// - -#define _Valid_ _Valid_impl_ -#define _Notvalid_ _Notvalid_impl_ -#define _Maybevalid_ _Maybevalid_impl_ - -// -// Buffer size properties -// - -// Expressing buffer sizes without specifying pre or post condition -#define _Readable_bytes_(size) _SAL2_Source_(_Readable_bytes_, (size), _Readable_bytes_impl_(size)) -#define _Readable_elements_(size) \ - _SAL2_Source_(_Readable_elements_, (size), _Readable_elements_impl_(size)) -#define _Writable_bytes_(size) _SAL2_Source_(_Writable_bytes_, (size), _Writable_bytes_impl_(size)) -#define _Writable_elements_(size) \ - _SAL2_Source_(_Writable_elements_, (size), _Writable_elements_impl_(size)) - -#define _Null_terminated_ _SAL2_Source_(_Null_terminated_, (), _Null_terminated_impl_) -#define _NullNull_terminated_ _SAL2_Source_(_NullNull_terminated_, (), _NullNull_terminated_impl_) - -// Expressing buffer size as pre or post condition -#define _Pre_readable_size_(size) \ - _SAL2_Source_(_Pre_readable_size_, (size), _Pre1_impl_(__count_impl(size)) _Pre_valid_impl_) -#define _Pre_writable_size_(size) \ - _SAL2_Source_(_Pre_writable_size_, (size), _Pre1_impl_(__cap_impl(size))) -#define _Pre_readable_byte_size_(size) \ - _SAL2_Source_(_Pre_readable_byte_size_, (size), \ - _Pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) -#define _Pre_writable_byte_size_(size) \ - _SAL2_Source_(_Pre_writable_byte_size_, (size), _Pre1_impl_(__bytecap_impl(size))) - -#define _Post_readable_size_(size) \ - _SAL2_Source_(_Post_readable_size_, (size), _Post1_impl_(__count_impl(size)) _Post_valid_impl_) -#define _Post_writable_size_(size) \ - _SAL2_Source_(_Post_writable_size_, (size), _Post1_impl_(__cap_impl(size))) -#define _Post_readable_byte_size_(size) \ - _SAL2_Source_(_Post_readable_byte_size_, (size), \ - _Post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) -#define _Post_writable_byte_size_(size) \ - _SAL2_Source_(_Post_writable_byte_size_, (size), _Post1_impl_(__bytecap_impl(size))) - -// -// Pointer null-ness properties -// -#define _Null_ _Null_impl_ -#define _Notnull_ _Notnull_impl_ -#define _Maybenull_ _Maybenull_impl_ - -// -// _Pre_ annotations --- -// -// describing conditions that must be met before the call of the function - -// e.g. int strlen( _Pre_z_ const char* sz ); -// buffer is a zero terminated string -#define _Pre_z_ _SAL2_Source_(_Pre_z_, (), _Pre1_impl_(__zterm_impl) _Pre_valid_impl_) - -// valid size unknown or indicated by type (e.g.:LPSTR) -#define _Pre_valid_ \ - _SAL2_Source_(_Pre_valid_, (), _Pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_) -#define _Pre_opt_valid_ \ - _SAL2_Source_(_Pre_opt_valid_, (), _Pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_) - -#define _Pre_invalid_ _SAL2_Source_(_Pre_invalid_, (), _Deref_pre1_impl_(__notvalid_impl)) - -// Overrides recursive valid when some field is not yet initialized when using _Inout_ -#define _Pre_unknown_ _SAL2_Source_(_Pre_unknown_, (), _Pre1_impl_(__maybevalid_impl)) - -// used with allocated but not yet initialized objects -#define _Pre_notnull_ _SAL2_Source_(_Pre_notnull_, (), _Pre1_impl_(__notnull_impl_notref)) -#define _Pre_maybenull_ _SAL2_Source_(_Pre_maybenull_, (), _Pre1_impl_(__maybenull_impl_notref)) -#define _Pre_null_ _SAL2_Source_(_Pre_null_, (), _Pre1_impl_(__null_impl_notref)) - -// -// _Post_ annotations --- -// -// describing conditions that hold after the function call - -// void CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cch) _Post_z_ char* szFrom, size_t cchFrom ); -// buffer will be a zero-terminated string after the call -#define _Post_z_ _SAL2_Source_(_Post_z_, (), _Post1_impl_(__zterm_impl) _Post_valid_impl_) - -// e.g. HRESULT InitStruct( _Post_valid_ Struct* pobj ); -#define _Post_valid_ _SAL2_Source_(_Post_valid_, (), _Post_valid_impl_) -#define _Post_invalid_ _SAL2_Source_(_Post_invalid_, (), _Deref_post1_impl_(__notvalid_impl)) - -// e.g. void free( _Post_ptr_invalid_ void* pv ); -#define _Post_ptr_invalid_ _SAL2_Source_(_Post_ptr_invalid_, (), _Post1_impl_(__notvalid_impl)) - -// e.g. void ThrowExceptionIfNull( _Post_notnull_ const void* pv ); -#define _Post_notnull_ _SAL2_Source_(_Post_notnull_, (), _Post1_impl_(__notnull_impl)) - -// e.g. HRESULT GetObject(_Outptr_ _On_failure_(_At_(*p, _Post_null_)) T **p); -#define _Post_null_ _SAL2_Source_(_Post_null_, (), _Post1_impl_(__null_impl)) - -#define _Post_maybenull_ _SAL2_Source_(_Post_maybenull_, (), _Post1_impl_(__maybenull_impl)) - -#define _Prepost_z_ _SAL2_Source_(_Prepost_z_, (), _Pre_z_ _Post_z_) - -// #pragma region Input Buffer SAL 1 compatibility macros - -/*========================================================================== - - This section contains definitions for macros defined for VS2010 and earlier. - Usage of these macros is still supported, but the SAL 2 macros defined above - are recommended instead. This comment block is retained to assist in - understanding SAL that still uses the older syntax. - - The macros are defined in 3 layers: - - _In_\_Out_ Layer: - ---------------- - This layer provides the highest abstraction and its macros should be used - in most cases. Its macros start with _In_, _Out_ or _Inout_. For the - typical case they provide the most concise annotations. - - _Pre_\_Post_ Layer: - ------------------ - The macros of this layer only should be used when there is no suitable macro - in the _In_\_Out_ layer. Its macros start with _Pre_, _Post_, _Ret_, - _Deref_pre_ _Deref_post_ and _Deref_ret_. This layer provides the most - flexibility for annotations. - - Implementation Abstraction Layer: - -------------------------------- - Macros from this layer should never be used directly. The layer only exists - to hide the implementation of the annotation macros. - - - Annotation Syntax: - |--------------|----------|----------------|-----------------------------| - | Usage | Nullness | ZeroTerminated | Extent | - |--------------|----------|----------------|-----------------------------| - | _In_ | <> | <> | <> | - | _Out_ | opt_ | z_ | [byte]cap_[c_|x_]( size ) | - | _Inout_ | | | [byte]count_[c_|x_]( size ) | - | _Deref_out_ | | | ptrdiff_cap_( ptr ) | - |--------------| | | ptrdiff_count_( ptr ) | - | _Ret_ | | | | - | _Deref_ret_ | | | | - |--------------| | | | - | _Pre_ | | | | - | _Post_ | | | | - | _Deref_pre_ | | | | - | _Deref_post_ | | | | - |--------------|----------|----------------|-----------------------------| - - Usage: - ----- - _In_, _Out_, _Inout_, _Pre_, _Post_, _Deref_pre_, _Deref_post_ are for - formal parameters. - _Ret_, _Deref_ret_ must be used for return values. - - Nullness: - -------- - If the pointer can be NULL the annotation contains _opt. If the macro - does not contain '_opt' the pointer may not be NULL. - - String Type: - ----------- - _z: NullTerminated string - for _In_ parameters the buffer must have the specified stringtype before the call - for _Out_ parameters the buffer must have the specified stringtype after the call - for _Inout_ parameters both conditions apply - - Extent Syntax: - |------|---------------|---------------| - | Unit | Writ\Readable | Argument Type | - |------|---------------|---------------| - | <> | cap_ | <> | - | byte | count_ | c_ | - | | | x_ | - |------|---------------|---------------| - - 'cap' (capacity) describes the writable size of the buffer and is typically used - with _Out_. The default unit is elements. Use 'bytecap' if the size is given in bytes - 'count' describes the readable size of the buffer and is typically used with _In_. - The default unit is elements. Use 'bytecount' if the size is given in bytes. - - Argument syntax for cap_, bytecap_, count_, bytecount_: - (|return)[+n] e.g. cch, return, cb+2 - - If the buffer size is a constant expression use the c_ postfix. - E.g. cap_c_(20), count_c_(MAX_PATH), bytecount_c_(16) - - If the buffer size is given by a limiting pointer use the ptrdiff_ versions - of the macros. - - If the buffer size is neither a parameter nor a constant expression use the x_ - postfix. e.g. bytecount_x_(num*size) x_ annotations accept any arbitrary string. - No analysis can be done for x_ annotations but they at least tell the tool that - the buffer has some sort of extent description. x_ annotations might be supported - by future compiler versions. - -============================================================================*/ - -// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch ) -// valid buffer extent described by another parameter -#define _In_count_(size) _SAL1_1_Source_(_In_count_, (size), _Pre_count_(size) _Deref_pre_readonly_) -#define _In_opt_count_(size) \ - _SAL1_1_Source_(_In_opt_count_, (size), _Pre_opt_count_(size) _Deref_pre_readonly_) -#define _In_bytecount_(size) \ - _SAL1_1_Source_(_In_bytecount_, (size), _Pre_bytecount_(size) _Deref_pre_readonly_) -#define _In_opt_bytecount_(size) \ - _SAL1_1_Source_(_In_opt_bytecount_, (size), _Pre_opt_bytecount_(size) _Deref_pre_readonly_) - -// valid buffer extent described by a constant extression -#define _In_count_c_(size) \ - _SAL1_1_Source_(_In_count_c_, (size), _Pre_count_c_(size) _Deref_pre_readonly_) -#define _In_opt_count_c_(size) \ - _SAL1_1_Source_(_In_opt_count_c_, (size), _Pre_opt_count_c_(size) _Deref_pre_readonly_) -#define _In_bytecount_c_(size) \ - _SAL1_1_Source_(_In_bytecount_c_, (size), _Pre_bytecount_c_(size) _Deref_pre_readonly_) -#define _In_opt_bytecount_c_(size) \ - _SAL1_1_Source_(_In_opt_bytecount_c_, (size), _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_) - -// nullterminated 'input' buffers with given size - -// e.g. void SetCharRange( _In_count_(cch) const char* rgch, size_t cch ) -// nullterminated valid buffer extent described by another parameter -#define _In_z_count_(size) \ - _SAL1_1_Source_(_In_z_count_, (size), _Pre_z_ _Pre_count_(size) _Deref_pre_readonly_) -#define _In_opt_z_count_(size) \ - _SAL1_1_Source_(_In_opt_z_count_, (size), \ - _Pre_opt_z_ _Pre_opt_count_(size) _Deref_pre_readonly_) -#define _In_z_bytecount_(size) \ - _SAL1_1_Source_(_In_z_bytecount_, (size), _Pre_z_ _Pre_bytecount_(size) _Deref_pre_readonly_) -#define _In_opt_z_bytecount_(size) \ - _SAL1_1_Source_(_In_opt_z_bytecount_, (size), \ - _Pre_opt_z_ _Pre_opt_bytecount_(size) _Deref_pre_readonly_) - -// nullterminated valid buffer extent described by a constant extression -#define _In_z_count_c_(size) \ - _SAL1_1_Source_(_In_z_count_c_, (size), _Pre_z_ _Pre_count_c_(size) _Deref_pre_readonly_) -#define _In_opt_z_count_c_(size) \ - _SAL1_1_Source_(_In_opt_z_count_c_, (size), \ - _Pre_opt_z_ _Pre_opt_count_c_(size) _Deref_pre_readonly_) -#define _In_z_bytecount_c_(size) \ - _SAL1_1_Source_(_In_z_bytecount_c_, (size), \ - _Pre_z_ _Pre_bytecount_c_(size) _Deref_pre_readonly_) -#define _In_opt_z_bytecount_c_(size) \ - _SAL1_1_Source_(_In_opt_z_bytecount_c_, (size), \ - _Pre_opt_z_ _Pre_opt_bytecount_c_(size) _Deref_pre_readonly_) - -// buffer capacity is described by another pointer -// e.g. void Foo( _In_ptrdiff_count_(pchMax) const char* pch, const char* pchMax ) { while pch < -// pchMax ) pch++; } -#define _In_ptrdiff_count_(size) \ - _SAL1_1_Source_(_In_ptrdiff_count_, (size), _Pre_ptrdiff_count_(size) _Deref_pre_readonly_) -#define _In_opt_ptrdiff_count_(size) \ - _SAL1_1_Source_(_In_opt_ptrdiff_count_, (size), \ - _Pre_opt_ptrdiff_count_(size) _Deref_pre_readonly_) - -// 'x' version for complex expressions that are not supported by the current compiler version -// e.g. void Set3ColMatrix( _In_count_x_(3*cRows) const Elem* matrix, int cRows ); -#define _In_count_x_(size) \ - _SAL1_1_Source_(_In_count_x_, (size), _Pre_count_x_(size) _Deref_pre_readonly_) -#define _In_opt_count_x_(size) \ - _SAL1_1_Source_(_In_opt_count_x_, (size), _Pre_opt_count_x_(size) _Deref_pre_readonly_) -#define _In_bytecount_x_(size) \ - _SAL1_1_Source_(_In_bytecount_x_, (size), _Pre_bytecount_x_(size) _Deref_pre_readonly_) -#define _In_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_In_opt_bytecount_x_, (size), _Pre_opt_bytecount_x_(size) _Deref_pre_readonly_) - -// 'out' with buffer size -// e.g. void GetIndices( _Out_cap_(cIndices) int* rgIndices, size_t cIndices ); -// buffer capacity is described by another parameter -#define _Out_cap_(size) _SAL1_1_Source_(_Out_cap_, (size), _Pre_cap_(size) _Post_valid_impl_) -#define _Out_opt_cap_(size) \ - _SAL1_1_Source_(_Out_opt_cap_, (size), _Pre_opt_cap_(size) _Post_valid_impl_) -#define _Out_bytecap_(size) \ - _SAL1_1_Source_(_Out_bytecap_, (size), _Pre_bytecap_(size) _Post_valid_impl_) -#define _Out_opt_bytecap_(size) \ - _SAL1_1_Source_(_Out_opt_bytecap_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_) - -// buffer capacity is described by a constant expression -#define _Out_cap_c_(size) _SAL1_1_Source_(_Out_cap_c_, (size), _Pre_cap_c_(size) _Post_valid_impl_) -#define _Out_opt_cap_c_(size) \ - _SAL1_1_Source_(_Out_opt_cap_c_, (size), _Pre_opt_cap_c_(size) _Post_valid_impl_) -#define _Out_bytecap_c_(size) \ - _SAL1_1_Source_(_Out_bytecap_c_, (size), _Pre_bytecap_c_(size) _Post_valid_impl_) -#define _Out_opt_bytecap_c_(size) \ - _SAL1_1_Source_(_Out_opt_bytecap_c_, (size), _Pre_opt_bytecap_c_(size) _Post_valid_impl_) - -// buffer capacity is described by another parameter multiplied by a constant expression -#define _Out_cap_m_(mult, size) \ - _SAL1_1_Source_(_Out_cap_m_, (mult, size), _Pre_cap_m_(mult, size) _Post_valid_impl_) -#define _Out_opt_cap_m_(mult, size) \ - _SAL1_1_Source_(_Out_opt_cap_m_, (mult, size), _Pre_opt_cap_m_(mult, size) _Post_valid_impl_) -#define _Out_z_cap_m_(mult, size) \ - _SAL1_1_Source_(_Out_z_cap_m_, (mult, size), _Pre_cap_m_(mult, size) _Post_valid_impl_ _Post_z_) -#define _Out_opt_z_cap_m_(mult, size) \ - _SAL1_1_Source_(_Out_opt_z_cap_m_, (mult, size), \ - _Pre_opt_cap_m_(mult, size) _Post_valid_impl_ _Post_z_) - -// buffer capacity is described by another pointer -// e.g. void Foo( _Out_ptrdiff_cap_(pchMax) char* pch, const char* pchMax ) { while pch < pchMax ) -// pch++; } -#define _Out_ptrdiff_cap_(size) \ - _SAL1_1_Source_(_Out_ptrdiff_cap_, (size), _Pre_ptrdiff_cap_(size) _Post_valid_impl_) -#define _Out_opt_ptrdiff_cap_(size) \ - _SAL1_1_Source_(_Out_opt_ptrdiff_cap_, (size), _Pre_opt_ptrdiff_cap_(size) _Post_valid_impl_) - -// buffer capacity is described by a complex expression -#define _Out_cap_x_(size) _SAL1_1_Source_(_Out_cap_x_, (size), _Pre_cap_x_(size) _Post_valid_impl_) -#define _Out_opt_cap_x_(size) \ - _SAL1_1_Source_(_Out_opt_cap_x_, (size), _Pre_opt_cap_x_(size) _Post_valid_impl_) -#define _Out_bytecap_x_(size) \ - _SAL1_1_Source_(_Out_bytecap_x_, (size), _Pre_bytecap_x_(size) _Post_valid_impl_) -#define _Out_opt_bytecap_x_(size) \ - _SAL1_1_Source_(_Out_opt_bytecap_x_, (size), _Pre_opt_bytecap_x_(size) _Post_valid_impl_) - -// a zero terminated string is filled into a buffer of given capacity -// e.g. void CopyStr( _In_z_ const char* szFrom, _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); -// buffer capacity is described by another parameter -#define _Out_z_cap_(size) \ - _SAL1_1_Source_(_Out_z_cap_, (size), _Pre_cap_(size) _Post_valid_impl_ _Post_z_) -#define _Out_opt_z_cap_(size) \ - _SAL1_1_Source_(_Out_opt_z_cap_, (size), _Pre_opt_cap_(size) _Post_valid_impl_ _Post_z_) -#define _Out_z_bytecap_(size) \ - _SAL1_1_Source_(_Out_z_bytecap_, (size), _Pre_bytecap_(size) _Post_valid_impl_ _Post_z_) -#define _Out_opt_z_bytecap_(size) \ - _SAL1_1_Source_(_Out_opt_z_bytecap_, (size), _Pre_opt_bytecap_(size) _Post_valid_impl_ _Post_z_) - -// buffer capacity is described by a constant expression -#define _Out_z_cap_c_(size) \ - _SAL1_1_Source_(_Out_z_cap_c_, (size), _Pre_cap_c_(size) _Post_valid_impl_ _Post_z_) -#define _Out_opt_z_cap_c_(size) \ - _SAL1_1_Source_(_Out_opt_z_cap_c_, (size), _Pre_opt_cap_c_(size) _Post_valid_impl_ _Post_z_) -#define _Out_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Out_z_bytecap_c_, (size), _Pre_bytecap_c_(size) _Post_valid_impl_ _Post_z_) -#define _Out_opt_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Out_opt_z_bytecap_c_, (size), \ - _Pre_opt_bytecap_c_(size) _Post_valid_impl_ _Post_z_) - -// buffer capacity is described by a complex expression -#define _Out_z_cap_x_(size) \ - _SAL1_1_Source_(_Out_z_cap_x_, (size), _Pre_cap_x_(size) _Post_valid_impl_ _Post_z_) -#define _Out_opt_z_cap_x_(size) \ - _SAL1_1_Source_(_Out_opt_z_cap_x_, (size), _Pre_opt_cap_x_(size) _Post_valid_impl_ _Post_z_) -#define _Out_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Out_z_bytecap_x_, (size), _Pre_bytecap_x_(size) _Post_valid_impl_ _Post_z_) -#define _Out_opt_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Out_opt_z_bytecap_x_, (size), \ - _Pre_opt_bytecap_x_(size) _Post_valid_impl_ _Post_z_) - -// a zero terminated string is filled into a buffer of given capacity -// e.g. size_t CopyCharRange( _In_count_(cchFrom) const char* rgchFrom, size_t cchFrom, -// _Out_cap_post_count_(cchTo,return)) char* rgchTo, size_t cchTo ); -#define _Out_cap_post_count_(cap, count) \ - _SAL1_1_Source_(_Out_cap_post_count_, (cap, count), \ - _Pre_cap_(cap) _Post_valid_impl_ _Post_count_(count)) -#define _Out_opt_cap_post_count_(cap, count) \ - _SAL1_1_Source_(_Out_opt_cap_post_count_, (cap, count), \ - _Pre_opt_cap_(cap) _Post_valid_impl_ _Post_count_(count)) -#define _Out_bytecap_post_bytecount_(cap, count) \ - _SAL1_1_Source_(_Out_bytecap_post_bytecount_, (cap, count), \ - _Pre_bytecap_(cap) _Post_valid_impl_ _Post_bytecount_(count)) -#define _Out_opt_bytecap_post_bytecount_(cap, count) \ - _SAL1_1_Source_(_Out_opt_bytecap_post_bytecount_, (cap, count), \ - _Pre_opt_bytecap_(cap) _Post_valid_impl_ _Post_bytecount_(count)) - -// a zero terminated string is filled into a buffer of given capacity -// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Out_z_cap_post_count_(cchTo,return+1) char* -// szTo, size_t cchTo ); -#define _Out_z_cap_post_count_(cap, count) \ - _SAL1_1_Source_(_Out_z_cap_post_count_, (cap, count), \ - _Pre_cap_(cap) _Post_valid_impl_ _Post_z_count_(count)) -#define _Out_opt_z_cap_post_count_(cap, count) \ - _SAL1_1_Source_(_Out_opt_z_cap_post_count_, (cap, count), \ - _Pre_opt_cap_(cap) _Post_valid_impl_ _Post_z_count_(count)) -#define _Out_z_bytecap_post_bytecount_(cap, count) \ - _SAL1_1_Source_(_Out_z_bytecap_post_bytecount_, (cap, count), \ - _Pre_bytecap_(cap) _Post_valid_impl_ _Post_z_bytecount_(count)) -#define _Out_opt_z_bytecap_post_bytecount_(cap, count) \ - _SAL1_1_Source_(_Out_opt_z_bytecap_post_bytecount_, (cap, count), \ - _Pre_opt_bytecap_(cap) _Post_valid_impl_ _Post_z_bytecount_(count)) - -// only use with dereferenced arguments e.g. '*pcch' -#define _Out_capcount_(capcount) \ - _SAL1_1_Source_(_Out_capcount_, (capcount), \ - _Pre_cap_(capcount) _Post_valid_impl_ _Post_count_(capcount)) -#define _Out_opt_capcount_(capcount) \ - _SAL1_1_Source_(_Out_opt_capcount_, (capcount), \ - _Pre_opt_cap_(capcount) _Post_valid_impl_ _Post_count_(capcount)) -#define _Out_bytecapcount_(capcount) \ - _SAL1_1_Source_(_Out_bytecapcount_, (capcount), \ - _Pre_bytecap_(capcount) _Post_valid_impl_ _Post_bytecount_(capcount)) -#define _Out_opt_bytecapcount_(capcount) \ - _SAL1_1_Source_(_Out_opt_bytecapcount_, (capcount), \ - _Pre_opt_bytecap_(capcount) _Post_valid_impl_ _Post_bytecount_(capcount)) - -#define _Out_capcount_x_(capcount) \ - _SAL1_1_Source_(_Out_capcount_x_, (capcount), \ - _Pre_cap_x_(capcount) _Post_valid_impl_ _Post_count_x_(capcount)) -#define _Out_opt_capcount_x_(capcount) \ - _SAL1_1_Source_(_Out_opt_capcount_x_, (capcount), \ - _Pre_opt_cap_x_(capcount) _Post_valid_impl_ _Post_count_x_(capcount)) -#define _Out_bytecapcount_x_(capcount) \ - _SAL1_1_Source_(_Out_bytecapcount_x_, (capcount), \ - _Pre_bytecap_x_(capcount) _Post_valid_impl_ _Post_bytecount_x_(capcount)) -#define _Out_opt_bytecapcount_x_(capcount) \ - _SAL1_1_Source_(_Out_opt_bytecapcount_x_, (capcount), \ - _Pre_opt_bytecap_x_(capcount) _Post_valid_impl_ _Post_bytecount_x_(capcount)) - -// e.g. GetString( _Out_z_capcount_(*pLen+1) char* sz, size_t* pLen ); -#define _Out_z_capcount_(capcount) \ - _SAL1_1_Source_(_Out_z_capcount_, (capcount), \ - _Pre_cap_(capcount) _Post_valid_impl_ _Post_z_count_(capcount)) -#define _Out_opt_z_capcount_(capcount) \ - _SAL1_1_Source_(_Out_opt_z_capcount_, (capcount), \ - _Pre_opt_cap_(capcount) _Post_valid_impl_ _Post_z_count_(capcount)) -#define _Out_z_bytecapcount_(capcount) \ - _SAL1_1_Source_(_Out_z_bytecapcount_, (capcount), \ - _Pre_bytecap_(capcount) _Post_valid_impl_ _Post_z_bytecount_(capcount)) -#define _Out_opt_z_bytecapcount_(capcount) \ - _SAL1_1_Source_(_Out_opt_z_bytecapcount_, (capcount), \ - _Pre_opt_bytecap_(capcount) _Post_valid_impl_ _Post_z_bytecount_(capcount)) - -// 'inout' buffers with initialized elements before and after the call -// e.g. void ModifyIndices( _Inout_count_(cIndices) int* rgIndices, size_t cIndices ); -#define _Inout_count_(size) _SAL1_1_Source_(_Inout_count_, (size), _Prepost_count_(size)) -#define _Inout_opt_count_(size) \ - _SAL1_1_Source_(_Inout_opt_count_, (size), _Prepost_opt_count_(size)) -#define _Inout_bytecount_(size) \ - _SAL1_1_Source_(_Inout_bytecount_, (size), _Prepost_bytecount_(size)) -#define _Inout_opt_bytecount_(size) \ - _SAL1_1_Source_(_Inout_opt_bytecount_, (size), _Prepost_opt_bytecount_(size)) - -#define _Inout_count_c_(size) _SAL1_1_Source_(_Inout_count_c_, (size), _Prepost_count_c_(size)) -#define _Inout_opt_count_c_(size) \ - _SAL1_1_Source_(_Inout_opt_count_c_, (size), _Prepost_opt_count_c_(size)) -#define _Inout_bytecount_c_(size) \ - _SAL1_1_Source_(_Inout_bytecount_c_, (size), _Prepost_bytecount_c_(size)) -#define _Inout_opt_bytecount_c_(size) \ - _SAL1_1_Source_(_Inout_opt_bytecount_c_, (size), _Prepost_opt_bytecount_c_(size)) - -// nullterminated 'inout' buffers with initialized elements before and after the call -// e.g. void ModifyIndices( _Inout_count_(cIndices) int* rgIndices, size_t cIndices ); -#define _Inout_z_count_(size) \ - _SAL1_1_Source_(_Inout_z_count_, (size), _Prepost_z_ _Prepost_count_(size)) -#define _Inout_opt_z_count_(size) \ - _SAL1_1_Source_(_Inout_opt_z_count_, (size), _Prepost_z_ _Prepost_opt_count_(size)) -#define _Inout_z_bytecount_(size) \ - _SAL1_1_Source_(_Inout_z_bytecount_, (size), _Prepost_z_ _Prepost_bytecount_(size)) -#define _Inout_opt_z_bytecount_(size) \ - _SAL1_1_Source_(_Inout_opt_z_bytecount_, (size), _Prepost_z_ _Prepost_opt_bytecount_(size)) - -#define _Inout_z_count_c_(size) \ - _SAL1_1_Source_(_Inout_z_count_c_, (size), _Prepost_z_ _Prepost_count_c_(size)) -#define _Inout_opt_z_count_c_(size) \ - _SAL1_1_Source_(_Inout_opt_z_count_c_, (size), _Prepost_z_ _Prepost_opt_count_c_(size)) -#define _Inout_z_bytecount_c_(size) \ - _SAL1_1_Source_(_Inout_z_bytecount_c_, (size), _Prepost_z_ _Prepost_bytecount_c_(size)) -#define _Inout_opt_z_bytecount_c_(size) \ - _SAL1_1_Source_(_Inout_opt_z_bytecount_c_, (size), _Prepost_z_ _Prepost_opt_bytecount_c_(size)) - -#define _Inout_ptrdiff_count_(size) \ - _SAL1_1_Source_(_Inout_ptrdiff_count_, (size), _Pre_ptrdiff_count_(size)) -#define _Inout_opt_ptrdiff_count_(size) \ - _SAL1_1_Source_(_Inout_opt_ptrdiff_count_, (size), _Pre_opt_ptrdiff_count_(size)) - -#define _Inout_count_x_(size) _SAL1_1_Source_(_Inout_count_x_, (size), _Prepost_count_x_(size)) -#define _Inout_opt_count_x_(size) \ - _SAL1_1_Source_(_Inout_opt_count_x_, (size), _Prepost_opt_count_x_(size)) -#define _Inout_bytecount_x_(size) \ - _SAL1_1_Source_(_Inout_bytecount_x_, (size), _Prepost_bytecount_x_(size)) -#define _Inout_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_Inout_opt_bytecount_x_, (size), _Prepost_opt_bytecount_x_(size)) - -// e.g. void AppendToLPSTR( _In_ LPCSTR szFrom, _Inout_cap_(cchTo) LPSTR* szTo, size_t cchTo ); -#define _Inout_cap_(size) _SAL1_1_Source_(_Inout_cap_, (size), _Pre_valid_cap_(size) _Post_valid_) -#define _Inout_opt_cap_(size) \ - _SAL1_1_Source_(_Inout_opt_cap_, (size), _Pre_opt_valid_cap_(size) _Post_valid_) -#define _Inout_bytecap_(size) \ - _SAL1_1_Source_(_Inout_bytecap_, (size), _Pre_valid_bytecap_(size) _Post_valid_) -#define _Inout_opt_bytecap_(size) \ - _SAL1_1_Source_(_Inout_opt_bytecap_, (size), _Pre_opt_valid_bytecap_(size) _Post_valid_) - -#define _Inout_cap_c_(size) \ - _SAL1_1_Source_(_Inout_cap_c_, (size), _Pre_valid_cap_c_(size) _Post_valid_) -#define _Inout_opt_cap_c_(size) \ - _SAL1_1_Source_(_Inout_opt_cap_c_, (size), _Pre_opt_valid_cap_c_(size) _Post_valid_) -#define _Inout_bytecap_c_(size) \ - _SAL1_1_Source_(_Inout_bytecap_c_, (size), _Pre_valid_bytecap_c_(size) _Post_valid_) -#define _Inout_opt_bytecap_c_(size) \ - _SAL1_1_Source_(_Inout_opt_bytecap_c_, (size), _Pre_opt_valid_bytecap_c_(size) _Post_valid_) - -#define _Inout_cap_x_(size) \ - _SAL1_1_Source_(_Inout_cap_x_, (size), _Pre_valid_cap_x_(size) _Post_valid_) -#define _Inout_opt_cap_x_(size) \ - _SAL1_1_Source_(_Inout_opt_cap_x_, (size), _Pre_opt_valid_cap_x_(size) _Post_valid_) -#define _Inout_bytecap_x_(size) \ - _SAL1_1_Source_(_Inout_bytecap_x_, (size), _Pre_valid_bytecap_x_(size) _Post_valid_) -#define _Inout_opt_bytecap_x_(size) \ - _SAL1_1_Source_(_Inout_opt_bytecap_x_, (size), _Pre_opt_valid_bytecap_x_(size) _Post_valid_) - -// inout string buffers with writable size -// e.g. void AppendStr( _In_z_ const char* szFrom, _Inout_z_cap_(cchTo) char* szTo, size_t cchTo ); -#define _Inout_z_cap_(size) _SAL1_1_Source_(_Inout_z_cap_, (size), _Pre_z_cap_(size) _Post_z_) -#define _Inout_opt_z_cap_(size) \ - _SAL1_1_Source_(_Inout_opt_z_cap_, (size), _Pre_opt_z_cap_(size) _Post_z_) -#define _Inout_z_bytecap_(size) \ - _SAL1_1_Source_(_Inout_z_bytecap_, (size), _Pre_z_bytecap_(size) _Post_z_) -#define _Inout_opt_z_bytecap_(size) \ - _SAL1_1_Source_(_Inout_opt_z_bytecap_, (size), _Pre_opt_z_bytecap_(size) _Post_z_) - -#define _Inout_z_cap_c_(size) _SAL1_1_Source_(_Inout_z_cap_c_, (size), _Pre_z_cap_c_(size) _Post_z_) -#define _Inout_opt_z_cap_c_(size) \ - _SAL1_1_Source_(_Inout_opt_z_cap_c_, (size), _Pre_opt_z_cap_c_(size) _Post_z_) -#define _Inout_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Inout_z_bytecap_c_, (size), _Pre_z_bytecap_c_(size) _Post_z_) -#define _Inout_opt_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Inout_opt_z_bytecap_c_, (size), _Pre_opt_z_bytecap_c_(size) _Post_z_) - -#define _Inout_z_cap_x_(size) _SAL1_1_Source_(_Inout_z_cap_x_, (size), _Pre_z_cap_x_(size) _Post_z_) -#define _Inout_opt_z_cap_x_(size) \ - _SAL1_1_Source_(_Inout_opt_z_cap_x_, (size), _Pre_opt_z_cap_x_(size) _Post_z_) -#define _Inout_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Inout_z_bytecap_x_, (size), _Pre_z_bytecap_x_(size) _Post_z_) -#define _Inout_opt_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Inout_opt_z_bytecap_x_, (size), _Pre_opt_z_bytecap_x_(size) _Post_z_) - -// returning pointers to valid objects -#define _Ret_ _SAL1_1_Source_(_Ret_, (), _Ret_valid_) -#define _Ret_opt_ _SAL1_1_Source_(_Ret_opt_, (), _Ret_opt_valid_) - -// annotations to express 'boundedness' of integral value parameter -#define _In_bound_ _SAL1_1_Source_(_In_bound_, (), _In_bound_impl_) -#define _Out_bound_ _SAL1_1_Source_(_Out_bound_, (), _Out_bound_impl_) -#define _Ret_bound_ _SAL1_1_Source_(_Ret_bound_, (), _Ret_bound_impl_) -#define _Deref_in_bound_ _SAL1_1_Source_(_Deref_in_bound_, (), _Deref_in_bound_impl_) -#define _Deref_out_bound_ _SAL1_1_Source_(_Deref_out_bound_, (), _Deref_out_bound_impl_) -#define _Deref_inout_bound_ \ - _SAL1_1_Source_(_Deref_inout_bound_, (), _Deref_in_bound_ _Deref_out_bound_) -#define _Deref_ret_bound_ _SAL1_1_Source_(_Deref_ret_bound_, (), _Deref_ret_bound_impl_) - -// e.g. HRESULT HrCreatePoint( _Deref_out_opt_ POINT** ppPT ); -#define _Deref_out_ _SAL1_1_Source_(_Deref_out_, (), _Out_ _Deref_post_valid_) -#define _Deref_out_opt_ _SAL1_1_Source_(_Deref_out_opt_, (), _Out_ _Deref_post_opt_valid_) -#define _Deref_opt_out_ _SAL1_1_Source_(_Deref_opt_out_, (), _Out_opt_ _Deref_post_valid_) -#define _Deref_opt_out_opt_ \ - _SAL1_1_Source_(_Deref_opt_out_opt_, (), _Out_opt_ _Deref_post_opt_valid_) - -// e.g. void CloneString( _In_z_ const WCHAR* wzFrom, _Deref_out_z_ WCHAR** pWzTo ); -#define _Deref_out_z_ _SAL1_1_Source_(_Deref_out_z_, (), _Out_ _Deref_post_z_) -#define _Deref_out_opt_z_ _SAL1_1_Source_(_Deref_out_opt_z_, (), _Out_ _Deref_post_opt_z_) -#define _Deref_opt_out_z_ _SAL1_1_Source_(_Deref_opt_out_z_, (), _Out_opt_ _Deref_post_z_) -#define _Deref_opt_out_opt_z_ \ - _SAL1_1_Source_(_Deref_opt_out_opt_z_, (), _Out_opt_ _Deref_post_opt_z_) - -// -// _Deref_pre_ --- -// -// describing conditions for array elements of dereferenced pointer parameters that must be met -// before the call - -// e.g. void SaveStringArray( _In_count_(cStrings) _Deref_pre_z_ const WCHAR* const rgpwch[] ); -#define _Deref_pre_z_ \ - _SAL1_1_Source_(_Deref_pre_z_, (), \ - _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__zterm_impl) \ - _Pre_valid_impl_) -#define _Deref_pre_opt_z_ \ - _SAL1_1_Source_(_Deref_pre_opt_z_, (), \ - _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__zterm_impl) \ - _Pre_valid_impl_) - -// e.g. void FillInArrayOfStr32( _In_count_(cStrings) _Deref_pre_cap_c_(32) _Deref_post_z_ WCHAR* -// const rgpwch[] ); buffer capacity is described by another parameter -#define _Deref_pre_cap_(size) \ - _SAL1_1_Source_(_Deref_pre_cap_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_impl(size))) -#define _Deref_pre_opt_cap_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_cap_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__cap_impl(size))) -#define _Deref_pre_bytecap_(size) \ - _SAL1_1_Source_(_Deref_pre_bytecap_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_impl(size))) -#define _Deref_pre_opt_bytecap_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_bytecap_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_impl(size))) - -// buffer capacity is described by a constant expression -#define _Deref_pre_cap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_cap_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__cap_c_impl(size))) -#define _Deref_pre_opt_cap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_cap_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__cap_c_impl(size))) -#define _Deref_pre_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_bytecap_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_c_impl(size))) -#define _Deref_pre_opt_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_bytecap_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_c_impl(size))) - -// buffer capacity is described by a complex condition -#define _Deref_pre_cap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_cap_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__cap_x_impl(size))) -#define _Deref_pre_opt_cap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_cap_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__cap_x_impl(size))) -#define _Deref_pre_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_bytecap_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_x_impl(size))) -#define _Deref_pre_opt_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_bytecap_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_x_impl(size))) - -// convenience macros for nullterminated buffers with given capacity -#define _Deref_pre_z_cap_(size) \ - _SAL1_1_Source_(_Deref_pre_z_cap_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __cap_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_z_cap_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_z_cap_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __cap_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_z_bytecap_(size) \ - _SAL1_1_Source_(_Deref_pre_z_bytecap_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __bytecap_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_z_bytecap_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __bytecap_impl(size)) _Pre_valid_impl_) - -#define _Deref_pre_z_cap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_z_cap_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __cap_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_z_cap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_z_cap_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __cap_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_z_bytecap_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __bytecap_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __bytecap_c_impl(size)) _Pre_valid_impl_) - -#define _Deref_pre_z_cap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_z_cap_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __cap_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_z_cap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_z_cap_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __cap_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_z_bytecap_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __bytecap_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_z_bytecap_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre2_impl_(__zterm_impl, __bytecap_x_impl(size)) _Pre_valid_impl_) - -// known capacity and valid but unknown readable extent -#define _Deref_pre_valid_cap_(size) \ - _SAL1_1_Source_(_Deref_pre_valid_cap_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_impl(size)) \ - _Pre_valid_impl_) -#define _Deref_pre_opt_valid_cap_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_valid_cap_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) _Deref_pre1_impl_(__cap_impl(size)) \ - _Pre_valid_impl_) -#define _Deref_pre_valid_bytecap_(size) \ - _SAL1_1_Source_(_Deref_pre_valid_bytecap_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_valid_bytecap_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_impl(size)) _Pre_valid_impl_) - -#define _Deref_pre_valid_cap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_valid_cap_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_c_impl(size)) \ - _Pre_valid_impl_) -#define _Deref_pre_opt_valid_cap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_valid_cap_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__cap_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_valid_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_valid_bytecap_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_valid_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_c_impl(size)) _Pre_valid_impl_) - -#define _Deref_pre_valid_cap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_valid_cap_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__cap_x_impl(size)) \ - _Pre_valid_impl_) -#define _Deref_pre_opt_valid_cap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_valid_cap_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__cap_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_valid_bytecap_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_valid_bytecap_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecap_x_impl(size)) _Pre_valid_impl_) - -// e.g. void SaveMatrix( _In_count_(n) _Deref_pre_count_(n) const Elem** matrix, size_t n ); -// valid buffer extent is described by another parameter -#define _Deref_pre_count_(size) \ - _SAL1_1_Source_(_Deref_pre_count_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) _Deref_pre1_impl_(__count_impl(size)) \ - _Pre_valid_impl_) -#define _Deref_pre_opt_count_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_count_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__count_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_bytecount_(size) \ - _SAL1_1_Source_(_Deref_pre_bytecount_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_bytecount_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_bytecount_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecount_impl(size)) _Pre_valid_impl_) - -// valid buffer extent is described by a constant expression -#define _Deref_pre_count_c_(size) \ - _SAL1_1_Source_(_Deref_pre_count_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_count_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_count_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__count_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_bytecount_c_(size) \ - _SAL1_1_Source_(_Deref_pre_bytecount_c_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_bytecount_c_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_bytecount_c_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecount_c_impl(size)) _Pre_valid_impl_) - -// valid buffer extent is described by a complex expression -#define _Deref_pre_count_x_(size) \ - _SAL1_1_Source_(_Deref_pre_count_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_count_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_count_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__count_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_bytecount_x_(size) \ - _SAL1_1_Source_(_Deref_pre_bytecount_x_, (size), \ - _Deref_pre1_impl_(__notnull_impl_notref) \ - _Deref_pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) -#define _Deref_pre_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_Deref_pre_opt_bytecount_x_, (size), \ - _Deref_pre1_impl_(__maybenull_impl_notref) \ - _Deref_pre1_impl_(__bytecount_x_impl(size)) _Pre_valid_impl_) - -// e.g. void PrintStringArray( _In_count_(cElems) _Deref_pre_valid_ LPCSTR rgStr[], size_t cElems ); -#define _Deref_pre_valid_ \ - _SAL1_1_Source_(_Deref_pre_valid_, (), \ - _Deref_pre1_impl_(__notnull_impl_notref) _Pre_valid_impl_) -#define _Deref_pre_opt_valid_ \ - _SAL1_1_Source_(_Deref_pre_opt_valid_, (), \ - _Deref_pre1_impl_(__maybenull_impl_notref) _Pre_valid_impl_) -#define _Deref_pre_invalid_ \ - _SAL1_1_Source_(_Deref_pre_invalid_, (), _Deref_pre1_impl_(__notvalid_impl)) - -#define _Deref_pre_notnull_ \ - _SAL1_1_Source_(_Deref_pre_notnull_, (), _Deref_pre1_impl_(__notnull_impl_notref)) -#define _Deref_pre_maybenull_ \ - _SAL1_1_Source_(_Deref_pre_maybenull_, (), _Deref_pre1_impl_(__maybenull_impl_notref)) -#define _Deref_pre_null_ \ - _SAL1_1_Source_(_Deref_pre_null_, (), _Deref_pre1_impl_(__null_impl_notref)) - -// restrict access rights -#define _Deref_pre_readonly_ \ - _SAL1_1_Source_(_Deref_pre_readonly_, (), _Deref_pre1_impl_(__readaccess_impl_notref)) -#define _Deref_pre_writeonly_ \ - _SAL1_1_Source_(_Deref_pre_writeonly_, (), _Deref_pre1_impl_(__writeaccess_impl_notref)) - -// -// _Deref_post_ --- -// -// describing conditions for array elements or dereferenced pointer parameters that hold after the -// call - -// e.g. void CloneString( _In_z_ const Wchar_t* wzIn _Out_ _Deref_post_z_ WCHAR** pWzOut ); -#define _Deref_post_z_ \ - _SAL1_1_Source_(_Deref_post_z_, (), \ - _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__zterm_impl) \ - _Post_valid_impl_) -#define _Deref_post_opt_z_ \ - _SAL1_1_Source_(_Deref_post_opt_z_, (), \ - _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post1_impl_(__zterm_impl) \ - _Post_valid_impl_) - -// e.g. HRESULT HrAllocateMemory( size_t cb, _Out_ _Deref_post_bytecap_(cb) void** ppv ); -// buffer capacity is described by another parameter -#define _Deref_post_cap_(size) \ - _SAL1_1_Source_(_Deref_post_cap_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__cap_impl(size))) -#define _Deref_post_opt_cap_(size) \ - _SAL1_1_Source_(_Deref_post_opt_cap_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__cap_impl(size))) -#define _Deref_post_bytecap_(size) \ - _SAL1_1_Source_(_Deref_post_bytecap_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecap_impl(size))) -#define _Deref_post_opt_bytecap_(size) \ - _SAL1_1_Source_(_Deref_post_opt_bytecap_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecap_impl(size))) - -// buffer capacity is described by a constant expression -#define _Deref_post_cap_c_(size) \ - _SAL1_1_Source_(_Deref_post_cap_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__cap_c_impl(size))) -#define _Deref_post_opt_cap_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_cap_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__cap_c_impl(size))) -#define _Deref_post_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_post_bytecap_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecap_c_impl(size))) -#define _Deref_post_opt_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_bytecap_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecap_c_impl(size))) - -// buffer capacity is described by a complex expression -#define _Deref_post_cap_x_(size) \ - _SAL1_1_Source_(_Deref_post_cap_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__cap_x_impl(size))) -#define _Deref_post_opt_cap_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_cap_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__cap_x_impl(size))) -#define _Deref_post_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_post_bytecap_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecap_x_impl(size))) -#define _Deref_post_opt_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_bytecap_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecap_x_impl(size))) - -// convenience macros for nullterminated buffers with given capacity -#define _Deref_post_z_cap_(size) \ - _SAL1_1_Source_(_Deref_post_z_cap_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __cap_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_z_cap_(size) \ - _SAL1_1_Source_(_Deref_post_opt_z_cap_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __cap_impl(size)) _Post_valid_impl_) -#define _Deref_post_z_bytecap_(size) \ - _SAL1_1_Source_(_Deref_post_z_bytecap_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __bytecap_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_z_bytecap_(size) \ - _SAL1_1_Source_(_Deref_post_opt_z_bytecap_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __bytecap_impl(size)) _Post_valid_impl_) - -#define _Deref_post_z_cap_c_(size) \ - _SAL1_1_Source_(_Deref_post_z_cap_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __cap_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_z_cap_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_z_cap_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __cap_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_post_z_bytecap_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_( \ - __zterm_impl, __bytecap_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_z_bytecap_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_( \ - __zterm_impl, __bytecap_c_impl(size)) _Post_valid_impl_) - -#define _Deref_post_z_cap_x_(size) \ - _SAL1_1_Source_(_Deref_post_z_cap_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __cap_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_z_cap_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_z_cap_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post2_impl_(__zterm_impl, __cap_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_post_z_bytecap_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) _Deref_post2_impl_( \ - __zterm_impl, __bytecap_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_z_bytecap_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) _Deref_post2_impl_( \ - __zterm_impl, __bytecap_x_impl(size)) _Post_valid_impl_) - -// known capacity and valid but unknown readable extent -#define _Deref_post_valid_cap_(size) \ - _SAL1_1_Source_(_Deref_post_valid_cap_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) _Deref_post1_impl_(__cap_impl(size)) \ - _Post_valid_impl_) -#define _Deref_post_opt_valid_cap_(size) \ - _SAL1_1_Source_(_Deref_post_opt_valid_cap_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__cap_impl(size)) _Post_valid_impl_) -#define _Deref_post_valid_bytecap_(size) \ - _SAL1_1_Source_(_Deref_post_valid_bytecap_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecap_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_valid_bytecap_(size) \ - _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecap_impl(size)) _Post_valid_impl_) - -#define _Deref_post_valid_cap_c_(size) \ - _SAL1_1_Source_(_Deref_post_valid_cap_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__cap_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_valid_cap_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_valid_cap_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__cap_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_valid_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_post_valid_bytecap_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecap_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_valid_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecap_c_impl(size)) _Post_valid_impl_) - -#define _Deref_post_valid_cap_x_(size) \ - _SAL1_1_Source_(_Deref_post_valid_cap_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__cap_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_valid_cap_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_valid_cap_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__cap_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_post_valid_bytecap_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecap_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_valid_bytecap_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecap_x_impl(size)) _Post_valid_impl_) - -// e.g. HRESULT HrAllocateZeroInitializedMemory( size_t cb, _Out_ _Deref_post_bytecount_(cb) void** -// ppv ); valid buffer extent is described by another parameter -#define _Deref_post_count_(size) \ - _SAL1_1_Source_(_Deref_post_count_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__count_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_count_(size) \ - _SAL1_1_Source_(_Deref_post_opt_count_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__count_impl(size)) _Post_valid_impl_) -#define _Deref_post_bytecount_(size) \ - _SAL1_1_Source_(_Deref_post_bytecount_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_bytecount_(size) \ - _SAL1_1_Source_(_Deref_post_opt_bytecount_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) - -// buffer capacity is described by a constant expression -#define _Deref_post_count_c_(size) \ - _SAL1_1_Source_(_Deref_post_count_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__count_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_count_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_count_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__count_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_bytecount_c_(size) \ - _SAL1_1_Source_(_Deref_post_bytecount_c_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_bytecount_c_(size) \ - _SAL1_1_Source_(_Deref_post_opt_bytecount_c_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) - -// buffer capacity is described by a complex expression -#define _Deref_post_count_x_(size) \ - _SAL1_1_Source_(_Deref_post_count_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__count_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_count_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_count_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__count_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_bytecount_x_(size) \ - _SAL1_1_Source_(_Deref_post_bytecount_x_, (size), \ - _Deref_post1_impl_(__notnull_impl_notref) \ - _Deref_post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) -#define _Deref_post_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_Deref_post_opt_bytecount_x_, (size), \ - _Deref_post1_impl_(__maybenull_impl_notref) \ - _Deref_post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) - -// e.g. void GetStrings( _Out_count_(cElems) _Deref_post_valid_ LPSTR const rgStr[], size_t cElems -// ); -#define _Deref_post_valid_ \ - _SAL1_1_Source_(_Deref_post_valid_, (), \ - _Deref_post1_impl_(__notnull_impl_notref) _Post_valid_impl_) -#define _Deref_post_opt_valid_ \ - _SAL1_1_Source_(_Deref_post_opt_valid_, (), \ - _Deref_post1_impl_(__maybenull_impl_notref) _Post_valid_impl_) - -#define _Deref_post_notnull_ \ - _SAL1_1_Source_(_Deref_post_notnull_, (), _Deref_post1_impl_(__notnull_impl_notref)) -#define _Deref_post_maybenull_ \ - _SAL1_1_Source_(_Deref_post_maybenull_, (), _Deref_post1_impl_(__maybenull_impl_notref)) -#define _Deref_post_null_ \ - _SAL1_1_Source_(_Deref_post_null_, (), _Deref_post1_impl_(__null_impl_notref)) - -// -// _Deref_ret_ --- -// - -#define _Deref_ret_z_ \ - _SAL1_1_Source_(_Deref_ret_z_, (), \ - _Deref_ret1_impl_(__notnull_impl_notref) _Deref_ret1_impl_(__zterm_impl)) -#define _Deref_ret_opt_z_ \ - _SAL1_1_Source_(_Deref_ret_opt_z_, (), \ - _Deref_ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__zterm_impl)) - -// -// special _Deref_ --- -// -#define _Deref2_pre_readonly_ \ - _SAL1_1_Source_(_Deref2_pre_readonly_, (), _Deref2_pre1_impl_(__readaccess_impl_notref)) - -// -// _Ret_ --- -// - -// e.g. _Ret_opt_valid_ LPSTR void* CloneSTR( _Pre_valid_ LPSTR src ); -#define _Ret_opt_valid_ \ - _SAL1_1_Source_(_Ret_opt_valid_, (), _Ret1_impl_(__maybenull_impl_notref) _Ret_valid_impl_) -#define _Ret_opt_z_ \ - _SAL1_1_Source_(_Ret_opt_z_, (), _Ret2_impl_(__maybenull_impl, __zterm_impl) _Ret_valid_impl_) - -// e.g. _Ret_opt_bytecap_(cb) void* AllocateMemory( size_t cb ); -// Buffer capacity is described by another parameter -#define _Ret_cap_(size) \ - _SAL1_1_Source_(_Ret_cap_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_impl(size))) -#define _Ret_opt_cap_(size) \ - _SAL1_1_Source_(_Ret_opt_cap_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_impl(size))) -#define _Ret_bytecap_(size) \ - _SAL1_1_Source_(_Ret_bytecap_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_impl(size))) -#define _Ret_opt_bytecap_(size) \ - _SAL1_1_Source_(_Ret_opt_bytecap_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_impl(size))) - -// Buffer capacity is described by a constant expression -#define _Ret_cap_c_(size) \ - _SAL1_1_Source_(_Ret_cap_c_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_c_impl(size))) -#define _Ret_opt_cap_c_(size) \ - _SAL1_1_Source_(_Ret_opt_cap_c_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_c_impl(size))) -#define _Ret_bytecap_c_(size) \ - _SAL1_1_Source_(_Ret_bytecap_c_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_c_impl(size))) -#define _Ret_opt_bytecap_c_(size) \ - _SAL1_1_Source_(_Ret_opt_bytecap_c_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_c_impl(size))) - -// Buffer capacity is described by a complex condition -#define _Ret_cap_x_(size) \ - _SAL1_1_Source_(_Ret_cap_x_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__cap_x_impl(size))) -#define _Ret_opt_cap_x_(size) \ - _SAL1_1_Source_(_Ret_opt_cap_x_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__cap_x_impl(size))) -#define _Ret_bytecap_x_(size) \ - _SAL1_1_Source_(_Ret_bytecap_x_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecap_x_impl(size))) -#define _Ret_opt_bytecap_x_(size) \ - _SAL1_1_Source_(_Ret_opt_bytecap_x_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecap_x_impl(size))) - -// return value is nullterminated and capacity is given by another parameter -#define _Ret_z_cap_(size) \ - _SAL1_1_Source_(_Ret_z_cap_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret2_impl_(__zterm_impl, __cap_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_opt_z_cap_(size) \ - _SAL1_1_Source_(_Ret_opt_z_cap_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) \ - _Ret2_impl_(__zterm_impl, __cap_impl(size)) _Ret_valid_impl_) -#define _Ret_z_bytecap_(size) \ - _SAL1_1_Source_(_Ret_z_bytecap_, (size), \ - _Ret1_impl_(__notnull_impl_notref) \ - _Ret2_impl_(__zterm_impl, __bytecap_impl(size)) _Ret_valid_impl_) -#define _Ret_opt_z_bytecap_(size) \ - _SAL1_1_Source_(_Ret_opt_z_bytecap_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) \ - _Ret2_impl_(__zterm_impl, __bytecap_impl(size)) _Ret_valid_impl_) - -// e.g. _Ret_opt_bytecount_(cb) void* AllocateZeroInitializedMemory( size_t cb ); -// Valid Buffer extent is described by another parameter -#define _Ret_count_(size) \ - _SAL1_1_Source_(_Ret_count_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_opt_count_(size) \ - _SAL1_1_Source_(_Ret_opt_count_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_bytecount_(size) \ - _SAL1_1_Source_(_Ret_bytecount_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_opt_bytecount_(size) \ - _SAL1_1_Source_(_Ret_opt_bytecount_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_impl(size)) \ - _Ret_valid_impl_) - -// Valid Buffer extent is described by a constant expression -#define _Ret_count_c_(size) \ - _SAL1_1_Source_(_Ret_count_c_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_c_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_opt_count_c_(size) \ - _SAL1_1_Source_(_Ret_opt_count_c_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_c_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_bytecount_c_(size) \ - _SAL1_1_Source_(_Ret_bytecount_c_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_c_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_opt_bytecount_c_(size) \ - _SAL1_1_Source_(_Ret_opt_bytecount_c_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_c_impl(size)) \ - _Ret_valid_impl_) - -// Valid Buffer extent is described by a complex expression -#define _Ret_count_x_(size) \ - _SAL1_1_Source_(_Ret_count_x_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__count_x_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_opt_count_x_(size) \ - _SAL1_1_Source_(_Ret_opt_count_x_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__count_x_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_bytecount_x_(size) \ - _SAL1_1_Source_(_Ret_bytecount_x_, (size), \ - _Ret1_impl_(__notnull_impl_notref) _Ret1_impl_(__bytecount_x_impl(size)) \ - _Ret_valid_impl_) -#define _Ret_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_Ret_opt_bytecount_x_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) _Ret1_impl_(__bytecount_x_impl(size)) \ - _Ret_valid_impl_) - -// return value is nullterminated and length is given by another parameter -#define _Ret_z_count_(size) \ - _SAL1_1_Source_(_Ret_z_count_, (size), \ - _Ret1_impl_(__notnull_impl_notref) \ - _Ret2_impl_(__zterm_impl, __count_impl(size)) _Ret_valid_impl_) -#define _Ret_opt_z_count_(size) \ - _SAL1_1_Source_(_Ret_opt_z_count_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) \ - _Ret2_impl_(__zterm_impl, __count_impl(size)) _Ret_valid_impl_) -#define _Ret_z_bytecount_(size) \ - _SAL1_1_Source_(_Ret_z_bytecount_, (size), \ - _Ret1_impl_(__notnull_impl_notref) \ - _Ret2_impl_(__zterm_impl, __bytecount_impl(size)) _Ret_valid_impl_) -#define _Ret_opt_z_bytecount_(size) \ - _SAL1_1_Source_(_Ret_opt_z_bytecount_, (size), \ - _Ret1_impl_(__maybenull_impl_notref) \ - _Ret2_impl_(__zterm_impl, __bytecount_impl(size)) _Ret_valid_impl_) - -// _Pre_ annotations --- -#define _Pre_opt_z_ \ - _SAL1_1_Source_(_Pre_opt_z_, (), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__zterm_impl) \ - _Pre_valid_impl_) - -// restrict access rights -#define _Pre_readonly_ _SAL1_1_Source_(_Pre_readonly_, (), _Pre1_impl_(__readaccess_impl_notref)) -#define _Pre_writeonly_ _SAL1_1_Source_(_Pre_writeonly_, (), _Pre1_impl_(__writeaccess_impl_notref)) - -// e.g. void FreeMemory( _Pre_bytecap_(cb) _Post_ptr_invalid_ void* pv, size_t cb ); -// buffer capacity described by another parameter -#define _Pre_cap_(size) \ - _SAL1_1_Source_(_Pre_cap_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_impl(size))) -#define _Pre_opt_cap_(size) \ - _SAL1_1_Source_(_Pre_opt_cap_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_impl(size))) -#define _Pre_bytecap_(size) \ - _SAL1_1_Source_(_Pre_bytecap_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_impl(size))) -#define _Pre_opt_bytecap_(size) \ - _SAL1_1_Source_(_Pre_opt_bytecap_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_impl(size))) - -// buffer capacity described by a constant expression -#define _Pre_cap_c_(size) \ - _SAL1_1_Source_(_Pre_cap_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_impl(size))) -#define _Pre_opt_cap_c_(size) \ - _SAL1_1_Source_(_Pre_opt_cap_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_impl(size))) -#define _Pre_bytecap_c_(size) \ - _SAL1_1_Source_(_Pre_bytecap_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size))) -#define _Pre_opt_bytecap_c_(size) \ - _SAL1_1_Source_(_Pre_opt_bytecap_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size))) -#define _Pre_cap_c_one_ \ - _SAL1_1_Source_(_Pre_cap_c_one_, (), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl)) -#define _Pre_opt_cap_c_one_ \ - _SAL1_1_Source_(_Pre_opt_cap_c_one_, (), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl)) - -// buffer capacity is described by another parameter multiplied by a constant expression -#define _Pre_cap_m_(mult, size) \ - _SAL1_1_Source_(_Pre_cap_m_, (mult, size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__mult_impl(mult, size))) -#define _Pre_opt_cap_m_(mult, size) \ - _SAL1_1_Source_(_Pre_opt_cap_m_, (mult, size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__mult_impl(mult, size))) - -// buffer capacity described by size of other buffer, only used by dangerous legacy APIs -// e.g. int strcpy(_Pre_cap_for_(src) char* dst, const char* src); -#define _Pre_cap_for_(param) \ - _SAL1_1_Source_(_Pre_cap_for_, (param), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_for_impl(param))) -#define _Pre_opt_cap_for_(param) \ - _SAL1_1_Source_(_Pre_opt_cap_for_, (param), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_for_impl(param))) - -// buffer capacity described by a complex condition -#define _Pre_cap_x_(size) \ - _SAL1_1_Source_(_Pre_cap_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(size))) -#define _Pre_opt_cap_x_(size) \ - _SAL1_1_Source_(_Pre_opt_cap_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(size))) -#define _Pre_bytecap_x_(size) \ - _SAL1_1_Source_(_Pre_bytecap_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size))) -#define _Pre_opt_bytecap_x_(size) \ - _SAL1_1_Source_(_Pre_opt_bytecap_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size))) - -// buffer capacity described by the difference to another pointer parameter -#define _Pre_ptrdiff_cap_(ptr) \ - _SAL1_1_Source_(_Pre_ptrdiff_cap_, (ptr), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(__ptrdiff(ptr)))) -#define _Pre_opt_ptrdiff_cap_(ptr) \ - _SAL1_1_Source_(_Pre_opt_ptrdiff_cap_, (ptr), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre1_impl_(__cap_x_impl(__ptrdiff(ptr)))) - -// e.g. void AppendStr( _Pre_z_ const char* szFrom, _Pre_z_cap_(cchTo) _Post_z_ char* szTo, size_t -// cchTo ); -#define _Pre_z_cap_(size) \ - _SAL1_1_Source_(_Pre_z_cap_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre2_impl_(__zterm_impl, __cap_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_z_cap_(size) \ - _SAL1_1_Source_(_Pre_opt_z_cap_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __cap_impl(size)) _Pre_valid_impl_) -#define _Pre_z_bytecap_(size) \ - _SAL1_1_Source_(_Pre_z_bytecap_, (size), \ - _Pre1_impl_(__notnull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __bytecap_impl(size)) _Pre_valid_impl_) -#define _Pre_opt_z_bytecap_(size) \ - _SAL1_1_Source_(_Pre_opt_z_bytecap_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __bytecap_impl(size)) _Pre_valid_impl_) - -#define _Pre_z_cap_c_(size) \ - _SAL1_1_Source_(_Pre_z_cap_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __cap_c_impl(size)) _Pre_valid_impl_) -#define _Pre_opt_z_cap_c_(size) \ - _SAL1_1_Source_(_Pre_opt_z_cap_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __cap_c_impl(size)) _Pre_valid_impl_) -#define _Pre_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Pre_z_bytecap_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __bytecap_c_impl(size)) _Pre_valid_impl_) -#define _Pre_opt_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Pre_opt_z_bytecap_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __bytecap_c_impl(size)) _Pre_valid_impl_) - -#define _Pre_z_cap_x_(size) \ - _SAL1_1_Source_(_Pre_z_cap_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __cap_x_impl(size)) _Pre_valid_impl_) -#define _Pre_opt_z_cap_x_(size) \ - _SAL1_1_Source_(_Pre_opt_z_cap_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __cap_x_impl(size)) _Pre_valid_impl_) -#define _Pre_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Pre_z_bytecap_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __bytecap_x_impl(size)) _Pre_valid_impl_) -#define _Pre_opt_z_bytecap_x_(size) \ - _SAL1_1_Source_(_Pre_opt_z_bytecap_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre2_impl_(__zterm_impl, __bytecap_x_impl(size)) _Pre_valid_impl_) - -// known capacity and valid but unknown readable extent -#define _Pre_valid_cap_(size) \ - _SAL1_1_Source_(_Pre_valid_cap_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_valid_cap_(size) \ - _SAL1_1_Source_(_Pre_opt_valid_cap_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_valid_bytecap_(size) \ - _SAL1_1_Source_(_Pre_valid_bytecap_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_valid_bytecap_(size) \ - _SAL1_1_Source_(_Pre_opt_valid_bytecap_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_impl(size)) \ - _Pre_valid_impl_) - -#define _Pre_valid_cap_c_(size) \ - _SAL1_1_Source_(_Pre_valid_cap_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_valid_cap_c_(size) \ - _SAL1_1_Source_(_Pre_opt_valid_cap_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_valid_bytecap_c_(size) \ - _SAL1_1_Source_(_Pre_valid_bytecap_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_valid_bytecap_c_(size) \ - _SAL1_1_Source_(_Pre_opt_valid_bytecap_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_c_impl(size)) \ - _Pre_valid_impl_) - -#define _Pre_valid_cap_x_(size) \ - _SAL1_1_Source_(_Pre_valid_cap_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_x_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_valid_cap_x_(size) \ - _SAL1_1_Source_(_Pre_opt_valid_cap_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_x_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Pre_valid_bytecap_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Pre_opt_valid_bytecap_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecap_x_impl(size)) \ - _Pre_valid_impl_) - -// e.g. void AppendCharRange( _Pre_count_(cchFrom) const char* rgFrom, size_t cchFrom, -// _Out_z_cap_(cchTo) char* szTo, size_t cchTo ); Valid buffer extent described by another parameter -#define _Pre_count_(size) \ - _SAL1_1_Source_(_Pre_count_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_count_(size) \ - _SAL1_1_Source_(_Pre_opt_count_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_bytecount_(size) \ - _SAL1_1_Source_(_Pre_bytecount_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_bytecount_(size) \ - _SAL1_1_Source_(_Pre_opt_bytecount_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_impl(size)) \ - _Pre_valid_impl_) - -// Valid buffer extent described by a constant expression -#define _Pre_count_c_(size) \ - _SAL1_1_Source_(_Pre_count_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_c_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_count_c_(size) \ - _SAL1_1_Source_(_Pre_opt_count_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_c_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_bytecount_c_(size) \ - _SAL1_1_Source_(_Pre_bytecount_c_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_c_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_bytecount_c_(size) \ - _SAL1_1_Source_(_Pre_opt_bytecount_c_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_c_impl(size)) \ - _Pre_valid_impl_) - -// Valid buffer extent described by a complex expression -#define _Pre_count_x_(size) \ - _SAL1_1_Source_(_Pre_count_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_x_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_count_x_(size) \ - _SAL1_1_Source_(_Pre_opt_count_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__count_x_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_bytecount_x_(size) \ - _SAL1_1_Source_(_Pre_bytecount_x_, (size), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__bytecount_x_impl(size)) \ - _Pre_valid_impl_) -#define _Pre_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_Pre_opt_bytecount_x_, (size), \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__bytecount_x_impl(size)) \ - _Pre_valid_impl_) - -// Valid buffer extent described by the difference to another pointer parameter -#define _Pre_ptrdiff_count_(ptr) \ - _SAL1_1_Source_(_Pre_ptrdiff_count_, (ptr), \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__count_x_impl(__ptrdiff(ptr))) \ - _Pre_valid_impl_) -#define _Pre_opt_ptrdiff_count_(ptr) \ - _SAL1_1_Source_(_Pre_opt_ptrdiff_count_, (ptr), \ - _Pre1_impl_(__maybenull_impl_notref) \ - _Pre1_impl_(__count_x_impl(__ptrdiff(ptr))) _Pre_valid_impl_) - -// char * strncpy(_Out_cap_(_Count) _Post_maybez_ char * _Dest, _In_z_ const char * _Source, _In_ -// size_t _Count) buffer maybe zero-terminated after the call -#define _Post_maybez_ _SAL1_1_Source_(_Post_maybez_, (), _Post1_impl_(__maybezterm_impl)) - -// e.g. size_t HeapSize( _In_ HANDLE hHeap, DWORD dwFlags, _Pre_notnull_ _Post_bytecap_(return) -// LPCVOID lpMem ); -#define _Post_cap_(size) _SAL1_1_Source_(_Post_cap_, (size), _Post1_impl_(__cap_impl(size))) -#define _Post_bytecap_(size) \ - _SAL1_1_Source_(_Post_bytecap_, (size), _Post1_impl_(__bytecap_impl(size))) - -// e.g. int strlen( _In_z_ _Post_count_(return+1) const char* sz ); -#define _Post_count_(size) \ - _SAL1_1_Source_(_Post_count_, (size), _Post1_impl_(__count_impl(size)) _Post_valid_impl_) -#define _Post_bytecount_(size) \ - _SAL1_1_Source_(_Post_bytecount_, (size), \ - _Post1_impl_(__bytecount_impl(size)) _Post_valid_impl_) -#define _Post_count_c_(size) \ - _SAL1_1_Source_(_Post_count_c_, (size), _Post1_impl_(__count_c_impl(size)) _Post_valid_impl_) -#define _Post_bytecount_c_(size) \ - _SAL1_1_Source_(_Post_bytecount_c_, (size), \ - _Post1_impl_(__bytecount_c_impl(size)) _Post_valid_impl_) -#define _Post_count_x_(size) \ - _SAL1_1_Source_(_Post_count_x_, (size), _Post1_impl_(__count_x_impl(size)) _Post_valid_impl_) -#define _Post_bytecount_x_(size) \ - _SAL1_1_Source_(_Post_bytecount_x_, (size), \ - _Post1_impl_(__bytecount_x_impl(size)) _Post_valid_impl_) - -// e.g. size_t CopyStr( _In_z_ const char* szFrom, _Pre_cap_(cch) _Post_z_count_(return+1) char* -// szFrom, size_t cchFrom ); -#define _Post_z_count_(size) \ - _SAL1_1_Source_(_Post_z_count_, (size), \ - _Post2_impl_(__zterm_impl, __count_impl(size)) _Post_valid_impl_) -#define _Post_z_bytecount_(size) \ - _SAL1_1_Source_(_Post_z_bytecount_, (size), \ - _Post2_impl_(__zterm_impl, __bytecount_impl(size)) _Post_valid_impl_) -#define _Post_z_count_c_(size) \ - _SAL1_1_Source_(_Post_z_count_c_, (size), \ - _Post2_impl_(__zterm_impl, __count_c_impl(size)) _Post_valid_impl_) -#define _Post_z_bytecount_c_(size) \ - _SAL1_1_Source_(_Post_z_bytecount_c_, (size), \ - _Post2_impl_(__zterm_impl, __bytecount_c_impl(size)) _Post_valid_impl_) -#define _Post_z_count_x_(size) \ - _SAL1_1_Source_(_Post_z_count_x_, (size), \ - _Post2_impl_(__zterm_impl, __count_x_impl(size)) _Post_valid_impl_) -#define _Post_z_bytecount_x_(size) \ - _SAL1_1_Source_(_Post_z_bytecount_x_, (size), \ - _Post2_impl_(__zterm_impl, __bytecount_x_impl(size)) _Post_valid_impl_) - -// -// _Prepost_ --- -// -// describing conditions that hold before and after the function call - -#define _Prepost_opt_z_ _SAL1_1_Source_(_Prepost_opt_z_, (), _Pre_opt_z_ _Post_z_) - -#define _Prepost_count_(size) \ - _SAL1_1_Source_(_Prepost_count_, (size), _Pre_count_(size) _Post_count_(size)) -#define _Prepost_opt_count_(size) \ - _SAL1_1_Source_(_Prepost_opt_count_, (size), _Pre_opt_count_(size) _Post_count_(size)) -#define _Prepost_bytecount_(size) \ - _SAL1_1_Source_(_Prepost_bytecount_, (size), _Pre_bytecount_(size) _Post_bytecount_(size)) -#define _Prepost_opt_bytecount_(size) \ - _SAL1_1_Source_(_Prepost_opt_bytecount_, (size), \ - _Pre_opt_bytecount_(size) _Post_bytecount_(size)) -#define _Prepost_count_c_(size) \ - _SAL1_1_Source_(_Prepost_count_c_, (size), _Pre_count_c_(size) _Post_count_c_(size)) -#define _Prepost_opt_count_c_(size) \ - _SAL1_1_Source_(_Prepost_opt_count_c_, (size), _Pre_opt_count_c_(size) _Post_count_c_(size)) -#define _Prepost_bytecount_c_(size) \ - _SAL1_1_Source_(_Prepost_bytecount_c_, (size), _Pre_bytecount_c_(size) _Post_bytecount_c_(size)) -#define _Prepost_opt_bytecount_c_(size) \ - _SAL1_1_Source_(_Prepost_opt_bytecount_c_, (size), \ - _Pre_opt_bytecount_c_(size) _Post_bytecount_c_(size)) -#define _Prepost_count_x_(size) \ - _SAL1_1_Source_(_Prepost_count_x_, (size), _Pre_count_x_(size) _Post_count_x_(size)) -#define _Prepost_opt_count_x_(size) \ - _SAL1_1_Source_(_Prepost_opt_count_x_, (size), _Pre_opt_count_x_(size) _Post_count_x_(size)) -#define _Prepost_bytecount_x_(size) \ - _SAL1_1_Source_(_Prepost_bytecount_x_, (size), _Pre_bytecount_x_(size) _Post_bytecount_x_(size)) -#define _Prepost_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_Prepost_opt_bytecount_x_, (size), \ - _Pre_opt_bytecount_x_(size) _Post_bytecount_x_(size)) - -#define _Prepost_valid_ _SAL1_1_Source_(_Prepost_valid_, (), _Pre_valid_ _Post_valid_) -#define _Prepost_opt_valid_ _SAL1_1_Source_(_Prepost_opt_valid_, (), _Pre_opt_valid_ _Post_valid_) - -// -// _Deref_ --- -// -// short version for _Deref_pre_ _Deref_post_ -// describing conditions for array elements or dereferenced pointer parameters that hold before and -// after the call - -#define _Deref_prepost_z_ _SAL1_1_Source_(_Deref_prepost_z_, (), _Deref_pre_z_ _Deref_post_z_) -#define _Deref_prepost_opt_z_ \ - _SAL1_1_Source_(_Deref_prepost_opt_z_, (), _Deref_pre_opt_z_ _Deref_post_opt_z_) - -#define _Deref_prepost_cap_(size) \ - _SAL1_1_Source_(_Deref_prepost_cap_, (size), _Deref_pre_cap_(size) _Deref_post_cap_(size)) -#define _Deref_prepost_opt_cap_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_cap_, (size), \ - _Deref_pre_opt_cap_(size) _Deref_post_opt_cap_(size)) -#define _Deref_prepost_bytecap_(size) \ - _SAL1_1_Source_(_Deref_prepost_bytecap_, (size), \ - _Deref_pre_bytecap_(size) _Deref_post_bytecap_(size)) -#define _Deref_prepost_opt_bytecap_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_bytecap_, (size), \ - _Deref_pre_opt_bytecap_(size) _Deref_post_opt_bytecap_(size)) - -#define _Deref_prepost_cap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_cap_x_, (size), _Deref_pre_cap_x_(size) _Deref_post_cap_x_(size)) -#define _Deref_prepost_opt_cap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_cap_x_, (size), \ - _Deref_pre_opt_cap_x_(size) _Deref_post_opt_cap_x_(size)) -#define _Deref_prepost_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_bytecap_x_, (size), \ - _Deref_pre_bytecap_x_(size) _Deref_post_bytecap_x_(size)) -#define _Deref_prepost_opt_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_bytecap_x_, (size), \ - _Deref_pre_opt_bytecap_x_(size) _Deref_post_opt_bytecap_x_(size)) - -#define _Deref_prepost_z_cap_(size) \ - _SAL1_1_Source_(_Deref_prepost_z_cap_, (size), _Deref_pre_z_cap_(size) _Deref_post_z_cap_(size)) -#define _Deref_prepost_opt_z_cap_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_z_cap_, (size), \ - _Deref_pre_opt_z_cap_(size) _Deref_post_opt_z_cap_(size)) -#define _Deref_prepost_z_bytecap_(size) \ - _SAL1_1_Source_(_Deref_prepost_z_bytecap_, (size), \ - _Deref_pre_z_bytecap_(size) _Deref_post_z_bytecap_(size)) -#define _Deref_prepost_opt_z_bytecap_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_z_bytecap_, (size), \ - _Deref_pre_opt_z_bytecap_(size) _Deref_post_opt_z_bytecap_(size)) - -#define _Deref_prepost_valid_cap_(size) \ - _SAL1_1_Source_(_Deref_prepost_valid_cap_, (size), \ - _Deref_pre_valid_cap_(size) _Deref_post_valid_cap_(size)) -#define _Deref_prepost_opt_valid_cap_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_valid_cap_, (size), \ - _Deref_pre_opt_valid_cap_(size) _Deref_post_opt_valid_cap_(size)) -#define _Deref_prepost_valid_bytecap_(size) \ - _SAL1_1_Source_(_Deref_prepost_valid_bytecap_, (size), \ - _Deref_pre_valid_bytecap_(size) _Deref_post_valid_bytecap_(size)) -#define _Deref_prepost_opt_valid_bytecap_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_valid_bytecap_, (size), \ - _Deref_pre_opt_valid_bytecap_(size) _Deref_post_opt_valid_bytecap_(size)) - -#define _Deref_prepost_valid_cap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_valid_cap_x_, (size), \ - _Deref_pre_valid_cap_x_(size) _Deref_post_valid_cap_x_(size)) -#define _Deref_prepost_opt_valid_cap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_valid_cap_x_, (size), \ - _Deref_pre_opt_valid_cap_x_(size) _Deref_post_opt_valid_cap_x_(size)) -#define _Deref_prepost_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_valid_bytecap_x_, (size), \ - _Deref_pre_valid_bytecap_x_(size) _Deref_post_valid_bytecap_x_(size)) -#define _Deref_prepost_opt_valid_bytecap_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_valid_bytecap_x_, (size), \ - _Deref_pre_opt_valid_bytecap_x_(size) _Deref_post_opt_valid_bytecap_x_(size)) - -#define _Deref_prepost_count_(size) \ - _SAL1_1_Source_(_Deref_prepost_count_, (size), _Deref_pre_count_(size) _Deref_post_count_(size)) -#define _Deref_prepost_opt_count_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_count_, (size), \ - _Deref_pre_opt_count_(size) _Deref_post_opt_count_(size)) -#define _Deref_prepost_bytecount_(size) \ - _SAL1_1_Source_(_Deref_prepost_bytecount_, (size), \ - _Deref_pre_bytecount_(size) _Deref_post_bytecount_(size)) -#define _Deref_prepost_opt_bytecount_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_bytecount_, (size), \ - _Deref_pre_opt_bytecount_(size) _Deref_post_opt_bytecount_(size)) - -#define _Deref_prepost_count_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_count_x_, (size), \ - _Deref_pre_count_x_(size) _Deref_post_count_x_(size)) -#define _Deref_prepost_opt_count_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_count_x_, (size), \ - _Deref_pre_opt_count_x_(size) _Deref_post_opt_count_x_(size)) -#define _Deref_prepost_bytecount_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_bytecount_x_, (size), \ - _Deref_pre_bytecount_x_(size) _Deref_post_bytecount_x_(size)) -#define _Deref_prepost_opt_bytecount_x_(size) \ - _SAL1_1_Source_(_Deref_prepost_opt_bytecount_x_, (size), \ - _Deref_pre_opt_bytecount_x_(size) _Deref_post_opt_bytecount_x_(size)) - -#define _Deref_prepost_valid_ \ - _SAL1_1_Source_(_Deref_prepost_valid_, (), _Deref_pre_valid_ _Deref_post_valid_) -#define _Deref_prepost_opt_valid_ \ - _SAL1_1_Source_(_Deref_prepost_opt_valid_, (), _Deref_pre_opt_valid_ _Deref_post_opt_valid_) - -// -// _Deref_ -// -// used with references to arrays - -#define _Deref_out_z_cap_c_(size) \ - _SAL1_1_Source_(_Deref_out_z_cap_c_, (size), _Deref_pre_cap_c_(size) _Deref_post_z_) -#define _Deref_inout_z_cap_c_(size) \ - _SAL1_1_Source_(_Deref_inout_z_cap_c_, (size), _Deref_pre_z_cap_c_(size) _Deref_post_z_) -#define _Deref_out_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_out_z_bytecap_c_, (size), _Deref_pre_bytecap_c_(size) _Deref_post_z_) -#define _Deref_inout_z_bytecap_c_(size) \ - _SAL1_1_Source_(_Deref_inout_z_bytecap_c_, (size), _Deref_pre_z_bytecap_c_(size) _Deref_post_z_) -#define _Deref_inout_z_ _SAL1_1_Source_(_Deref_inout_z_, (), _Deref_prepost_z_) - -// #pragma endregion Input Buffer SAL 1 compatibility macros - -//============================================================================ -// Implementation Layer: -//============================================================================ - -// Naming conventions: -// A symbol the begins with _SA_ is for the machinery of creating any -// annotations; many of those come from sourceannotations.h in the case -// of attributes. - -// A symbol that ends with _impl is the very lowest level macro. It is -// not required to be a legal standalone annotation, and in the case -// of attribute annotations, usually is not. (In the case of some declspec -// annotations, it might be, but it should not be assumed so.) Those -// symols will be used in the _PreN..., _PostN... and _RetN... annotations -// to build up more complete annotations. - -// A symbol ending in _impl_ is reserved to the implementation as well, -// but it does form a complete annotation; usually they are used to build -// up even higher level annotations. - -#if _USE_ATTRIBUTES_FOR_SAL || _USE_DECLSPECS_FOR_SAL // [ -// Sharable "_impl" macros: these can be shared between the various annotation -// forms but are part of the implementation of the macros. These are collected -// here to assure that only necessary differences in the annotations -// exist. - -#define _Always_impl_(annos) _Group_(annos _SAL_nop_impl_) _On_failure_impl_(annos _SAL_nop_impl_) -#define _Bound_impl_ _SA_annotes0(SAL_bound) -#define _Field_range_impl_(min, max) _Range_impl_(min, max) -#define _Literal_impl_ _SA_annotes1(SAL_constant, __yes) -#define _Maybenull_impl_ _SA_annotes1(SAL_null, __maybe) -#define _Maybevalid_impl_ _SA_annotes1(SAL_valid, __maybe) -#define _Must_inspect_impl_ _Post_impl_ _SA_annotes0(SAL_mustInspect) -#define _Notliteral_impl_ _SA_annotes1(SAL_constant, __no) -#define _Notnull_impl_ _SA_annotes1(SAL_null, __no) -#define _Notvalid_impl_ _SA_annotes1(SAL_valid, __no) -#define _NullNull_terminated_impl_ \ - _Group_(_SA_annotes1(SAL_nullTerminated, __yes) \ - _SA_annotes1(SAL_readableTo, inexpressibleCount("NullNull terminated string"))) -#define _Null_impl_ _SA_annotes1(SAL_null, __yes) -#define _Null_terminated_impl_ _SA_annotes1(SAL_nullTerminated, __yes) -#define _Out_impl_ \ - _Pre1_impl_(__notnull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl) _Post_valid_impl_ -#define _Out_opt_impl_ \ - _Pre1_impl_(__maybenull_impl_notref) _Pre1_impl_(__cap_c_one_notref_impl) _Post_valid_impl_ -#define _Points_to_data_impl_ _At_(*_Curr_, _SA_annotes1(SAL_mayBePointer, __no)) -#define _Post_satisfies_impl_(cond) _Post_impl_ _Satisfies_impl_(cond) -#define _Post_valid_impl_ _Post1_impl_(__valid_impl) -#define _Pre_satisfies_impl_(cond) _Pre_impl_ _Satisfies_impl_(cond) -#define _Pre_valid_impl_ _Pre1_impl_(__valid_impl) -#define _Range_impl_(min, max) _SA_annotes2(SAL_range, min, max) -#define _Readable_bytes_impl_(size) _SA_annotes1(SAL_readableTo, byteCount(size)) -#define _Readable_elements_impl_(size) _SA_annotes1(SAL_readableTo, elementCount(size)) -#define _Ret_valid_impl_ _Ret1_impl_(__valid_impl) -#define _Satisfies_impl_(cond) _SA_annotes1(SAL_satisfies, cond) -#define _Valid_impl_ _SA_annotes1(SAL_valid, __yes) -#define _Writable_bytes_impl_(size) _SA_annotes1(SAL_writableTo, byteCount(size)) -#define _Writable_elements_impl_(size) _SA_annotes1(SAL_writableTo, elementCount(size)) - -#define _In_range_impl_(min, max) _Pre_impl_ _Range_impl_(min, max) -#define _Out_range_impl_(min, max) _Post_impl_ _Range_impl_(min, max) -#define _Ret_range_impl_(min, max) _Post_impl_ _Range_impl_(min, max) -#define _Deref_in_range_impl_(min, max) _Deref_pre_impl_ _Range_impl_(min, max) -#define _Deref_out_range_impl_(min, max) _Deref_post_impl_ _Range_impl_(min, max) -#define _Deref_ret_range_impl_(min, max) _Deref_post_impl_ _Range_impl_(min, max) - -#define _Deref_pre_impl_ _Pre_impl_ _Notref_impl_ _Deref_impl_ -#define _Deref_post_impl_ _Post_impl_ _Notref_impl_ _Deref_impl_ - -// The following are for the implementation machinery, and are not -// suitable for annotating general code. -// We're tying to phase this out, someday. The parser quotes the param. -#define __AuToQuOtE _SA_annotes0(SAL_AuToQuOtE) - -// Normally the parser does some simple type checking of annotation params, -// defer that check to the plugin. -#define __deferTypecheck _SA_annotes0(SAL_deferTypecheck) - -#define _SA_SPECSTRIZE(x) #x -#define _SAL_nop_impl_ /* nothing */ -#define __nop_impl(x) x -#endif - -#if _USE_ATTRIBUTES_FOR_SAL // [ - -// Using attributes for sal - -#include "codeanalysis\sourceannotations.h" - -#define _SA_annotes0(n) [SAL_annotes(Name = #n)] -#define _SA_annotes1(n, pp1) [SAL_annotes(Name = #n, p1 = _SA_SPECSTRIZE(pp1))] -#define _SA_annotes2(n, pp1, pp2) \ - [SAL_annotes(Name = #n, p1 = _SA_SPECSTRIZE(pp1), p2 = _SA_SPECSTRIZE(pp2))] -#define _SA_annotes3(n, pp1, pp2, pp3) \ - [SAL_annotes(Name = #n, p1 = _SA_SPECSTRIZE(pp1), p2 = _SA_SPECSTRIZE(pp2), \ - p3 = _SA_SPECSTRIZE(pp3))] - -#define _Pre_impl_ [SAL_pre] -#define _Post_impl_ [SAL_post] -#define _Deref_impl_ [SAL_deref] -#define _Notref_impl_ [SAL_notref] - -// Declare a function to be an annotation or primop (respectively). -// Done this way so that they don't appear in the regular compiler's -// namespace. -#define __ANNOTATION(fun) _SA_annotes0(SAL_annotation) void __SA_##fun; -#define __PRIMOP(type, fun) _SA_annotes0(SAL_primop) type __SA_##fun; -#define __QUALIFIER(fun) _SA_annotes0(SAL_qualifier) void __SA_##fun; - -// Benign declspec needed here for WindowsPREfast -#define __In_impl_ \ - [SA_Pre(Valid = SA_Yes)][SA_Pre(Deref = 1, Notref = 1, Access = SA_Read)] \ - __declspec("SAL_pre SAL_valid") - -#elif _USE_DECLSPECS_FOR_SAL // ][ - -// Using declspecs for sal - -#define _SA_annotes0(n) __declspec(#n) -#define _SA_annotes1(n, pp1) __declspec(#n "(" _SA_SPECSTRIZE(pp1) ")") -#define _SA_annotes2(n, pp1, pp2) __declspec(#n "(" _SA_SPECSTRIZE(pp1) "," _SA_SPECSTRIZE(pp2) ")") -#define _SA_annotes3(n, pp1, pp2, pp3) \ - __declspec(#n "(" _SA_SPECSTRIZE(pp1) "," _SA_SPECSTRIZE(pp2) "," _SA_SPECSTRIZE(pp3) ")") - -#define _Pre_impl_ _SA_annotes0(SAL_pre) -#define _Post_impl_ _SA_annotes0(SAL_post) -#define _Deref_impl_ _SA_annotes0(SAL_deref) -#define _Notref_impl_ _SA_annotes0(SAL_notref) - -// Declare a function to be an annotation or primop (respectively). -// Done this way so that they don't appear in the regular compiler's -// namespace. -#define __ANNOTATION(fun) _SA_annotes0(SAL_annotation) void __SA_##fun - -#define __PRIMOP(type, fun) _SA_annotes0(SAL_primop) type __SA_##fun - -#define __QUALIFIER(fun) _SA_annotes0(SAL_qualifier) void __SA_##fun; - -#define __In_impl_ \ - _Pre_impl_ _SA_annotes0(SAL_valid) \ - _Pre_impl_ _Deref_impl_ _Notref_impl_ _SA_annotes0(SAL_readonly) - -#else // ][ - -// Using "nothing" for sal - -#define _SA_annotes0(n) -#define _SA_annotes1(n, pp1) -#define _SA_annotes2(n, pp1, pp2) -#define _SA_annotes3(n, pp1, pp2, pp3) - -#define __ANNOTATION(fun) -#define __PRIMOP(type, fun) -#define __QUALIFIER(type, fun) - -#endif // ] - -#if _USE_ATTRIBUTES_FOR_SAL || _USE_DECLSPECS_FOR_SAL // [ - -// Declare annotations that need to be declared. -__ANNOTATION(SAL_useHeader(void)); -__ANNOTATION(SAL_bound(void)); -__ANNOTATION(SAL_allocator(void)); //??? resolve with PFD -__ANNOTATION(SAL_file_parser(__AuToQuOtE __In_impl_ char *, __In_impl_ char *)); -__ANNOTATION(SAL_source_code_content(__In_impl_ char *)); -__ANNOTATION(SAL_analysisHint(__AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_untrusted_data_source(__AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_untrusted_data_source_this(__AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_validated(__AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_validated_this(__AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_encoded(void)); -__ANNOTATION(SAL_adt(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_add_adt_property(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_remove_adt_property(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_transfer_adt_property_from(__AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_post_type(__AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_volatile(void)); -__ANNOTATION(SAL_nonvolatile(void)); -__ANNOTATION(SAL_entrypoint(__AuToQuOtE __In_impl_ char *, __AuToQuOtE __In_impl_ char *)); -__ANNOTATION(SAL_blocksOn(__In_impl_ void *)); -__ANNOTATION(SAL_mustInspect(void)); - -// Only appears in model files, but needs to be declared. -__ANNOTATION(SAL_TypeName(__AuToQuOtE __In_impl_ char *)); - -// To be declared well-known soon. -__ANNOTATION(SAL_interlocked(void);) - -#pragma warning(suppress : 28227 28241) -__ANNOTATION(SAL_name(__In_impl_ char *, __In_impl_ char *, __In_impl_ char *);) - -__PRIMOP(char *, _Macro_value_(__In_impl_ char *)); -__PRIMOP(int, _Macro_defined_(__In_impl_ char *)); -__PRIMOP(char *, _Strstr_(__In_impl_ char *, __In_impl_ char *)); - -#endif // ] - -#if _USE_ATTRIBUTES_FOR_SAL // [ - -#define _Check_return_impl_ [SA_Post(MustCheck = SA_Yes)] - -#define _Success_impl_(expr) [SA_Success(Condition = #expr)] -#define _On_failure_impl_(annos) \ - [SAL_context(p1 = "SAL_failed")] _Group_(_Post_impl_ _Group_(annos _SAL_nop_impl_)) - -#define _Printf_format_string_impl_ [SA_FormatString(Style = "printf")] -#define _Scanf_format_string_impl_ [SA_FormatString(Style = "scanf")] -#define _Scanf_s_format_string_impl_ [SA_FormatString(Style = "scanf_s")] - -#define _In_bound_impl_ [SA_PreBound(Deref = 0)] -#define _Out_bound_impl_ [SA_PostBound(Deref = 0)] -#define _Ret_bound_impl_ [SA_PostBound(Deref = 0)] -#define _Deref_in_bound_impl_ [SA_PreBound(Deref = 1)] -#define _Deref_out_bound_impl_ [SA_PostBound(Deref = 1)] -#define _Deref_ret_bound_impl_ [SA_PostBound(Deref = 1)] - -#define __valid_impl Valid = SA_Yes -#define __maybevalid_impl Valid = SA_Maybe -#define __notvalid_impl Valid = SA_No - -#define __null_impl Null = SA_Yes -#define __maybenull_impl Null = SA_Maybe -#define __notnull_impl Null = SA_No - -#define __null_impl_notref Null = SA_Yes, Notref = 1 -#define __maybenull_impl_notref Null = SA_Maybe, Notref = 1 -#define __notnull_impl_notref Null = SA_No, Notref = 1 - -#define __zterm_impl NullTerminated = SA_Yes -#define __maybezterm_impl NullTerminated = SA_Maybe -#define __maybzterm_impl NullTerminated = SA_Maybe -#define __notzterm_impl NullTerminated = SA_No - -#define __readaccess_impl Access = SA_Read -#define __writeaccess_impl Access = SA_Write -#define __allaccess_impl Access = SA_ReadWrite - -#define __readaccess_impl_notref Access = SA_Read, Notref = 1 -#define __writeaccess_impl_notref Access = SA_Write, Notref = 1 -#define __allaccess_impl_notref Access = SA_ReadWrite, Notref = 1 - -#if _MSC_VER >= 1610 /*IFSTRIP=IGN*/ // [ - -// For SAL2, we need to expect general expressions. - -#define __cap_impl(size) WritableElements = "\n" #size -#define __bytecap_impl(size) WritableBytes = "\n" #size -#define __bytecount_impl(size) ValidBytes = "\n" #size -#define __count_impl(size) ValidElements = "\n" #size - -#else // ][ - -#define __cap_impl(size) WritableElements = #size -#define __bytecap_impl(size) WritableBytes = #size -#define __bytecount_impl(size) ValidBytes = #size -#define __count_impl(size) ValidElements = #size - -#endif // ] - -#define __cap_c_impl(size) WritableElementsConst = size -#define __cap_c_one_notref_impl WritableElementsConst = 1, Notref = 1 -#define __cap_for_impl(param) WritableElementsLength = #param -#define __cap_x_impl(size) WritableElements = "\n@" #size - -#define __bytecap_c_impl(size) WritableBytesConst = size -#define __bytecap_x_impl(size) WritableBytes = "\n@" #size - -#define __mult_impl(mult, size) __cap_impl((mult) * (size)) - -#define __count_c_impl(size) ValidElementsConst = size -#define __count_x_impl(size) ValidElements = "\n@" #size - -#define __bytecount_c_impl(size) ValidBytesConst = size -#define __bytecount_x_impl(size) ValidBytes = "\n@" #size - -#define _At_impl_(target, annos) [SAL_at(p1 = #target)] _Group_(annos) -#define _At_buffer_impl_(target, iter, bound, annos) \ - [SAL_at_buffer(p1 = #target, p2 = #iter, p3 = #bound)] _Group_(annos) -#define _When_impl_(expr, annos) [SAL_when(p1 = #expr)] _Group_(annos) - -#define _Group_impl_(annos) [SAL_begin] annos[SAL_end] -#define _GrouP_impl_(annos) [SAL_BEGIN] annos[SAL_END] - -#define _Use_decl_anno_impl_ _SA_annotes0(SAL_useHeader) // this is a special case! - -#define _Pre1_impl_(p1) [SA_Pre(p1)] -#define _Pre2_impl_(p1, p2) [SA_Pre(p1, p2)] -#define _Pre3_impl_(p1, p2, p3) [SA_Pre(p1, p2, p3)] - -#define _Post1_impl_(p1) [SA_Post(p1)] -#define _Post2_impl_(p1, p2) [SA_Post(p1, p2)] -#define _Post3_impl_(p1, p2, p3) [SA_Post(p1, p2, p3)] - -#define _Ret1_impl_(p1) [SA_Post(p1)] -#define _Ret2_impl_(p1, p2) [SA_Post(p1, p2)] -#define _Ret3_impl_(p1, p2, p3) [SA_Post(p1, p2, p3)] - -#define _Deref_pre1_impl_(p1) [SA_Pre(Deref = 1, p1)] -#define _Deref_pre2_impl_(p1, p2) [SA_Pre(Deref = 1, p1, p2)] -#define _Deref_pre3_impl_(p1, p2, p3) [SA_Pre(Deref = 1, p1, p2, p3)] - -#define _Deref_post1_impl_(p1) [SA_Post(Deref = 1, p1)] -#define _Deref_post2_impl_(p1, p2) [SA_Post(Deref = 1, p1, p2)] -#define _Deref_post3_impl_(p1, p2, p3) [SA_Post(Deref = 1, p1, p2, p3)] - -#define _Deref_ret1_impl_(p1) [SA_Post(Deref = 1, p1)] -#define _Deref_ret2_impl_(p1, p2) [SA_Post(Deref = 1, p1, p2)] -#define _Deref_ret3_impl_(p1, p2, p3) [SA_Post(Deref = 1, p1, p2, p3)] - -#define _Deref2_pre1_impl_(p1) [SA_Pre(Deref = 2, Notref = 1, p1)] -#define _Deref2_post1_impl_(p1) [SA_Post(Deref = 2, Notref = 1, p1)] -#define _Deref2_ret1_impl_(p1) [SA_Post(Deref = 2, Notref = 1, p1)] - -// Obsolete -- may be needed for transition to attributes. -#define __inner_typefix(ctype) [SAL_typefix(p1 = _SA_SPECSTRIZE(ctype))] -#define __inner_exceptthat [SAL_except] - -#elif _USE_DECLSPECS_FOR_SAL // ][ - -#define _Check_return_impl_ __post _SA_annotes0(SAL_checkReturn) - -#define _Success_impl_(expr) _SA_annotes1(SAL_success, expr) -#define _On_failure_impl_(annos) \ - _SA_annotes1(SAL_context, SAL_failed) _Group_(_Post_impl_ _Group_(_SAL_nop_impl_ annos)) - -#define _Printf_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "printf") -#define _Scanf_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "scanf") -#define _Scanf_s_format_string_impl_ _SA_annotes1(SAL_IsFormatString, "scanf_s") - -#define _In_bound_impl_ _Pre_impl_ _Bound_impl_ -#define _Out_bound_impl_ _Post_impl_ _Bound_impl_ -#define _Ret_bound_impl_ _Post_impl_ _Bound_impl_ -#define _Deref_in_bound_impl_ _Deref_pre_impl_ _Bound_impl_ -#define _Deref_out_bound_impl_ _Deref_post_impl_ _Bound_impl_ -#define _Deref_ret_bound_impl_ _Deref_post_impl_ _Bound_impl_ - -#define __null_impl _SA_annotes0(SAL_null) // _SA_annotes1(SAL_null, __yes) -#define __notnull_impl _SA_annotes0(SAL_notnull) // _SA_annotes1(SAL_null, __no) -#define __maybenull_impl _SA_annotes0(SAL_maybenull) // _SA_annotes1(SAL_null, __maybe) - -#define __valid_impl _SA_annotes0(SAL_valid) // _SA_annotes1(SAL_valid, __yes) -#define __notvalid_impl _SA_annotes0(SAL_notvalid) // _SA_annotes1(SAL_valid, __no) -#define __maybevalid_impl _SA_annotes0(SAL_maybevalid) // _SA_annotes1(SAL_valid, __maybe) - -#define __null_impl_notref _Notref_ _Null_impl_ -#define __maybenull_impl_notref _Notref_ _Maybenull_impl_ -#define __notnull_impl_notref _Notref_ _Notnull_impl_ - -#define __zterm_impl _SA_annotes1(SAL_nullTerminated, __yes) -#define __maybezterm_impl _SA_annotes1(SAL_nullTerminated, __maybe) -#define __maybzterm_impl _SA_annotes1(SAL_nullTerminated, __maybe) -#define __notzterm_impl _SA_annotes1(SAL_nullTerminated, __no) - -#define __readaccess_impl _SA_annotes1(SAL_access, 0x1) -#define __writeaccess_impl _SA_annotes1(SAL_access, 0x2) -#define __allaccess_impl _SA_annotes1(SAL_access, 0x3) - -#define __readaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x1) -#define __writeaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x2) -#define __allaccess_impl_notref _Notref_ _SA_annotes1(SAL_access, 0x3) - -#define __cap_impl(size) _SA_annotes1(SAL_writableTo, elementCount(size)) -#define __cap_c_impl(size) _SA_annotes1(SAL_writableTo, elementCount(size)) -#define __cap_c_one_notref_impl _Notref_ _SA_annotes1(SAL_writableTo, elementCount(1)) -#define __cap_for_impl(param) _SA_annotes1(SAL_writableTo, inexpressibleCount(sizeof(param))) -#define __cap_x_impl(size) _SA_annotes1(SAL_writableTo, inexpressibleCount(#size)) - -#define __bytecap_impl(size) _SA_annotes1(SAL_writableTo, byteCount(size)) -#define __bytecap_c_impl(size) _SA_annotes1(SAL_writableTo, byteCount(size)) -#define __bytecap_x_impl(size) _SA_annotes1(SAL_writableTo, inexpressibleCount(#size)) - -#define __mult_impl(mult, size) _SA_annotes1(SAL_writableTo, (mult) * (size)) - -#define __count_impl(size) _SA_annotes1(SAL_readableTo, elementCount(size)) -#define __count_c_impl(size) _SA_annotes1(SAL_readableTo, elementCount(size)) -#define __count_x_impl(size) _SA_annotes1(SAL_readableTo, inexpressibleCount(#size)) - -#define __bytecount_impl(size) _SA_annotes1(SAL_readableTo, byteCount(size)) -#define __bytecount_c_impl(size) _SA_annotes1(SAL_readableTo, byteCount(size)) -#define __bytecount_x_impl(size) _SA_annotes1(SAL_readableTo, inexpressibleCount(#size)) - -#define _At_impl_(target, annos) _SA_annotes0(SAL_at(target)) _Group_(annos) -#define _At_buffer_impl_(target, iter, bound, annos) \ - _SA_annotes3(SAL_at_buffer, target, iter, bound) _Group_(annos) -#define _Group_impl_(annos) _SA_annotes0(SAL_begin) annos _SA_annotes0(SAL_end) -#define _GrouP_impl_(annos) _SA_annotes0(SAL_BEGIN) annos _SA_annotes0(SAL_END) -#define _When_impl_(expr, annos) _SA_annotes0(SAL_when(expr)) _Group_(annos) - -#define _Use_decl_anno_impl_ __declspec("SAL_useHeader()") // this is a special case! - -#define _Pre1_impl_(p1) _Pre_impl_ p1 -#define _Pre2_impl_(p1, p2) _Pre_impl_ p1 _Pre_impl_ p2 -#define _Pre3_impl_(p1, p2, p3) _Pre_impl_ p1 _Pre_impl_ p2 _Pre_impl_ p3 - -#define _Post1_impl_(p1) _Post_impl_ p1 -#define _Post2_impl_(p1, p2) _Post_impl_ p1 _Post_impl_ p2 -#define _Post3_impl_(p1, p2, p3) _Post_impl_ p1 _Post_impl_ p2 _Post_impl_ p3 - -#define _Ret1_impl_(p1) _Post_impl_ p1 -#define _Ret2_impl_(p1, p2) _Post_impl_ p1 _Post_impl_ p2 -#define _Ret3_impl_(p1, p2, p3) _Post_impl_ p1 _Post_impl_ p2 _Post_impl_ p3 - -#define _Deref_pre1_impl_(p1) _Deref_pre_impl_ p1 -#define _Deref_pre2_impl_(p1, p2) _Deref_pre_impl_ p1 _Deref_pre_impl_ p2 -#define _Deref_pre3_impl_(p1, p2, p3) _Deref_pre_impl_ p1 _Deref_pre_impl_ p2 _Deref_pre_impl_ p3 - -#define _Deref_post1_impl_(p1) _Deref_post_impl_ p1 -#define _Deref_post2_impl_(p1, p2) _Deref_post_impl_ p1 _Deref_post_impl_ p2 -#define _Deref_post3_impl_(p1, p2, p3) \ - _Deref_post_impl_ p1 _Deref_post_impl_ p2 _Deref_post_impl_ p3 - -#define _Deref_ret1_impl_(p1) _Deref_post_impl_ p1 -#define _Deref_ret2_impl_(p1, p2) _Deref_post_impl_ p1 _Deref_post_impl_ p2 -#define _Deref_ret3_impl_(p1, p2, p3) _Deref_post_impl_ p1 _Deref_post_impl_ p2 _Deref_post_impl_ p3 - -#define _Deref2_pre1_impl_(p1) _Deref_pre_impl_ _Notref_impl_ _Deref_impl_ p1 -#define _Deref2_post1_impl_(p1) _Deref_post_impl_ _Notref_impl_ _Deref_impl_ p1 -#define _Deref2_ret1_impl_(p1) _Deref_post_impl_ _Notref_impl_ _Deref_impl_ p1 - -#define __inner_typefix(ctype) _SA_annotes1(SAL_typefix, ctype) -#define __inner_exceptthat _SA_annotes0(SAL_except) - -#elif defined(_MSC_EXTENSIONS) && !defined(MIDL_PASS) && !defined(__midl) && \ - !defined(RC_INVOKED) && defined(_PFT_VER) && _MSC_VER >= 1400 /*IFSTRIP=IGN*/ // ][ - -// minimum attribute expansion for foreground build - -#pragma push_macro("SA") -#pragma push_macro("REPEATABLE") - -#ifdef __cplusplus // [ -#define SA(id) id -#define REPEATABLE [repeatable] -#else // !__cplusplus // ][ -#define SA(id) SA_##id -#define REPEATABLE -#endif // !__cplusplus // ] - -REPEATABLE -[source_annotation_attribute(SA(Parameter))] struct __P_impl { -#ifdef __cplusplus // [ - __P_impl(); -#endif // ] - int __d_; -}; -typedef struct __P_impl __P_impl; - -REPEATABLE -[source_annotation_attribute(SA(ReturnValue))] struct __R_impl { -#ifdef __cplusplus // [ - __R_impl(); -#endif // ] - int __d_; -}; -typedef struct __R_impl __R_impl; - -[source_annotation_attribute(SA(Method))] struct __M_ { -#ifdef __cplusplus // [ - __M_(); -#endif // ] - int __d_; -}; -typedef struct __M_ __M_; - -[source_annotation_attribute(SA(All))] struct __A_ { -#ifdef __cplusplus // [ - __A_(); -#endif // ] - int __d_; -}; -typedef struct __A_ __A_; - -[source_annotation_attribute(SA(Field))] struct __F_ { -#ifdef __cplusplus // [ - __F_(); -#endif // ] - int __d_; -}; -typedef struct __F_ __F_; - -#pragma pop_macro("REPEATABLE") -#pragma pop_macro("SA") - -#define _SAL_nop_impl_ - -#define _At_impl_(target, annos) [__A_(__d_ = 0)] -#define _At_buffer_impl_(target, iter, bound, annos) [__A_(__d_ = 0)] -#define _When_impl_(expr, annos) annos -#define _Group_impl_(annos) annos -#define _GrouP_impl_(annos) annos -#define _Use_decl_anno_impl_ [__M_(__d_ = 0)] - -#define _Points_to_data_impl_ [__P_impl(__d_ = 0)] -#define _Literal_impl_ [__P_impl(__d_ = 0)] -#define _Notliteral_impl_ [__P_impl(__d_ = 0)] - -#define _Pre_valid_impl_ [__P_impl(__d_ = 0)] -#define _Post_valid_impl_ [__P_impl(__d_ = 0)] -#define _Ret_valid_impl_ [__R_impl(__d_ = 0)] - -#define _Check_return_impl_ [__R_impl(__d_ = 0)] -#define _Must_inspect_impl_ [__R_impl(__d_ = 0)] - -#define _Success_impl_(expr) [__M_(__d_ = 0)] -#define _On_failure_impl_(expr) [__M_(__d_ = 0)] -#define _Always_impl_(expr) [__M_(__d_ = 0)] - -#define _Printf_format_string_impl_ [__P_impl(__d_ = 0)] -#define _Scanf_format_string_impl_ [__P_impl(__d_ = 0)] -#define _Scanf_s_format_string_impl_ [__P_impl(__d_ = 0)] - -#define _Raises_SEH_exception_impl_ [__M_(__d_ = 0)] -#define _Maybe_raises_SEH_exception_impl_ [__M_(__d_ = 0)] - -#define _In_bound_impl_ [__P_impl(__d_ = 0)] -#define _Out_bound_impl_ [__P_impl(__d_ = 0)] -#define _Ret_bound_impl_ [__R_impl(__d_ = 0)] -#define _Deref_in_bound_impl_ [__P_impl(__d_ = 0)] -#define _Deref_out_bound_impl_ [__P_impl(__d_ = 0)] -#define _Deref_ret_bound_impl_ [__R_impl(__d_ = 0)] - -#define _Range_impl_(min, max) [__P_impl(__d_ = 0)] -#define _In_range_impl_(min, max) [__P_impl(__d_ = 0)] -#define _Out_range_impl_(min, max) [__P_impl(__d_ = 0)] -#define _Ret_range_impl_(min, max) [__R_impl(__d_ = 0)] -#define _Deref_in_range_impl_(min, max) [__P_impl(__d_ = 0)] -#define _Deref_out_range_impl_(min, max) [__P_impl(__d_ = 0)] -#define _Deref_ret_range_impl_(min, max) [__R_impl(__d_ = 0)] - -#define _Field_range_impl_(min, max) [__F_(__d_ = 0)] - -#define _Pre_satisfies_impl_(cond) [__A_(__d_ = 0)] -#define _Post_satisfies_impl_(cond) [__A_(__d_ = 0)] -#define _Satisfies_impl_(cond) [__A_(__d_ = 0)] - -#define _Null_impl_ [__A_(__d_ = 0)] -#define _Notnull_impl_ [__A_(__d_ = 0)] -#define _Maybenull_impl_ [__A_(__d_ = 0)] - -#define _Valid_impl_ [__A_(__d_ = 0)] -#define _Notvalid_impl_ [__A_(__d_ = 0)] -#define _Maybevalid_impl_ [__A_(__d_ = 0)] - -#define _Readable_bytes_impl_(size) [__A_(__d_ = 0)] -#define _Readable_elements_impl_(size) [__A_(__d_ = 0)] -#define _Writable_bytes_impl_(size) [__A_(__d_ = 0)] -#define _Writable_elements_impl_(size) [__A_(__d_ = 0)] - -#define _Null_terminated_impl_ [__A_(__d_ = 0)] -#define _NullNull_terminated_impl_ [__A_(__d_ = 0)] - -#define _Pre_impl_ [__P_impl(__d_ = 0)] -#define _Pre1_impl_(p1) [__P_impl(__d_ = 0)] -#define _Pre2_impl_(p1, p2) [__P_impl(__d_ = 0)] -#define _Pre3_impl_(p1, p2, p3) [__P_impl(__d_ = 0)] - -#define _Post_impl_ [__P_impl(__d_ = 0)] -#define _Post1_impl_(p1) [__P_impl(__d_ = 0)] -#define _Post2_impl_(p1, p2) [__P_impl(__d_ = 0)] -#define _Post3_impl_(p1, p2, p3) [__P_impl(__d_ = 0)] - -#define _Ret1_impl_(p1) [__R_impl(__d_ = 0)] -#define _Ret2_impl_(p1, p2) [__R_impl(__d_ = 0)] -#define _Ret3_impl_(p1, p2, p3) [__R_impl(__d_ = 0)] - -#define _Deref_pre1_impl_(p1) [__P_impl(__d_ = 0)] -#define _Deref_pre2_impl_(p1, p2) [__P_impl(__d_ = 0)] -#define _Deref_pre3_impl_(p1, p2, p3) [__P_impl(__d_ = 0)] - -#define _Deref_post1_impl_(p1) [__P_impl(__d_ = 0)] -#define _Deref_post2_impl_(p1, p2) [__P_impl(__d_ = 0)] -#define _Deref_post3_impl_(p1, p2, p3) [__P_impl(__d_ = 0)] - -#define _Deref_ret1_impl_(p1) [__R_impl(__d_ = 0)] -#define _Deref_ret2_impl_(p1, p2) [__R_impl(__d_ = 0)] -#define _Deref_ret3_impl_(p1, p2, p3) [__R_impl(__d_ = 0)] - -#define _Deref2_pre1_impl_(p1) //[__P_impl(__d_=0)] -#define _Deref2_post1_impl_(p1) //[__P_impl(__d_=0)] -#define _Deref2_ret1_impl_(p1) //[__P_impl(__d_=0)] - -#else // ][ - -#define _SAL_nop_impl_ X - -#define _At_impl_(target, annos) -#define _When_impl_(expr, annos) -#define _Group_impl_(annos) -#define _GrouP_impl_(annos) -#define _At_buffer_impl_(target, iter, bound, annos) -#define _Use_decl_anno_impl_ -#define _Points_to_data_impl_ -#define _Literal_impl_ -#define _Notliteral_impl_ -#define _Notref_impl_ - -#define _Pre_valid_impl_ -#define _Post_valid_impl_ -#define _Ret_valid_impl_ - -#define _Check_return_impl_ -#define _Must_inspect_impl_ - -#define _Success_impl_(expr) -#define _On_failure_impl_(annos) -#define _Always_impl_(annos) - -#define _Printf_format_string_impl_ -#define _Scanf_format_string_impl_ -#define _Scanf_s_format_string_impl_ - -#define _In_bound_impl_ -#define _Out_bound_impl_ -#define _Ret_bound_impl_ -#define _Deref_in_bound_impl_ -#define _Deref_out_bound_impl_ -#define _Deref_ret_bound_impl_ - -#define _Range_impl_(min, max) -#define _In_range_impl_(min, max) -#define _Out_range_impl_(min, max) -#define _Ret_range_impl_(min, max) -#define _Deref_in_range_impl_(min, max) -#define _Deref_out_range_impl_(min, max) -#define _Deref_ret_range_impl_(min, max) - -#define _Satisfies_impl_(expr) -#define _Pre_satisfies_impl_(expr) -#define _Post_satisfies_impl_(expr) - -#define _Null_impl_ -#define _Notnull_impl_ -#define _Maybenull_impl_ - -#define _Valid_impl_ -#define _Notvalid_impl_ -#define _Maybevalid_impl_ - -#define _Field_range_impl_(min, max) - -#define _Pre_impl_ -#define _Pre1_impl_(p1) -#define _Pre2_impl_(p1, p2) -#define _Pre3_impl_(p1, p2, p3) - -#define _Post_impl_ -#define _Post1_impl_(p1) -#define _Post2_impl_(p1, p2) -#define _Post3_impl_(p1, p2, p3) - -#define _Ret1_impl_(p1) -#define _Ret2_impl_(p1, p2) -#define _Ret3_impl_(p1, p2, p3) - -#define _Deref_pre1_impl_(p1) -#define _Deref_pre2_impl_(p1, p2) -#define _Deref_pre3_impl_(p1, p2, p3) - -#define _Deref_post1_impl_(p1) -#define _Deref_post2_impl_(p1, p2) -#define _Deref_post3_impl_(p1, p2, p3) - -#define _Deref_ret1_impl_(p1) -#define _Deref_ret2_impl_(p1, p2) -#define _Deref_ret3_impl_(p1, p2, p3) - -#define _Deref2_pre1_impl_(p1) -#define _Deref2_post1_impl_(p1) -#define _Deref2_ret1_impl_(p1) - -#define _Readable_bytes_impl_(size) -#define _Readable_elements_impl_(size) -#define _Writable_bytes_impl_(size) -#define _Writable_elements_impl_(size) - -#define _Null_terminated_impl_ -#define _NullNull_terminated_impl_ - -// Obsolete -- may be needed for transition to attributes. -#define __inner_typefix(ctype) -#define __inner_exceptthat - -#endif // ] - -// This section contains the deprecated annotations - -/* - ------------------------------------------------------------------------------- - Introduction - - sal.h provides a set of annotations to describe how a function uses its - parameters - the assumptions it makes about them, and the guarantees it makes - upon finishing. - - Annotations may be placed before either a function parameter's type or its return - type, and describe the function's behavior regarding the parameter or return value. - There are two classes of annotations: buffer annotations and advanced annotations. - Buffer annotations describe how functions use their pointer parameters, and - advanced annotations either describe complex/unusual buffer behavior, or provide - additional information about a parameter that is not otherwise expressible. - - ------------------------------------------------------------------------------- - Buffer Annotations - - The most important annotations in sal.h provide a consistent way to annotate - buffer parameters or return values for a function. Each of these annotations describes - a single buffer (which could be a string, a fixed-length or variable-length array, - or just a pointer) that the function interacts with: where it is, how large it is, - how much is initialized, and what the function does with it. - - The appropriate macro for a given buffer can be constructed using the table below. - Just pick the appropriate values from each category, and combine them together - with a leading underscore. Some combinations of values do not make sense as buffer - annotations. Only meaningful annotations can be added to your code; for a list of - these, see the buffer annotation definitions section. - - Only a single buffer annotation should be used for each parameter. - - |------------|------------|---------|--------|----------|----------|---------------| - | Level | Usage | Size | Output | NullTerm | Optional | Parameters | - |------------|------------|---------|--------|----------|----------|---------------| - | <> | <> | <> | <> | _z | <> | <> | - | _deref | _in | _ecount | _full | _nz | _opt | (size) | - | _deref_opt | _out | _bcount | _part | | | (size,length) | - | | _inout | | | | | | - | | | | | | | | - |------------|------------|---------|--------|----------|----------|---------------| - - Level: Describes the buffer pointer's level of indirection from the parameter or - return value 'p'. - - <> : p is the buffer pointer. - _deref : *p is the buffer pointer. p must not be NULL. - _deref_opt : *p may be the buffer pointer. p may be NULL, in which case the rest of - the annotation is ignored. - - Usage: Describes how the function uses the buffer. - - <> : The buffer is not accessed. If used on the return value or with _deref, the - function will provide the buffer, and it will be uninitialized at exit. - Otherwise, the caller must provide the buffer. This should only be used - for alloc and free functions. - _in : The function will only read from the buffer. The caller must provide the - buffer and initialize it. Cannot be used with _deref. - _out : The function will only write to the buffer. If used on the return value or - with _deref, the function will provide the buffer and initialize it. - Otherwise, the caller must provide the buffer, and the function will - initialize it. - _inout : The function may freely read from and write to the buffer. The caller must - provide the buffer and initialize it. If used with _deref, the buffer may - be reallocated by the function. - - Size: Describes the total size of the buffer. This may be less than the space actually - allocated for the buffer, in which case it describes the accessible amount. - - <> : No buffer size is given. If the type specifies the buffer size (such as - with LPSTR and LPWSTR), that amount is used. Otherwise, the buffer is one - element long. Must be used with _in, _out, or _inout. - _ecount : The buffer size is an explicit element count. - _bcount : The buffer size is an explicit byte count. - - Output: Describes how much of the buffer will be initialized by the function. For - _inout buffers, this also describes how much is initialized at entry. Omit this - category for _in buffers; they must be fully initialized by the caller. - - <> : The type specifies how much is initialized. For instance, a function initializing - an LPWSTR must NULL-terminate the string. - _full : The function initializes the entire buffer. - _part : The function initializes part of the buffer, and explicitly indicates how much. - - NullTerm: States if the present of a '\0' marks the end of valid elements in the buffer. - _z : A '\0' indicated the end of the buffer - _nz : The buffer may not be null terminated and a '\0' does not indicate the end of the - buffer. - Optional: Describes if the buffer itself is optional. - - <> : The pointer to the buffer must not be NULL. - _opt : The pointer to the buffer might be NULL. It will be checked before being dereferenced. - - Parameters: Gives explicit counts for the size and length of the buffer. - - <> : There is no explicit count. Use when neither _ecount nor _bcount is used. - (size) : Only the buffer's total size is given. Use with _ecount or _bcount but not _part. - (size,length) : The buffer's total size and initialized length are given. Use with _ecount_part - and _bcount_part. - - ------------------------------------------------------------------------------- - Buffer Annotation Examples - - LWSTDAPI_(BOOL) StrToIntExA( - __in LPCSTR pszString, - DWORD dwFlags, - __out int *piRet -- A pointer whose dereference will be filled in. - ); - - void MyPaintingFunction( - __in HWND hwndControl, -- An initialized read-only parameter. - __in_opt HDC hdcOptional, -- An initialized read-only parameter that might be NULL. - __inout IPropertyStore *ppsStore -- An initialized parameter that may be freely used - -- and modified. - ); - - LWSTDAPI_(BOOL) PathCompactPathExA( - __out_ecount(cchMax) LPSTR pszOut, -- A string buffer with cch elements that will - -- be NULL terminated on exit. - __in LPCSTR pszSrc, - UINT cchMax, - DWORD dwFlags - ); - - HRESULT SHLocalAllocBytes( - size_t cb, - __deref_bcount(cb) T **ppv -- A pointer whose dereference will be set to an - -- uninitialized buffer with cb bytes. - ); - - __inout_bcount_full(cb) : A buffer with cb elements that is fully initialized at - entry and exit, and may be written to by this function. - - __out_ecount_part(count, *countOut) : A buffer with count elements that will be - partially initialized by this function. The function indicates how much it - initialized by setting *countOut. - - ------------------------------------------------------------------------------- - Advanced Annotations - - Advanced annotations describe behavior that is not expressible with the regular - buffer macros. These may be used either to annotate buffer parameters that involve - complex or conditional behavior, or to enrich existing annotations with additional - information. - - __success(expr) f : - indicates whether function f succeeded or not. If is true at exit, - all the function's guarantees (as given by other annotations) must hold. If - is false at exit, the caller should not expect any of the function's guarantees - to hold. If not used, the function must always satisfy its guarantees. Added - automatically to functions that indicate success in standard ways, such as by - returning an HRESULT. - - __nullterminated p : - Pointer p is a buffer that may be read or written up to and including the first - NULL character or pointer. May be used on typedefs, which marks valid (properly - initialized) instances of that type as being NULL-terminated. - - __nullnullterminated p : - Pointer p is a buffer that may be read or written up to and including the first - sequence of two NULL characters or pointers. May be used on typedefs, which marks - valid instances of that type as being double-NULL terminated. - - __reserved v : - Value v must be 0/NULL, reserved for future use. - - __checkReturn v : - Return value v must not be ignored by callers of this function. - - __typefix(ctype) v : - Value v should be treated as an instance of ctype, rather than its declared type. - - __override f : - Specify C#-style 'override' behaviour for overriding virtual methods. - - __callback f : - Function f can be used as a function pointer. - - __format_string p : - Pointer p is a string that contains % markers in the style of printf. - - __blocksOn(resource) f : - Function f blocks on the resource 'resource'. - - FALLTHROUGH : - Annotates switch statement labels where fall-through is desired, to distinguish - from forgotten break statements. - - ------------------------------------------------------------------------------- - Advanced Annotation Examples - - __success(return != FALSE) LWSTDAPI_(BOOL) - PathCanonicalizeA(__out_ecount(MAX_PATH) LPSTR pszBuf, LPCSTR pszPath) : - pszBuf is only guaranteed to be NULL-terminated when TRUE is returned. - - typedef __nullterminated WCHAR* LPWSTR : Initialized LPWSTRs are NULL-terminated strings. - - __out_ecount(cch) __typefix(LPWSTR) void *psz : psz is a buffer parameter which will be - a NULL-terminated WCHAR string at exit, and which initially contains cch WCHARs. - - ------------------------------------------------------------------------------- -*/ - -#define __specstrings - -#ifdef __cplusplus // [ -#ifndef __nothrow // [ -#define __nothrow NOTHROW_DECL -#endif // ] -extern "C" { -#else // ][ -#ifndef __nothrow // [ -#define __nothrow -#endif // ] -#endif /* #ifdef __cplusplus */ // ] - -/* - ------------------------------------------------------------------------------- - Helper Macro Definitions - - These express behavior common to many of the high-level annotations. - DO NOT USE THESE IN YOUR CODE. - ------------------------------------------------------------------------------- -*/ - -/* - The helper annotations are only understood by the compiler version used by - various defect detection tools. When the regular compiler is running, they - are defined into nothing, and do not affect the compiled code. -*/ - -#if !defined(__midl) && defined(_PREFAST_) // [ - -/* - In the primitive "SAL_*" annotations "SAL" stands for Standard - Annotation Language. These "SAL_*" annotations are the - primitives the compiler understands and high-level MACROs - will decompose into these primivates. -*/ - -#define _SA_SPECSTRIZE(x) #x - -/* - __notnull p - __maybenull p - - Annotates a pointer p. States that pointer p is never null or maybe null. -*/ - -#define __notnull _Notnull_impl_ -#define __maybenull _Maybenull_impl_ - -/* - __readonly l - __notreadonly l - __maybereadonly l - - Annotates a location l. States that location l is not modified after - this point. If the annotation is placed on the precondition state of - a function, the restriction only applies until the postcondition state - of the function. __maybereadonly states that the annotated location - may be modified, whereas __notreadonly states that a location must be - modified. -*/ - -#define __readonly _Pre1_impl_(__readaccess_impl) -#define __notreadonly _Pre1_impl_(__allaccess_impl) -#define __maybereadonly _Pre1_impl_(__readaccess_impl) - -/* - __valid v - __notvalid v - __maybevalid v - - Annotates any value v. States that the value satisfies all properties of - valid values of its type. For example, for a string buffer, valid means - that the buffer pointer is either NULL or points to a NULL-terminated string. -*/ - -#define __valid _Valid_impl_ -#define __notvalid _Notvalid_impl_ -#define __maybevalid _Maybevalid_impl_ - -/* - __readableTo(extent) p - - Annotates a buffer pointer p. If the buffer can be read, extent describes - how much of the buffer is readable. For a reader of the buffer, this is - an explicit permission to read up to that amount, rather than a restriction to - read only up to it. -*/ - -#define __readableTo(extent) _SA_annotes1(SAL_readableTo, extent) - -/* - - __elem_readableTo(size) - - Annotates a buffer pointer p as being readable to size elements. -*/ - -#define __elem_readableTo(size) _SA_annotes1(SAL_readableTo, elementCount(size)) - -/* - __byte_readableTo(size) - - Annotates a buffer pointer p as being readable to size bytes. -*/ -#define __byte_readableTo(size) _SA_annotes1(SAL_readableTo, byteCount(size)) - -/* - __writableTo(extent) p - - Annotates a buffer pointer p. If the buffer can be modified, extent - describes how much of the buffer is writable (usually the allocation - size). For a writer of the buffer, this is an explicit permission to - write up to that amount, rather than a restriction to write only up to it. -*/ -#define __writableTo(size) _SA_annotes1(SAL_writableTo, size) - -/* - __elem_writableTo(size) - - Annotates a buffer pointer p as being writable to size elements. -*/ -#define __elem_writableTo(size) _SA_annotes1(SAL_writableTo, elementCount(size)) - -/* - __byte_writableTo(size) - - Annotates a buffer pointer p as being writable to size bytes. -*/ -#define __byte_writableTo(size) _SA_annotes1(SAL_writableTo, byteCount(size)) - -/* - __deref p - - Annotates a pointer p. The next annotation applies one dereference down - in the type. If readableTo(p, size) then the next annotation applies to - all elements *(p+i) for which i satisfies the size. If p is a pointer - to a struct, the next annotation applies to all fields of the struct. -*/ -#define __deref _Deref_impl_ - -/* - __pre __next_annotation - - The next annotation applies in the precondition state -*/ -#define __pre _Pre_impl_ - -/* - __post __next_annotation - - The next annotation applies in the postcondition state -*/ -#define __post _Post_impl_ - -/* - __precond() - - When is true, the next annotation applies in the precondition state - (currently not enabled) -*/ -#define __precond(expr) __pre - -/* - __postcond() - - When is true, the next annotation applies in the postcondition state - (currently not enabled) -*/ -#define __postcond(expr) __post - -/* - __exceptthat - - Given a set of annotations Q containing __exceptthat maybeP, the effect of - the except clause is to erase any P or notP annotations (explicit or - implied) within Q at the same level of dereferencing that the except - clause appears, and to replace it with maybeP. - - Example 1: __valid __pre_except_maybenull on a pointer p means that the - pointer may be null, and is otherwise valid, thus overriding - the implicit notnull annotation implied by __valid on - pointers. - - Example 2: __valid __deref __pre_except_maybenull on an int **p means - that p is not null (implied by valid), but the elements - pointed to by p could be null, and are otherwise valid. -*/ -#define __exceptthat __inner_exceptthat - -/* - _refparam - - Added to all out parameter macros to indicate that they are all reference - parameters. -*/ -#define __refparam _Notref_ __deref __notreadonly - -/* - __inner_* - - Helper macros that directly correspond to certain high-level annotations. - -*/ - -/* - Macros to classify the entrypoints and indicate their category. - - Pre-defined control point categories include: RPC, LPC, DeviceDriver, UserToKernel, ISAPI, COM. - -*/ -#define __inner_control_entrypoint(category) _SA_annotes2(SAL_entrypoint, controlEntry, category) - -/* - Pre-defined data entry point categories include: Registry, File, Network. -*/ -#define __inner_data_entrypoint(category) _SA_annotes2(SAL_entrypoint, dataEntry, category) - -#define __inner_override _SA_annotes0(__override) -#define __inner_callback _SA_annotes0(__callback) -#define __inner_blocksOn(resource) _SA_annotes1(SAL_blocksOn, resource) - -#define __post_except_maybenull __post __inner_exceptthat _Maybenull_impl_ -#define __pre_except_maybenull __pre __inner_exceptthat _Maybenull_impl_ - -#define __post_deref_except_maybenull __post __deref __inner_exceptthat _Maybenull_impl_ -#define __pre_deref_except_maybenull __pre __deref __inner_exceptthat _Maybenull_impl_ - -#define __inexpressible_readableTo(size) _Readable_elements_impl_(_Inexpressible_(size)) -#define __inexpressible_writableTo(size) _Writable_elements_impl_(_Inexpressible_(size)) - -#else // ][ -#define __notnull -#define __deref -#define __maybenull -#define __readonly -#define __notreadonly -#define __maybereadonly -#define __valid -#define __notvalid -#define __maybevalid -#define __readableTo(extent) -#define __elem_readableTo(size) -#define __byte_readableTo(size) -#define __writableTo(size) -#define __elem_writableTo(size) -#define __byte_writableTo(size) -#define __pre -#define __post -#define __precond(expr) -#define __postcond(expr) -#define __exceptthat -#define __inner_override -#define __inner_callback -#define __inner_blocksOn(resource) -#define __refparam -#define __inner_control_entrypoint(category) -#define __inner_data_entrypoint(category) - -#define __post_except_maybenull -#define __pre_except_maybenull -#define __post_deref_except_maybenull -#define __pre_deref_except_maybenull - -#define __inexpressible_readableTo(size) -#define __inexpressible_writableTo(size) - -#endif /* #if !defined(__midl) && defined(_PREFAST_) */ // ] - -/* -------------------------------------------------------------------------------- -Buffer Annotation Definitions - -Any of these may be used to directly annotate functions, but only one should -be used for each parameter. To determine which annotation to use for a given -buffer, use the table in the buffer annotations section. -------------------------------------------------------------------------------- -*/ - -#define __ecount(size) _SAL1_Source_(__ecount, (size), __notnull __elem_writableTo(size)) -#define __bcount(size) _SAL1_Source_(__bcount, (size), __notnull __byte_writableTo(size)) -#define __in_ecount(size) _SAL1_Source_(__in_ecount, (size), _In_reads_(size)) -#define __in_bcount(size) _SAL1_Source_(__in_bcount, (size), _In_reads_bytes_(size)) -#define __in_z _SAL1_Source_(__in_z, (), _In_z_) -#define __in_ecount_z(size) _SAL1_Source_(__in_ecount_z, (size), _In_reads_z_(size)) -#define __in_bcount_z(size) \ - _SAL1_Source_(__in_bcount_z, (size), __in_bcount(size) __pre __nullterminated) -#define __in_nz _SAL1_Source_(__in_nz, (), __in) -#define __in_ecount_nz(size) _SAL1_Source_(__in_ecount_nz, (size), __in_ecount(size)) -#define __in_bcount_nz(size) _SAL1_Source_(__in_bcount_nz, (size), __in_bcount(size)) -#define __out_ecount(size) _SAL1_Source_(__out_ecount, (size), _Out_writes_(size)) -#define __out_bcount(size) _SAL1_Source_(__out_bcount, (size), _Out_writes_bytes_(size)) -#define __out_ecount_part(size, length) \ - _SAL1_Source_(__out_ecount_part, (size, length), _Out_writes_to_(size, length)) -#define __out_bcount_part(size, length) \ - _SAL1_Source_(__out_bcount_part, (size, length), _Out_writes_bytes_to_(size, length)) -#define __out_ecount_full(size) _SAL1_Source_(__out_ecount_full, (size), _Out_writes_all_(size)) -#define __out_bcount_full(size) \ - _SAL1_Source_(__out_bcount_full, (size), _Out_writes_bytes_all_(size)) -#define __out_z _SAL1_Source_(__out_z, (), __post __valid __refparam __post __nullterminated) -#define __out_z_opt \ - _SAL1_Source_(__out_z_opt, (), \ - __post __valid __refparam __post __nullterminated __pre_except_maybenull) -#define __out_ecount_z(size) \ - _SAL1_Source_(__out_ecount_z, (size), \ - __ecount(size) __post __valid __refparam __post __nullterminated) -#define __out_bcount_z(size) \ - _SAL1_Source_(__out_bcount_z, (size), \ - __bcount(size) __post __valid __refparam __post __nullterminated) -#define __out_ecount_part_z(size, length) \ - _SAL1_Source_(__out_ecount_part_z, (size, length), \ - __out_ecount_part(size, length) __post __nullterminated) -#define __out_bcount_part_z(size, length) \ - _SAL1_Source_(__out_bcount_part_z, (size, length), \ - __out_bcount_part(size, length) __post __nullterminated) -#define __out_ecount_full_z(size) \ - _SAL1_Source_(__out_ecount_full_z, (size), __out_ecount_full(size) __post __nullterminated) -#define __out_bcount_full_z(size) \ - _SAL1_Source_(__out_bcount_full_z, (size), __out_bcount_full(size) __post __nullterminated) -#define __out_nz _SAL1_Source_(__out_nz, (), __post __valid __refparam) -#define __out_nz_opt \ - _SAL1_Source_(__out_nz_opt, (), __post __valid __refparam __post_except_maybenull_) -#define __out_ecount_nz(size) \ - _SAL1_Source_(__out_ecount_nz, (size), __ecount(size) __post __valid __refparam) -#define __out_bcount_nz(size) \ - _SAL1_Source_(__out_bcount_nz, (size), __bcount(size) __post __valid __refparam) -#define __inout _SAL1_Source_(__inout, (), _Inout_) -#define __inout_ecount(size) _SAL1_Source_(__inout_ecount, (size), _Inout_updates_(size)) -#define __inout_bcount(size) _SAL1_Source_(__inout_bcount, (size), _Inout_updates_bytes_(size)) -#define __inout_ecount_part(size, length) \ - _SAL1_Source_(__inout_ecount_part, (size, length), _Inout_updates_to_(size, length)) -#define __inout_bcount_part(size, length) \ - _SAL1_Source_(__inout_bcount_part, (size, length), _Inout_updates_bytes_to_(size, length)) -#define __inout_ecount_full(size) \ - _SAL1_Source_(__inout_ecount_full, (size), _Inout_updates_all_(size)) -#define __inout_bcount_full(size) \ - _SAL1_Source_(__inout_bcount_full, (size), _Inout_updates_bytes_all_(size)) -#define __inout_z _SAL1_Source_(__inout_z, (), _Inout_z_) -#define __inout_ecount_z(size) _SAL1_Source_(__inout_ecount_z, (size), _Inout_updates_z_(size)) -#define __inout_bcount_z(size) \ - _SAL1_Source_(__inout_bcount_z, (size), \ - __inout_bcount(size) __pre __nullterminated __post __nullterminated) -#define __inout_nz _SAL1_Source_(__inout_nz, (), __inout) -#define __inout_ecount_nz(size) _SAL1_Source_(__inout_ecount_nz, (size), __inout_ecount(size)) -#define __inout_bcount_nz(size) _SAL1_Source_(__inout_bcount_nz, (size), __inout_bcount(size)) -#define __ecount_opt(size) \ - _SAL1_Source_(__ecount_opt, (size), __ecount(size) __pre_except_maybenull) -#define __bcount_opt(size) \ - _SAL1_Source_(__bcount_opt, (size), __bcount(size) __pre_except_maybenull) -#define __in_opt _SAL1_Source_(__in_opt, (), _In_opt_) -#define __in_ecount_opt(size) _SAL1_Source_(__in_ecount_opt, (size), _In_reads_opt_(size)) -#define __in_bcount_opt(size) _SAL1_Source_(__in_bcount_opt, (size), _In_reads_bytes_opt_(size)) -#define __in_z_opt _SAL1_Source_(__in_z_opt, (), _In_opt_z_) -#define __in_ecount_z_opt(size) \ - _SAL1_Source_(__in_ecount_z_opt, (size), __in_ecount_opt(size) __pre __nullterminated) -#define __in_bcount_z_opt(size) \ - _SAL1_Source_(__in_bcount_z_opt, (size), __in_bcount_opt(size) __pre __nullterminated) -#define __in_nz_opt _SAL1_Source_(__in_nz_opt, (), __in_opt) -#define __in_ecount_nz_opt(size) _SAL1_Source_(__in_ecount_nz_opt, (size), __in_ecount_opt(size)) -#define __in_bcount_nz_opt(size) _SAL1_Source_(__in_bcount_nz_opt, (size), __in_bcount_opt(size)) -#define __out_opt _SAL1_Source_(__out_opt, (), _Out_opt_) -#define __out_ecount_opt(size) _SAL1_Source_(__out_ecount_opt, (size), _Out_writes_opt_(size)) -#define __out_bcount_opt(size) _SAL1_Source_(__out_bcount_opt, (size), _Out_writes_bytes_opt_(size)) -#define __out_ecount_part_opt(size, length) \ - _SAL1_Source_(__out_ecount_part_opt, (size, length), \ - __out_ecount_part(size, length) __pre_except_maybenull) -#define __out_bcount_part_opt(size, length) \ - _SAL1_Source_(__out_bcount_part_opt, (size, length), \ - __out_bcount_part(size, length) __pre_except_maybenull) -#define __out_ecount_full_opt(size) \ - _SAL1_Source_(__out_ecount_full_opt, (size), __out_ecount_full(size) __pre_except_maybenull) -#define __out_bcount_full_opt(size) \ - _SAL1_Source_(__out_bcount_full_opt, (size), __out_bcount_full(size) __pre_except_maybenull) -#define __out_ecount_z_opt(size) \ - _SAL1_Source_(__out_ecount_z_opt, (size), __out_ecount_opt(size) __post __nullterminated) -#define __out_bcount_z_opt(size) \ - _SAL1_Source_(__out_bcount_z_opt, (size), __out_bcount_opt(size) __post __nullterminated) -#define __out_ecount_part_z_opt(size, length) \ - _SAL1_Source_(__out_ecount_part_z_opt, (size, length), \ - __out_ecount_part_opt(size, length) __post __nullterminated) -#define __out_bcount_part_z_opt(size, length) \ - _SAL1_Source_(__out_bcount_part_z_opt, (size, length), \ - __out_bcount_part_opt(size, length) __post __nullterminated) -#define __out_ecount_full_z_opt(size) \ - _SAL1_Source_(__out_ecount_full_z_opt, (size), \ - __out_ecount_full_opt(size) __post __nullterminated) -#define __out_bcount_full_z_opt(size) \ - _SAL1_Source_(__out_bcount_full_z_opt, (size), \ - __out_bcount_full_opt(size) __post __nullterminated) -#define __out_ecount_nz_opt(size) \ - _SAL1_Source_(__out_ecount_nz_opt, (size), __out_ecount_opt(size) __post __nullterminated) -#define __out_bcount_nz_opt(size) \ - _SAL1_Source_(__out_bcount_nz_opt, (size), __out_bcount_opt(size) __post __nullterminated) -#define __inout_opt _SAL1_Source_(__inout_opt, (), _Inout_opt_) -#define __inout_ecount_opt(size) \ - _SAL1_Source_(__inout_ecount_opt, (size), __inout_ecount(size) __pre_except_maybenull) -#define __inout_bcount_opt(size) \ - _SAL1_Source_(__inout_bcount_opt, (size), __inout_bcount(size) __pre_except_maybenull) -#define __inout_ecount_part_opt(size, length) \ - _SAL1_Source_(__inout_ecount_part_opt, (size, length), \ - __inout_ecount_part(size, length) __pre_except_maybenull) -#define __inout_bcount_part_opt(size, length) \ - _SAL1_Source_(__inout_bcount_part_opt, (size, length), \ - __inout_bcount_part(size, length) __pre_except_maybenull) -#define __inout_ecount_full_opt(size) \ - _SAL1_Source_(__inout_ecount_full_opt, (size), __inout_ecount_full(size) __pre_except_maybenull) -#define __inout_bcount_full_opt(size) \ - _SAL1_Source_(__inout_bcount_full_opt, (size), __inout_bcount_full(size) __pre_except_maybenull) -#define __inout_z_opt \ - _SAL1_Source_(__inout_z_opt, (), __inout_opt __pre __nullterminated __post __nullterminated) -#define __inout_ecount_z_opt(size) \ - _SAL1_Source_(__inout_ecount_z_opt, (size), \ - __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated) -#define __inout_ecount_z_opt(size) \ - _SAL1_Source_(__inout_ecount_z_opt, (size), \ - __inout_ecount_opt(size) __pre __nullterminated __post __nullterminated) -#define __inout_bcount_z_opt(size) \ - _SAL1_Source_(__inout_bcount_z_opt, (size), __inout_bcount_opt(size)) -#define __inout_nz_opt _SAL1_Source_(__inout_nz_opt, (), __inout_opt) -#define __inout_ecount_nz_opt(size) \ - _SAL1_Source_(__inout_ecount_nz_opt, (size), __inout_ecount_opt(size)) -#define __inout_bcount_nz_opt(size) \ - _SAL1_Source_(__inout_bcount_nz_opt, (size), __inout_bcount_opt(size)) -#define __deref_ecount(size) \ - _SAL1_Source_( \ - __deref_ecount, (size), \ - _Notref_ __ecount(1) __post _Notref_ __elem_readableTo(1) \ - __post _Notref_ __deref _Notref_ __notnull __post __deref __elem_writableTo(size)) -#define __deref_bcount(size) \ - _SAL1_Source_( \ - __deref_bcount, (size), \ - _Notref_ __ecount(1) __post _Notref_ __elem_readableTo(1) \ - __post _Notref_ __deref _Notref_ __notnull __post __deref __byte_writableTo(size)) -#define __deref_out _SAL1_Source_(__deref_out, (), _Outptr_) -#define __deref_out_ecount(size) \ - _SAL1_Source_(__deref_out_ecount, (size), _Outptr_result_buffer_(size)) -#define __deref_out_bcount(size) \ - _SAL1_Source_(__deref_out_bcount, (size), _Outptr_result_bytebuffer_(size)) -#define __deref_out_ecount_part(size, length) \ - _SAL1_Source_(__deref_out_ecount_part, (size, length), _Outptr_result_buffer_to_(size, length)) -#define __deref_out_bcount_part(size, length) \ - _SAL1_Source_(__deref_out_bcount_part, (size, length), \ - _Outptr_result_bytebuffer_to_(size, length)) -#define __deref_out_ecount_full(size) \ - _SAL1_Source_(__deref_out_ecount_full, (size), __deref_out_ecount_part(size, size)) -#define __deref_out_bcount_full(size) \ - _SAL1_Source_(__deref_out_bcount_full, (size), __deref_out_bcount_part(size, size)) -#define __deref_out_z _SAL1_Source_(__deref_out_z, (), _Outptr_result_z_) -#define __deref_out_ecount_z(size) \ - _SAL1_Source_(__deref_out_ecount_z, (size), \ - __deref_out_ecount(size) __post __deref __nullterminated) -#define __deref_out_bcount_z(size) \ - _SAL1_Source_(__deref_out_bcount_z, (size), \ - __deref_out_bcount(size) __post __deref __nullterminated) -#define __deref_out_nz _SAL1_Source_(__deref_out_nz, (), __deref_out) -#define __deref_out_ecount_nz(size) \ - _SAL1_Source_(__deref_out_ecount_nz, (size), __deref_out_ecount(size)) -#define __deref_out_bcount_nz(size) \ - _SAL1_Source_(__deref_out_bcount_nz, (size), __deref_out_ecount(size)) -#define __deref_inout \ - _SAL1_Source_(__deref_inout, (), \ - _Notref_ __notnull _Notref_ __elem_readableTo(1) \ - __pre __deref __valid __post _Notref_ __deref __valid __refparam) -#define __deref_inout_z \ - _SAL1_Source_( \ - __deref_inout_z, (), \ - __deref_inout __pre __deref __nullterminated __post _Notref_ __deref __nullterminated) -#define __deref_inout_ecount(size) \ - _SAL1_Source_(__deref_inout_ecount, (size), \ - __deref_inout __pre __deref __elem_writableTo(size) \ - __post _Notref_ __deref __elem_writableTo(size)) -#define __deref_inout_bcount(size) \ - _SAL1_Source_(__deref_inout_bcount, (size), \ - __deref_inout __pre __deref __byte_writableTo(size) \ - __post _Notref_ __deref __byte_writableTo(size)) -#define __deref_inout_ecount_part(size, length) \ - _SAL1_Source_(__deref_inout_ecount_part, (size, length), \ - __deref_inout_ecount(size) __pre __deref __elem_readableTo(length) \ - __post __deref __elem_readableTo(length)) -#define __deref_inout_bcount_part(size, length) \ - _SAL1_Source_(__deref_inout_bcount_part, (size, length), \ - __deref_inout_bcount(size) __pre __deref __byte_readableTo(length) \ - __post __deref __byte_readableTo(length)) -#define __deref_inout_ecount_full(size) \ - _SAL1_Source_(__deref_inout_ecount_full, (size), __deref_inout_ecount_part(size, size)) -#define __deref_inout_bcount_full(size) \ - _SAL1_Source_(__deref_inout_bcount_full, (size), __deref_inout_bcount_part(size, size)) -#define __deref_inout_ecount_z(size) \ - _SAL1_Source_(__deref_inout_ecount_z, (size), \ - __deref_inout_ecount(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_inout_bcount_z(size) \ - _SAL1_Source_(__deref_inout_bcount_z, (size), \ - __deref_inout_bcount(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_inout_nz _SAL1_Source_(__deref_inout_nz, (), __deref_inout) -#define __deref_inout_ecount_nz(size) \ - _SAL1_Source_(__deref_inout_ecount_nz, (size), __deref_inout_ecount(size)) -#define __deref_inout_bcount_nz(size) \ - _SAL1_Source_(__deref_inout_bcount_nz, (size), __deref_inout_ecount(size)) -#define __deref_ecount_opt(size) \ - _SAL1_Source_(__deref_ecount_opt, (size), __deref_ecount(size) __post_deref_except_maybenull) -#define __deref_bcount_opt(size) \ - _SAL1_Source_(__deref_bcount_opt, (size), __deref_bcount(size) __post_deref_except_maybenull) -#define __deref_out_opt \ - _SAL1_Source_(__deref_out_opt, (), __deref_out __post_deref_except_maybenull) -#define __deref_out_ecount_opt(size) \ - _SAL1_Source_(__deref_out_ecount_opt, (size), \ - __deref_out_ecount(size) __post_deref_except_maybenull) -#define __deref_out_bcount_opt(size) \ - _SAL1_Source_(__deref_out_bcount_opt, (size), \ - __deref_out_bcount(size) __post_deref_except_maybenull) -#define __deref_out_ecount_part_opt(size, length) \ - _SAL1_Source_(__deref_out_ecount_part_opt, (size, length), \ - __deref_out_ecount_part(size, length) __post_deref_except_maybenull) -#define __deref_out_bcount_part_opt(size, length) \ - _SAL1_Source_(__deref_out_bcount_part_opt, (size, length), \ - __deref_out_bcount_part(size, length) __post_deref_except_maybenull) -#define __deref_out_ecount_full_opt(size) \ - _SAL1_Source_(__deref_out_ecount_full_opt, (size), \ - __deref_out_ecount_full(size) __post_deref_except_maybenull) -#define __deref_out_bcount_full_opt(size) \ - _SAL1_Source_(__deref_out_bcount_full_opt, (size), \ - __deref_out_bcount_full(size) __post_deref_except_maybenull) -#define __deref_out_z_opt _SAL1_Source_(__deref_out_z_opt, (), _Outptr_result_maybenull_z_) -#define __deref_out_ecount_z_opt(size) \ - _SAL1_Source_(__deref_out_ecount_z_opt, (size), \ - __deref_out_ecount_opt(size) __post __deref __nullterminated) -#define __deref_out_bcount_z_opt(size) \ - _SAL1_Source_(__deref_out_bcount_z_opt, (size), \ - __deref_out_bcount_opt(size) __post __deref __nullterminated) -#define __deref_out_nz_opt _SAL1_Source_(__deref_out_nz_opt, (), __deref_out_opt) -#define __deref_out_ecount_nz_opt(size) \ - _SAL1_Source_(__deref_out_ecount_nz_opt, (size), __deref_out_ecount_opt(size)) -#define __deref_out_bcount_nz_opt(size) \ - _SAL1_Source_(__deref_out_bcount_nz_opt, (size), __deref_out_bcount_opt(size)) -#define __deref_inout_opt \ - _SAL1_Source_(__deref_inout_opt, (), \ - __deref_inout __pre_deref_except_maybenull __post_deref_except_maybenull) -#define __deref_inout_ecount_opt(size) \ - _SAL1_Source_(__deref_inout_ecount_opt, (size), \ - __deref_inout_ecount(size) \ - __pre_deref_except_maybenull __post_deref_except_maybenull) -#define __deref_inout_bcount_opt(size) \ - _SAL1_Source_(__deref_inout_bcount_opt, (size), \ - __deref_inout_bcount(size) \ - __pre_deref_except_maybenull __post_deref_except_maybenull) -#define __deref_inout_ecount_part_opt(size, length) \ - _SAL1_Source_(__deref_inout_ecount_part_opt, (size, length), \ - __deref_inout_ecount_part(size, length) \ - __pre_deref_except_maybenull __post_deref_except_maybenull) -#define __deref_inout_bcount_part_opt(size, length) \ - _SAL1_Source_(__deref_inout_bcount_part_opt, (size, length), \ - __deref_inout_bcount_part(size, length) \ - __pre_deref_except_maybenull __post_deref_except_maybenull) -#define __deref_inout_ecount_full_opt(size) \ - _SAL1_Source_(__deref_inout_ecount_full_opt, (size), \ - __deref_inout_ecount_full(size) \ - __pre_deref_except_maybenull __post_deref_except_maybenull) -#define __deref_inout_bcount_full_opt(size) \ - _SAL1_Source_(__deref_inout_bcount_full_opt, (size), \ - __deref_inout_bcount_full(size) \ - __pre_deref_except_maybenull __post_deref_except_maybenull) -#define __deref_inout_z_opt \ - _SAL1_Source_( \ - __deref_inout_z_opt, (), \ - __deref_inout_opt __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_inout_ecount_z_opt(size) \ - _SAL1_Source_(__deref_inout_ecount_z_opt, (size), \ - __deref_inout_ecount_opt(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_inout_bcount_z_opt(size) \ - _SAL1_Source_(__deref_inout_bcount_z_opt, (size), \ - __deref_inout_bcount_opt(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_inout_nz_opt _SAL1_Source_(__deref_inout_nz_opt, (), __deref_inout_opt) -#define __deref_inout_ecount_nz_opt(size) \ - _SAL1_Source_(__deref_inout_ecount_nz_opt, (size), __deref_inout_ecount_opt(size)) -#define __deref_inout_bcount_nz_opt(size) \ - _SAL1_Source_(__deref_inout_bcount_nz_opt, (size), __deref_inout_bcount_opt(size)) -#define __deref_opt_ecount(size) \ - _SAL1_Source_(__deref_opt_ecount, (size), __deref_ecount(size) __pre_except_maybenull) -#define __deref_opt_bcount(size) \ - _SAL1_Source_(__deref_opt_bcount, (size), __deref_bcount(size) __pre_except_maybenull) -#define __deref_opt_out _SAL1_Source_(__deref_opt_out, (), _Outptr_opt_) -#define __deref_opt_out_z _SAL1_Source_(__deref_opt_out_z, (), _Outptr_opt_result_z_) -#define __deref_opt_out_ecount(size) \ - _SAL1_Source_(__deref_opt_out_ecount, (size), __deref_out_ecount(size) __pre_except_maybenull) -#define __deref_opt_out_bcount(size) \ - _SAL1_Source_(__deref_opt_out_bcount, (size), __deref_out_bcount(size) __pre_except_maybenull) -#define __deref_opt_out_ecount_part(size, length) \ - _SAL1_Source_(__deref_opt_out_ecount_part, (size, length), \ - __deref_out_ecount_part(size, length) __pre_except_maybenull) -#define __deref_opt_out_bcount_part(size, length) \ - _SAL1_Source_(__deref_opt_out_bcount_part, (size, length), \ - __deref_out_bcount_part(size, length) __pre_except_maybenull) -#define __deref_opt_out_ecount_full(size) \ - _SAL1_Source_(__deref_opt_out_ecount_full, (size), \ - __deref_out_ecount_full(size) __pre_except_maybenull) -#define __deref_opt_out_bcount_full(size) \ - _SAL1_Source_(__deref_opt_out_bcount_full, (size), \ - __deref_out_bcount_full(size) __pre_except_maybenull) -#define __deref_opt_inout _SAL1_Source_(__deref_opt_inout, (), _Inout_opt_) -#define __deref_opt_inout_ecount(size) \ - _SAL1_Source_(__deref_opt_inout_ecount, (size), \ - __deref_inout_ecount(size) __pre_except_maybenull) -#define __deref_opt_inout_bcount(size) \ - _SAL1_Source_(__deref_opt_inout_bcount, (size), \ - __deref_inout_bcount(size) __pre_except_maybenull) -#define __deref_opt_inout_ecount_part(size, length) \ - _SAL1_Source_(__deref_opt_inout_ecount_part, (size, length), \ - __deref_inout_ecount_part(size, length) __pre_except_maybenull) -#define __deref_opt_inout_bcount_part(size, length) \ - _SAL1_Source_(__deref_opt_inout_bcount_part, (size, length), \ - __deref_inout_bcount_part(size, length) __pre_except_maybenull) -#define __deref_opt_inout_ecount_full(size) \ - _SAL1_Source_(__deref_opt_inout_ecount_full, (size), \ - __deref_inout_ecount_full(size) __pre_except_maybenull) -#define __deref_opt_inout_bcount_full(size) \ - _SAL1_Source_(__deref_opt_inout_bcount_full, (size), \ - __deref_inout_bcount_full(size) __pre_except_maybenull) -#define __deref_opt_inout_z \ - _SAL1_Source_( \ - __deref_opt_inout_z, (), \ - __deref_opt_inout __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_opt_inout_ecount_z(size) \ - _SAL1_Source_(__deref_opt_inout_ecount_z, (size), \ - __deref_opt_inout_ecount(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_opt_inout_bcount_z(size) \ - _SAL1_Source_(__deref_opt_inout_bcount_z, (size), \ - __deref_opt_inout_bcount(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_opt_inout_nz _SAL1_Source_(__deref_opt_inout_nz, (), __deref_opt_inout) -#define __deref_opt_inout_ecount_nz(size) \ - _SAL1_Source_(__deref_opt_inout_ecount_nz, (size), __deref_opt_inout_ecount(size)) -#define __deref_opt_inout_bcount_nz(size) \ - _SAL1_Source_(__deref_opt_inout_bcount_nz, (size), __deref_opt_inout_bcount(size)) -#define __deref_opt_ecount_opt(size) \ - _SAL1_Source_(__deref_opt_ecount_opt, (size), __deref_ecount_opt(size) __pre_except_maybenull) -#define __deref_opt_bcount_opt(size) \ - _SAL1_Source_(__deref_opt_bcount_opt, (size), __deref_bcount_opt(size) __pre_except_maybenull) -#define __deref_opt_out_opt _SAL1_Source_(__deref_opt_out_opt, (), _Outptr_opt_result_maybenull_) -#define __deref_opt_out_ecount_opt(size) \ - _SAL1_Source_(__deref_opt_out_ecount_opt, (size), \ - __deref_out_ecount_opt(size) __pre_except_maybenull) -#define __deref_opt_out_bcount_opt(size) \ - _SAL1_Source_(__deref_opt_out_bcount_opt, (size), \ - __deref_out_bcount_opt(size) __pre_except_maybenull) -#define __deref_opt_out_ecount_part_opt(size, length) \ - _SAL1_Source_(__deref_opt_out_ecount_part_opt, (size, length), \ - __deref_out_ecount_part_opt(size, length) __pre_except_maybenull) -#define __deref_opt_out_bcount_part_opt(size, length) \ - _SAL1_Source_(__deref_opt_out_bcount_part_opt, (size, length), \ - __deref_out_bcount_part_opt(size, length) __pre_except_maybenull) -#define __deref_opt_out_ecount_full_opt(size) \ - _SAL1_Source_(__deref_opt_out_ecount_full_opt, (size), \ - __deref_out_ecount_full_opt(size) __pre_except_maybenull) -#define __deref_opt_out_bcount_full_opt(size) \ - _SAL1_Source_(__deref_opt_out_bcount_full_opt, (size), \ - __deref_out_bcount_full_opt(size) __pre_except_maybenull) -#define __deref_opt_out_z_opt \ - _SAL1_Source_( \ - __deref_opt_out_z_opt, (), \ - __post __deref __valid __refparam __pre_except_maybenull __pre_deref_except_maybenull \ - __post_deref_except_maybenull __post __deref __nullterminated) -#define __deref_opt_out_ecount_z_opt(size) \ - _SAL1_Source_(__deref_opt_out_ecount_z_opt, (size), \ - __deref_opt_out_ecount_opt(size) __post __deref __nullterminated) -#define __deref_opt_out_bcount_z_opt(size) \ - _SAL1_Source_(__deref_opt_out_bcount_z_opt, (size), \ - __deref_opt_out_bcount_opt(size) __post __deref __nullterminated) -#define __deref_opt_out_nz_opt _SAL1_Source_(__deref_opt_out_nz_opt, (), __deref_opt_out_opt) -#define __deref_opt_out_ecount_nz_opt(size) \ - _SAL1_Source_(__deref_opt_out_ecount_nz_opt, (size), __deref_opt_out_ecount_opt(size)) -#define __deref_opt_out_bcount_nz_opt(size) \ - _SAL1_Source_(__deref_opt_out_bcount_nz_opt, (size), __deref_opt_out_bcount_opt(size)) -#define __deref_opt_inout_opt \ - _SAL1_Source_(__deref_opt_inout_opt, (), __deref_inout_opt __pre_except_maybenull) -#define __deref_opt_inout_ecount_opt(size) \ - _SAL1_Source_(__deref_opt_inout_ecount_opt, (size), \ - __deref_inout_ecount_opt(size) __pre_except_maybenull) -#define __deref_opt_inout_bcount_opt(size) \ - _SAL1_Source_(__deref_opt_inout_bcount_opt, (size), \ - __deref_inout_bcount_opt(size) __pre_except_maybenull) -#define __deref_opt_inout_ecount_part_opt(size, length) \ - _SAL1_Source_(__deref_opt_inout_ecount_part_opt, (size, length), \ - __deref_inout_ecount_part_opt(size, length) __pre_except_maybenull) -#define __deref_opt_inout_bcount_part_opt(size, length) \ - _SAL1_Source_(__deref_opt_inout_bcount_part_opt, (size, length), \ - __deref_inout_bcount_part_opt(size, length) __pre_except_maybenull) -#define __deref_opt_inout_ecount_full_opt(size) \ - _SAL1_Source_(__deref_opt_inout_ecount_full_opt, (size), \ - __deref_inout_ecount_full_opt(size) __pre_except_maybenull) -#define __deref_opt_inout_bcount_full_opt(size) \ - _SAL1_Source_(__deref_opt_inout_bcount_full_opt, (size), \ - __deref_inout_bcount_full_opt(size) __pre_except_maybenull) -#define __deref_opt_inout_z_opt \ - _SAL1_Source_( \ - __deref_opt_inout_z_opt, (), \ - __deref_opt_inout_opt __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_opt_inout_ecount_z_opt(size) \ - _SAL1_Source_(__deref_opt_inout_ecount_z_opt, (size), \ - __deref_opt_inout_ecount_opt(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_opt_inout_bcount_z_opt(size) \ - _SAL1_Source_(__deref_opt_inout_bcount_z_opt, (size), \ - __deref_opt_inout_bcount_opt(size) \ - __pre __deref __nullterminated __post __deref __nullterminated) -#define __deref_opt_inout_nz_opt _SAL1_Source_(__deref_opt_inout_nz_opt, (), __deref_opt_inout_opt) -#define __deref_opt_inout_ecount_nz_opt(size) \ - _SAL1_Source_(__deref_opt_inout_ecount_nz_opt, (size), __deref_opt_inout_ecount_opt(size)) -#define __deref_opt_inout_bcount_nz_opt(size) \ - _SAL1_Source_(__deref_opt_inout_bcount_nz_opt, (size), __deref_opt_inout_bcount_opt(size)) - -/* -------------------------------------------------------------------------------- -Advanced Annotation Definitions - -Any of these may be used to directly annotate functions, and may be used in -combination with each other or with regular buffer macros. For an explanation -of each annotation, see the advanced annotations section. -------------------------------------------------------------------------------- -*/ - -#define __success(expr) _Success_(expr) -#define __nullterminated _Null_terminated_ -#define __nullnullterminated -#define __clr_reserved _SAL1_Source_(__reserved, (), _Reserved_) -#define __checkReturn _SAL1_Source_(__checkReturn, (), _Check_return_) -#define __typefix(ctype) _SAL1_Source_(__typefix, (ctype), __inner_typefix(ctype)) -#define __override __inner_override -#define __callback __inner_callback -#define __format_string _Printf_format_string_ -#define __blocksOn(resource) __inner_blocksOn(resource) -#define __control_entrypoint(category) __inner_control_entrypoint(category) -#define __data_entrypoint(category) __inner_data_entrypoint(category) -#define __useHeader _Use_decl_anno_impl_ -#define __on_failure(annotes) _On_failure_impl_(annotes _SAL_nop_impl_) - -#ifndef __has_cpp_attribute -#define __has_cpp_attribute(x) (0) -#endif - -#ifndef __fallthrough // [ -#if __has_cpp_attribute(fallthrough) -#define __fallthrough [[fallthrough]] -#else -#define __fallthrough -#endif -#endif // ] - -#ifndef __analysis_assume // [ -#ifdef _PREFAST_ // [ -#define __analysis_assume(expr) __assume(expr) -#else // ][ -#define __analysis_assume(expr) -#endif // ] -#endif // ] - -#ifndef _Analysis_assume_ // [ -#ifdef _PREFAST_ // [ -#define _Analysis_assume_(expr) __assume(expr) -#else // ][ -#define _Analysis_assume_(expr) -#endif // ] -#endif // ] - -#define _Analysis_noreturn_ _SAL2_Source_(_Analysis_noreturn_, (), _SA_annotes0(SAL_terminates)) - -#ifdef _PREFAST_ // [ -__inline __nothrow void __AnalysisAssumeNullterminated(_Post_ __nullterminated void *p); - -#define _Analysis_assume_nullterminated_(x) __AnalysisAssumeNullterminated(x) -#else // ][ -#define _Analysis_assume_nullterminated_(x) -#endif // ] - -// -// Set the analysis mode (global flags to analysis). -// They take effect at the point of declaration; use at global scope -// as a declaration. -// - -// Synthesize a unique symbol. -#define ___MKID(x, y) x##y -#define __MKID(x, y) ___MKID(x, y) -#define __GENSYM(x) __MKID(x, __COUNTER__) - -__ANNOTATION(SAL_analysisMode(__AuToQuOtE __In_impl_ char *mode);) - -#define _Analysis_mode_impl_(mode) _SA_annotes1(SAL_analysisMode, #mode) - -#define _Analysis_mode_(mode) \ - typedef _Analysis_mode_impl_(mode) int __GENSYM(__prefast_analysis_mode_flag); - -// The following are predefined: -// _Analysis_operator_new_throw_ (operator new throws) -// _Analysis_operator_new_null_ (operator new returns null) -// _Analysis_operator_new_never_fails_ (operator new never fails) -// - -// Function class annotations. -__ANNOTATION(SAL_functionClassNew(__In_impl_ char *);) -__PRIMOP(int, _In_function_class_(__In_impl_ char *);) -#define _In_function_class_(x) _In_function_class_(#x) - -#define _Function_class_(x) _SA_annotes1(SAL_functionClassNew, #x) - -/* - * interlocked operand used in interlocked instructions - */ -// #define _Interlocked_operand_ _Pre_ _SA_annotes0(SAL_interlocked) - -#define _Enum_is_bitflag_ _SA_annotes0(SAL_enumIsBitflag) -#define _Strict_type_match_ _SA_annotes0(SAL_strictType2) - -#define _Maybe_raises_SEH_exception_ _Pre_ _SA_annotes1(SAL_inTry, __yes) -#define _Raises_SEH_exception_ _Group_(_Maybe_raises_SEH_exception_ _Analysis_noreturn_) - -// Clean up macros that collide with libstdc++ internals -#undef __valid -#undef __notvalid -#undef __maybevalid - -#ifdef __cplusplus // [ -} -#endif // ] \ No newline at end of file diff --git a/targets/app/linux/Stubs/d3d11_stubs.h b/targets/app/linux/Stubs/d3d11_stubs.h index 225255a4e..a2f38881d 100644 --- a/targets/app/linux/Stubs/d3d11_stubs.h +++ b/targets/app/linux/Stubs/d3d11_stubs.h @@ -3,11 +3,8 @@ #pragma once -#include "app/linux/Stubs/DirectXMath/DirectXMath.h" #include "winapi_stubs.h" -using namespace DirectX; - typedef struct _RECT { LONG left; LONG top; diff --git a/targets/minecraft/client/Camera.cpp b/targets/minecraft/client/Camera.cpp index ca9f7edb6..ded658065 100644 --- a/targets/minecraft/client/Camera.cpp +++ b/targets/minecraft/client/Camera.cpp @@ -1,13 +1,13 @@ #include "Camera.h" #include +#include #include #include #include #include "MemoryTracker.h" -#include "app/linux/Stubs/DirectXMath/DirectXMath.h" #include "app/include/stubs.h" #include "java/FloatBuffer.h" #include "minecraft/world/entity/LivingEntity.h" @@ -54,18 +54,16 @@ zPlayerOffs = position->get(2); // this is just working out how to get a (0,0,0) point in clip space to pass // into the inverted combined model/view/projection matrix, so we just need // to get this matrix and get its translation as an equivalent. - DirectX::XMMATRIX _modelview, _proj, _final, _invert; - DirectX::XMVECTOR _det; - DirectX::XMFLOAT4 trans; + glm::mat4 _modelview, _proj, _final, _invert; + glm::vec4 trans; memcpy(&_modelview, modelview->_getDataPointer(), 64); memcpy(&_proj, projection->_getDataPointer(), 64); - _final = XMMatrixMultiply(_modelview, _proj); - _det = XMMatrixDeterminant(_final); - _invert = XMMatrixInverse(&_det, _final); + _final = _proj * _modelview; // GLM is column-major; reverse multiply order + _invert = glm::inverse(_final); - XMStoreFloat4(&trans, _invert.r[3]); + trans = _invert[3]; // column 3 = translation column in column-major xPlayerOffs = trans.x / trans.w; yPlayerOffs = trans.y / trans.w; diff --git a/targets/minecraft/meson.build b/targets/minecraft/meson.build index 2f24ef871..4623b1e05 100644 --- a/targets/minecraft/meson.build +++ b/targets/minecraft/meson.build @@ -52,6 +52,7 @@ lib_minecraft = static_library('minecraft', input_dep, profile_dep, storage_dep, + glm_dep, nbt_dep, java_dep, assets_localisation_dep, diff --git a/targets/platform/meson.build b/targets/platform/meson.build index 4e216ab0a..49aed3276 100644 --- a/targets/platform/meson.build +++ b/targets/platform/meson.build @@ -7,7 +7,6 @@ platform_dep = declare_dependency( # SDL2-based platform implementations (formerly 4J.* modules) _sdl2 = dependency('sdl2') _threads = dependency('threads') -_glm = dependency('glm') _defs = [] if get_option('renderer') == 'gles' @@ -28,7 +27,7 @@ sdl2_sources = files( lib_platform_sdl2 = static_library('platform_sdl2', sdl2_sources, include_directories: [platform_inc, include_directories('sdl2')], - dependencies: [_sdl2, _gl, _threads, _glm, stb_dep], + dependencies: [_sdl2, _gl, _threads, glm_dep, stb_dep], cpp_args: _defs + global_cpp_args + global_cpp_defs, ) @@ -37,7 +36,7 @@ lib_platform_sdl2 = static_library('platform_sdl2', render_dep = declare_dependency( link_with: lib_platform_sdl2, include_directories: [platform_inc, include_directories('sdl2')], - dependencies: [_sdl2, _gl, _threads, _glm], + dependencies: [_sdl2, _gl, _threads, glm_dep], ) input_dep = render_dep profile_dep = render_dep