| 1 | // ======================================================================== // |
| 2 | // Copyright 2009-2019 Intel Corporation // |
| 3 | // // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); // |
| 5 | // you may not use this file except in compliance with the License. // |
| 6 | // You may obtain a copy of the License at // |
| 7 | // // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 // |
| 9 | // // |
| 10 | // Unless required by applicable law or agreed to in writing, software // |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, // |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // |
| 13 | // See the License for the specific language governing permissions and // |
| 14 | // limitations under the License. // |
| 15 | // ======================================================================== // |
| 16 | |
| 17 | #pragma once |
| 18 | |
| 19 | #include "common/platform.h" |
| 20 | |
| 21 | namespace oidn { |
| 22 | |
// Lower bound on vector length used by this library (1e-10).
// NOTE(review): presumably guards against normalizing near-zero vectors; the
// call sites are not visible in this header - confirm.
constexpr float minVectorLength{1e-10f};
// Square of minVectorLength, for comparisons against squared lengths
constexpr float minVectorLengthSqr{minVectorLength * minVectorLength};
| 25 | |
// Make the standard math functions and floating-point classification
// predicates callable without the std:: prefix
using std::log;
using std::log2;
using std::exp;
using std::exp2;
using std::pow;
using std::isfinite;
using std::isnan;
| 33 | |
// Returns x squared (x * x)
__forceinline float sqr(float x)
{
  const float squared = x * x;
  return squared;
}
| 38 | |
// Returns an approximation of 1 / x.
// Starts from the hardware reciprocal estimate (_mm_rcp_ss) and refines it
// with one Newton-Raphson iteration: r' = 2r - r*r*x.
// NOTE(review): for x == 0 the estimate is expected to be infinite, so the
// refinement multiplies infinity by zero and the result would be NaN rather
// than infinity - confirm whether callers rely on this.
__forceinline float rcp(float x)
{
  const __m128 xs = _mm_set_ss(x);
  const __m128 estimate = _mm_rcp_ss(xs);

  const __m128 twiceEstimate = _mm_add_ss(estimate, estimate);              // 2r
  const __m128 correction = _mm_mul_ss(_mm_mul_ss(estimate, estimate), xs); // r*r*x

  return _mm_cvtss_f32(_mm_sub_ss(twiceEstimate, correction));
}
| 44 | |
// Returns an approximation of 1 / sqrt(x).
// Starts from the hardware estimate (_mm_rsqrt_ss) and refines it with one
// Newton-Raphson iteration: r' = 1.5*r + (-0.5*x*r) * (r*r).
__forceinline float rsqrt(float x)
{
  const __m128 xs = _mm_set_ss(x);
  const __m128 estimate = _mm_rsqrt_ss(xs);

  const __m128 scaledEstimate = _mm_mul_ss(_mm_set_ss(1.5f), estimate);                     // 1.5*r
  const __m128 negHalfXTimesR = _mm_mul_ss(_mm_mul_ss(xs, _mm_set_ss(-0.5f)), estimate);    // -0.5*x*r
  const __m128 estimateSqr = _mm_mul_ss(estimate, estimate);                                // r*r

  return _mm_cvtss_f32(_mm_add_ss(scaledEstimate, _mm_mul_ss(negHalfXTimesR, estimateSqr)));
}
| 51 | |
// Returns max(value, minValue) when value is finite.
// A non-finite value (NaN or +/-infinity) yields minValue instead.
__forceinline float maxSafe(float value, float minValue)
{
  if (!isfinite(value))
    return minValue;

  return max(value, minValue);
}
| 56 | |
// Returns clamp(value, minValue, maxValue) when value is finite.
// A non-finite value (NaN or +/-infinity, including +infinity) yields minValue.
__forceinline float clampSafe(float value, float minValue, float maxValue)
{
  if (!isfinite(value))
    return minValue;

  return clamp(value, minValue, maxValue);
}
| 61 | |
// Returns ceil(a / b) for integers a >= 0 and b > 0.
// The result is the truncated quotient plus one when the division leaves a
// remainder. Unlike the (a + b - 1) / b formulation, no intermediate value
// exceeds a, so the computation cannot overflow (signed) or wrap (unsigned)
// when a is close to the maximum of Int.
// The body is a single return statement so the function stays usable as a
// C++11 constexpr function.
template<class Int>
__forceinline constexpr Int ceilDiv(Int a, Int b)
{
  //assert(a >= 0);
  //assert(b > 0);
  return a / b + (a % b != 0 ? 1 : 0);
}
| 70 | |
// Rounds a up to the nearest multiple of b: the number of b-sized chunks
// needed to cover a (ceilDiv) multiplied by the chunk size b.
// Kept as a single return statement so it remains a valid C++11 constexpr function.
template<class Int>
__forceinline constexpr Int roundUp(Int a, Int b)
{
  return b * ceilDiv(a, b);
}
| 77 | |
| 78 | } // namespace oidn |
| 79 | |