diff --git a/include/boost/polygon/detail/voronoi_ctypes.hpp b/include/boost/polygon/detail/voronoi_ctypes.hpp index 078d9351..efc53427 100644 --- a/include/boost/polygon/detail/voronoi_ctypes.hpp +++ b/include/boost/polygon/detail/voronoi_ctypes.hpp @@ -11,13 +11,26 @@ #define BOOST_POLYGON_DETAIL_VORONOI_CTYPES #include +#include #include +#include #include +#include #include #include #include +#if BOOST_VORONOI_USE_GMP +#include +#if GMP_NAIL_BITS != 0 + #error boost::voronoi expects GMP_LIMB_BITS == 0 +#endif +#if GMP_NUMB_BITS != 32 && GMP_NUMB_BITS != 64 + #error boost::voronoi expects GMP_NUMB_BITS == 32 or 64 +#endif +#endif /* BOOST_VORONOI_USE_GMP */ + namespace boost { namespace polygon { namespace detail { @@ -182,6 +195,10 @@ class extended_exponent_fpt { return *this = *this / that; } + extended_exponent_fpt sqr() const { + return extended_exponent_fpt(this->val_ * this->val_, this->exp_ * 2); + } + extended_exponent_fpt sqrt() const { fpt_type val = val_; exp_type exp = exp_; @@ -192,6 +209,11 @@ class extended_exponent_fpt { return extended_exponent_fpt(std::sqrt(val), exp >> 1); } + // Add to the exponent. + extended_exponent_fpt addexp(int n) const { + return extended_exponent_fpt(this->val_, this->exp_ + n); + } + fpt_type d() const { return std::ldexp(val_, exp_); } @@ -202,6 +224,11 @@ class extended_exponent_fpt { }; typedef extended_exponent_fpt efpt64; +template +extended_exponent_fpt<_fpt> sqr(const extended_exponent_fpt<_fpt>& that) { + return that.sqr(); +} + template extended_exponent_fpt<_fpt> get_sqrt(const extended_exponent_fpt<_fpt>& that) { return that.sqrt(); @@ -222,11 +249,79 @@ bool is_zero(const extended_exponent_fpt<_fpt>& that) { return that.is_zero(); } +#if BOOST_VORONOI_USE_GMP + #if GMP_NUMB_BITS == 32 + #define BOOST_VORONOI_64_T 0 + #elif GMP_NUMB_BITS == 64 + #define BOOST_VORONOI_64_T 1 + #else + #error BOOST_VORONOI_USE_GMP: mp_limb_t is expected to be either 32 or 64bit integer type. + #endif +#else + #if INTPTR_MAX == INT64_MAX + // Multi-precision using 64bit ints on a 64bit compiler. + #define BOOST_VORONOI_64_T 1 + #else + // Multi-precision using 32bit ints otherwise. + #define BOOST_VORONOI_64_T 0 + #endif +#endif /* BOOST_VORONOI_USE_GMP */ + // Very efficient stack allocated big integer class. // Supports next set of arithmetic operations: +, -, *. -template +template class extended_int { public: + + static constexpr std::size_t N = AN; + + using chunk_type = +#if BOOST_VORONOI_USE_GMP + mp_limb_t +#elif BOOST_VORONOI_64_T + uint64 +#else + uint32 +#endif + ; + + using chunk_type2 = +#if BOOST_VORONOI_64_T + #ifdef _MSC_VER + // Visual studio does not support 128bit type natively. boost::multiprecision::uint128_t utilizes 32bit limbs for multiplication, + // which is not the best we can do on a 64bit system. Give the caller an option to replace the 128bit uint with its own optimized solution. + #ifdef BOOST_VORONOI_UINT128T + BOOST_VORONOI_UINT128T + #else + boost::multiprecision::uint128_t + #endif + #else + unsigned __int128 + #endif +#else + uint64 +#endif + ; + + static constexpr int chunk_shift = +#if BOOST_VORONOI_USE_GMP + GMP_NUMB_BITS +#elif BOOST_VORONOI_64_T + 64 +#else + 32 +#endif + ; + + // Exponent conversion between this type and the extended floating point type. + static constexpr int chunks_to_fptexp = +#if BOOST_VORONOI_64_T + 10 +#else + 5 +#endif + ; + extended_int() {} extended_int(int32 that) { @@ -243,31 +338,33 @@ class extended_int { extended_int(int64 that) { if (that > 0) { - this->chunks_[0] = static_cast(that); +#if BOOST_VORONOI_64_T + this->chunks_[0] = that; + this->count_ = 1; +#else + this->chunks_[0] = static_cast(that); this->chunks_[1] = that >> 32; this->count_ = this->chunks_[1] ? 2 : 1; +#endif } else if (that < 0) { +#if BOOST_VORONOI_64_T + this->chunks_[0] = -that; + this->count_ = -1; +#else that = -that; - this->chunks_[0] = static_cast(that); + this->chunks_[0] = static_cast(that); this->chunks_[1] = that >> 32; this->count_ = this->chunks_[1] ? -2 : -1; +#endif } else { this->count_ = 0; } } - extended_int(const std::vector& chunks, bool plus = true) { - this->count_ = static_cast((std::min)(N, chunks.size())); - for (int i = 0; i < this->count_; ++i) - this->chunks_[i] = chunks[chunks.size() - i - 1]; - if (!plus) - this->count_ = -this->count_; - } - template extended_int(const extended_int& that) { this->count_ = that.count(); - std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(uint32)); + std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(chunk_type)); } extended_int& operator=(int32 that) { @@ -285,14 +382,24 @@ class extended_int { extended_int& operator=(int64 that) { if (that > 0) { - this->chunks_[0] = static_cast(that); +#if BOOST_VORONOI_64_T + this->chunks_[0] = that; + this->count_ = 1; +#else + this->chunks_[0] = static_cast(that); this->chunks_[1] = that >> 32; this->count_ = this->chunks_[1] ? 2 : 1; +#endif } else if (that < 0) { +#if BOOST_VORONOI_64_T + this->chunks_[0] = -that; + this->count_ = -1; +#else that = -that; - this->chunks_[0] = static_cast(that); + this->chunks_[0] = static_cast(that); this->chunks_[1] = that >> 32; this->count_ = this->chunks_[1] ? -2 : -1; +#endif } else { this->count_ = 0; } @@ -302,7 +409,7 @@ class extended_int { template extended_int& operator=(const extended_int& that) { this->count_ = that.count(); - std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(uint32)); + std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(chunk_type)); return *this; } @@ -432,6 +539,26 @@ class extended_int { return ret_val; } + extended_int sqr() const { + extended_int ret_val; + if (! this->count_) + ret_val.count_ = 0; + else + ret_val.mksqr(this->chunks_, this->size()); + return ret_val; + } + + // Square of a positive non-zero value with more than half the limbs utilized. + // Such a number does not fit the result, thus it is right shifted. + // The caller shall be aware of the right shift performed by this function + // on the result. + extended_int sqrext() const { + extended_int ret_val; + assert(this->size() * 2 > N); + ret_val.mksqrext(this->chunks_, this->size()); + return ret_val; + } + void mul(const extended_int& e1, const extended_int& e2) { if (!e1.count() || !e2.count()) { this->count_ = 0; @@ -442,7 +569,36 @@ class extended_int { this->count_ = -this->count_; } - const uint32* chunks() const { + // Multiple of two nonzero values, result of which does not fit the resulting type, + // thus it is right shifted. + // The caller shall be aware of the right shift performed by this function + // on the result. + extended_int mulext(const extended_int& rhs) const { + assert(this->size() > 0); + assert(rhs.size() > 0); + assert(this->size() + rhs.size() > N); + extended_int ret_val; + ret_val.mulext_(this->chunks(), this->size(), rhs.chunks(), rhs.size()); + if ((this->count() > 0) ^ (rhs.count() > 0)) + ret_val.count_ = -ret_val.count_; + return ret_val; + } + + // Shift the limbs of this object by nshift positions right. + void rshiftme(std::size_t nshift) { + std::size_t m = this->size(); + if (nshift >= m) { + // Underflow to zero. + this->count_ = 0; + } else { + m -= nshift; + for (std::size_t i = 0; i < m; ++ i) + this->chunks_[i] = this->chunks_[i + nshift]; + this->count_ = this->count_ > 0 ? m : - m; + } + } + + const chunk_type* chunks() const { return chunks_; } @@ -460,6 +616,47 @@ class extended_int { if (!sz) { return ret_val; } else { +#if BOOST_VORONOI_64_T + if (sz == 1) { + auto l0 = this->chunks_[0]; + uint32 l, h; + memcpy(&l, &l0, sizeof(uint32)); + memcpy(&h, reinterpret_cast(&l0) + sizeof(uint32), sizeof(uint32)); + if (h == 0) { + ret_val.first = static_cast(static_cast(l)); + } else { + ret_val.first = static_cast(h) * + static_cast(0x100000000LL) + + static_cast(l); + } + } else { + assert(sz >= 2); + auto last = this->chunks_[sz - 1]; + auto prev = this->chunks_[sz - 2]; + uint32 l, h; + memcpy(&l, &last, sizeof(uint32)); + memcpy(&h, reinterpret_cast(&last) + sizeof(uint32), sizeof(uint32)); + sz *= 2; + if (h == 0) { + -- sz; + ret_val.first = static_cast(l); + memcpy(&l, &prev, sizeof(uint32)); + memcpy(&h, reinterpret_cast(&prev) + sizeof(uint32), sizeof(uint32)); + ret_val.first *= static_cast(0x100000000LL); + ret_val.first += static_cast(h); + ret_val.first *= static_cast(0x100000000LL); + ret_val.first += static_cast(l); + } else { + ret_val.first = static_cast(h); + ret_val.first *= static_cast(0x100000000LL); + ret_val.first += static_cast(l); + memcpy(&h, reinterpret_cast(&prev) + sizeof(uint32), sizeof(uint32)); + ret_val.first *= static_cast(0x100000000LL); + ret_val.first += static_cast(h); + } + ret_val.second = static_cast((sz - 3) << (chunks_to_fptexp / 2)); + } +#else if (sz == 1) { ret_val.first = static_cast(this->chunks_[0]); } else if (sz == 2) { @@ -471,8 +668,9 @@ class extended_int { ret_val.first *= static_cast(0x100000000LL); ret_val.first += static_cast(this->chunks_[sz - i]); } - ret_val.second = static_cast((sz - 3) << 5); + ret_val.second = static_cast((sz - 3) << chunks_to_fptexp); } +#endif } if (this->count_ < 0) ret_val.first = -ret_val.first; @@ -485,38 +683,50 @@ class extended_int { } private: - void add(const uint32* c1, std::size_t sz1, - const uint32* c2, std::size_t sz2) { + + void add(const chunk_type* c1, std::size_t sz1, + const chunk_type* c2, std::size_t sz2) { if (sz1 < sz2) { add(c2, sz2, c1, sz1); return; } + + assert(sz1 >= sz2); + assert(sz2 > 0); this->count_ = static_cast(sz1); - uint64 temp = 0; +#if BOOST_VORONOI_USE_GMP + mp_limb_t temp = mpn_add(this->chunks_, c1, sz1, c2, sz2); +#else + chunk_type2 temp = 0; for (std::size_t i = 0; i < sz2; ++i) { - temp += static_cast(c1[i]) + static_cast(c2[i]); - this->chunks_[i] = static_cast(temp); - temp >>= 32; + temp += static_cast(c1[i]) + static_cast(c2[i]); + this->chunks_[i] = static_cast(temp); + temp >>= chunk_shift; } for (std::size_t i = sz2; i < sz1; ++i) { - temp += static_cast(c1[i]); - this->chunks_[i] = static_cast(temp); - temp >>= 32; + temp += static_cast(c1[i]); + this->chunks_[i] = static_cast(temp); + temp >>= chunk_shift; } +#endif + // One should never trim the most significant limb. + assert(temp == 0 || this->count_ < N); if (temp && (this->count_ != N)) { - this->chunks_[this->count_] = static_cast(temp); + this->chunks_[this->count_] = static_cast(temp); ++this->count_; } } - void dif(const uint32* c1, std::size_t sz1, - const uint32* c2, std::size_t sz2, + void dif(const chunk_type* c1, std::size_t sz1, + const chunk_type* c2, std::size_t sz2, bool rec = false) { if (sz1 < sz2) { dif(c2, sz2, c1, sz1, true); this->count_ = -this->count_; return; - } else if ((sz1 == sz2) && !rec) { + } + + if ((sz1 == sz2) && !rec) { do { --sz1; if (c1[sz1] < c2[sz1]) { @@ -535,6 +745,14 @@ class extended_int { } sz2 = sz1; } +#if BOOST_VORONOI_USE_GMP + assert(sz1 > sz2 || (sz1 == sz2 && mpn_cmp(c1, c2, sz1) >= 0)); + mp_limb_t borrow = mpn_sub(this->chunks_, c1, sz1, c2, sz2); + assert(! borrow); + this->count_ = sz1; + if (! this->chunks_[this->count_ - 1]) + -- this->count_; +#else this->count_ = static_cast(sz1-1); bool flag = false; for (std::size_t i = 0; i < sz2; ++i) { @@ -547,14 +765,62 @@ class extended_int { } if (this->chunks_[this->count_]) ++this->count_; - } - - void mul(const uint32* c1, std::size_t sz1, - const uint32* c2, std::size_t sz2) { - uint64 cur = 0, nxt, tmp; - this->count_ = static_cast((std::min)(N, sz1 + sz2 - 1)); - for (std::size_t shift = 0; shift < static_cast(this->count_); - ++shift) { +#endif + } + + // multiple of two positive nonzero values + void mul(const chunk_type* c1, std::size_t sz1, + const chunk_type* c2, std::size_t sz2) { + assert(sz1 > 0); + assert(sz2 > 0); + assert(this->chunks_ != c1); + assert(this->chunks_ != c2); + assert(sz1 + sz2 <= N); +#if BOOST_VORONOI_USE_GMP + this->count_ = sz1 + sz2; + auto msb = (sz1 >= sz2) ? mpn_mul(this->chunks_, c1, sz1, c2, sz2) : mpn_mul(this->chunks_, c2, sz2, c1, sz1); + assert(msb == this->chunks_[this->count_ - 1]); + if (msb == 0) + -- this->count_; +#else + chunk_type2 cur = 0, nxt, tmp; + // This stinks. One only calculates maximum N of the most significant limbs + // without letting the caller know. + this->count_ = sz1 + sz2 - 1; + for (std::size_t shift = 0; shift < static_cast(this->count_); ++shift) { + nxt = 0; + for (std::size_t first = 0; first <= shift; ++first) { + if (first >= sz1) + break; + std::size_t second = shift - first; + if (second >= sz2) + continue; + tmp = static_cast(c1[first]) * static_cast(c2[second]); + cur += static_cast(tmp); + nxt += tmp >> chunk_shift; + } + this->chunks_[shift] = static_cast(cur); + cur = nxt + (cur >> chunk_shift); + } + // One should never trim the most significant limb. + assert(cur == 0 || this->count_ < N); + if (cur && (this->count_ != N)) { + this->chunks_[this->count_] = static_cast(cur); + ++this->count_; + } +#endif + } + + // Multiple of two positive nonzero values, result of which does not fit the resulting type, + // thus it is right shifted. + void mulext_(const chunk_type* c1, std::size_t sz1, + const chunk_type* c2, std::size_t sz2) { + assert(sz1 > 0 && sz2 > 0 && sz1 + sz2 > N); + chunk_type2 cur = 0, nxt, tmp; + // This stinks. One only calculates maximum N of the most significant limbs + // without letting the caller know. + this->count_ = N-1; + for (std::size_t shift = 0; shift < static_cast(this->count_); ++shift) { nxt = 0; for (std::size_t first = 0; first <= shift; ++first) { if (first >= sz1) @@ -562,36 +828,108 @@ class extended_int { std::size_t second = shift - first; if (second >= sz2) continue; - tmp = static_cast(c1[first]) * static_cast(c2[second]); - cur += static_cast(tmp); - nxt += tmp >> 32; + tmp = static_cast(c1[first]) * static_cast(c2[second]); + cur += static_cast(tmp); + nxt += tmp >> chunk_shift; + } + this->chunks_[shift] = static_cast(cur); + cur = nxt + (cur >> chunk_shift); + } + // One should never trim the most significant limb. + assert(cur == 0 || this->count_ < N); + if (cur && (this->count_ != N)) { + this->chunks_[this->count_] = static_cast(cur); + ++this->count_; + } + } + + // square of a positive non-zero value + void mksqr(const chunk_type* c, std::size_t sz) { + assert(2 * sz <= N); +#if BOOST_VORONOI_USE_GMP + assert(sz > 0); + assert(this->chunks_ != c); + this->count_ = sz * 2; + mpn_sqr(this->chunks_, c, sz); + if (this->chunks_[this->count_ - 1] == 0) + -- this->count_; +#else + chunk_type2 cur = 0, nxt, tmp; + this->count_ = sz * 2 - 1; + for (std::size_t shift = 0; shift < static_cast(this->count_); ++shift) { + nxt = 0; + for (std::size_t first = 0; first <= shift; ++first) { + if (first >= sz) + break; + std::size_t second = shift - first; + if (second >= sz) + continue; + tmp = static_cast(c[first]) * static_cast(c[second]); + cur += static_cast(tmp); + nxt += tmp >> chunk_shift; + } + this->chunks_[shift] = static_cast(cur); + cur = nxt + (cur >> chunk_shift); + } + // One should never trim the most significant limb. + assert(cur == 0 || this->count_ < N); + if (cur && (this->count_ != N)) { + this->chunks_[this->count_] = static_cast(cur); + ++this->count_; + } +#endif + } + + // Square of a positive non-zero value with more than half the limbs utilized. + // Such a number does not fit the result, thus it is right shifted. + void mksqrext(const chunk_type* c, std::size_t sz) { + chunk_type2 cur = 0, nxt, tmp; + this->count_ = N-1; + for (std::size_t shift = 0; shift < static_cast(this->count_); ++shift) { + nxt = 0; + for (std::size_t first = 0; first <= shift; ++first) { + if (first >= sz) + break; + std::size_t second = shift - first; + if (second >= sz) + continue; + tmp = static_cast(c[first]) * static_cast(c[second]); + cur += static_cast(tmp); + nxt += tmp >> chunk_shift; } - this->chunks_[shift] = static_cast(cur); - cur = nxt + (cur >> 32); + this->chunks_[shift] = static_cast(cur); + cur = nxt + (cur >> chunk_shift); } + // One should never trim the most significant limb. + assert(cur == 0 || this->count_ < N); if (cur && (this->count_ != N)) { - this->chunks_[this->count_] = static_cast(cur); + this->chunks_[this->count_] = static_cast(cur); ++this->count_; } } - uint32 chunks_[N]; + chunk_type chunks_[N]; int32 count_; }; +template +extended_int sqr(const extended_int& that) { + return that.sqr(); +} + template bool is_pos(const extended_int& that) { - return that.count() > 0; + return that.is_pos(); } template bool is_neg(const extended_int& that) { - return that.count() < 0; + return that.is_neg(); } template bool is_zero(const extended_int& that) { - return !that.count(); + return that.is_zero(); } struct type_converter_fpt { @@ -629,7 +967,13 @@ struct voronoi_ctype_traits { typedef int32 int_type; typedef int64 int_x2_type; typedef uint64 uint_x2_type; +#if BOOST_VORONOI_64_T + // using uint64 + typedef extended_int<33> big_int_type; +#else + // using uint32 typedef extended_int<64> big_int_type; +#endif typedef fpt64 fpt_type; typedef extended_exponent_fpt efpt_type; typedef ulp_comparison ulp_cmp_type; diff --git a/include/boost/polygon/detail/voronoi_predicates.hpp b/include/boost/polygon/detail/voronoi_predicates.hpp index b6314d35..a25662c8 100644 --- a/include/boost/polygon/detail/voronoi_predicates.hpp +++ b/include/boost/polygon/detail/voronoi_predicates.hpp @@ -18,6 +18,10 @@ namespace boost { namespace polygon { namespace detail { +inline double sqr(const double d) { + return d * d; +} + // Predicate utilities. Operates with the coordinate types that could // be converted to the 32-bit signed integer without precision loss. template @@ -283,7 +287,7 @@ class voronoi_predicates { fpt_type dx = to_fpt(site.x()) - to_fpt(point.x()); fpt_type dy = to_fpt(site.y()) - to_fpt(point.y()); // The relative error is at most 3EPS. - return (dx * dx + dy * dy) / (to_fpt(2.0) * dx); + return (sqr(dx) + sqr(dy)) / (to_fpt(2.0) * dx); } fpt_type find_distance_to_segment_arc( @@ -295,12 +299,12 @@ class voronoi_predicates { const point_type& segment1 = site.point1(); fpt_type a1 = to_fpt(segment1.x()) - to_fpt(segment0.x()); fpt_type b1 = to_fpt(segment1.y()) - to_fpt(segment0.y()); - fpt_type k = get_sqrt(a1 * a1 + b1 * b1); + fpt_type k = get_sqrt(sqr(a1) + sqr(b1)); // Avoid subtraction while computing k. if (!is_neg(b1)) { k = to_fpt(1.0) / (b1 + k); } else { - k = (k - b1) / (a1 * a1); + k = (k - b1) / sqr(a1); } // The relative error is at most 7EPS. return k * robust_cross_product( @@ -571,9 +575,9 @@ class voronoi_predicates { if (recompute_lower_x) { // Evaluate radius of the circle. - big_int_type sqr_r = (dif_x[0] * dif_x[0] + dif_y[0] * dif_y[0]) * - (dif_x[1] * dif_x[1] + dif_y[1] * dif_y[1]) * - (dif_x[2] * dif_x[2] + dif_y[2] * dif_y[2]); + big_int_type sqr_r = (sqr(dif_x[0]) + sqr(dif_y[0])) * + (sqr(dif_x[1]) + sqr(dif_y[1])) * + (sqr(dif_x[2]) + sqr(dif_y[2])); fpt_type r = get_sqrt(to_fpt(sqr_r)); // If c_x >= 0 then lower_x = c_x + r, @@ -586,7 +590,7 @@ class voronoi_predicates { circle.lower_x(circle.x() - r * inv_denom); } } else { - big_int_type numer = c_x * c_x - sqr_r; + big_int_type numer = sqr(c_x) - sqr_r; fpt_type lower_x = to_fpt(numer) * inv_denom / (to_fpt(c_x) + r); circle.lower_x(lower_x); } @@ -613,7 +617,7 @@ class voronoi_predicates { static_cast(site3.y0()); big_int_type line_b = static_cast(site3.x0()) - static_cast(site3.x1()); - big_int_type segm_len = line_a * line_a + line_b * line_b; + big_int_type segm_len = sqr(line_a) + sqr(line_b); big_int_type vec_x = static_cast(site2.y()) - static_cast(site1.y()); big_int_type vec_y = static_cast(site1.x()) - @@ -638,7 +642,7 @@ class voronoi_predicates { big_int_type sum_AB = A + B; if (is_zero(denom)) { - big_int_type numer = teta * teta - sum_AB * sum_AB; + big_int_type numer = sqr(teta) - sqr(sum_AB); denom = teta * sum_AB; cA[0] = denom * sum_x * 2 + numer * vec_x; cB[0] = segm_len; @@ -657,12 +661,12 @@ class voronoi_predicates { return; } - big_int_type det = (teta * teta + denom * denom) * A * B * 4; + big_int_type det = (sqr(teta) + sqr(denom)) * A * B * 4; fpt_type inv_denom_sqr = to_fpt(1.0) / to_fpt(denom); inv_denom_sqr *= inv_denom_sqr; if (recompute_c_x || recompute_lower_x) { - cA[0] = sum_x * denom * denom + teta * sum_AB * vec_x; + cA[0] = sum_x * sqr(denom) + teta * sum_AB * vec_x; cB[0] = 1; cA[1] = (segment_index == 2) ? -vec_x : vec_x; cB[1] = det; @@ -673,7 +677,7 @@ class voronoi_predicates { } if (recompute_c_y || recompute_lower_x) { - cA[2] = sum_y * denom * denom + teta * sum_AB * vec_y; + cA[2] = sum_y * sqr(denom) + teta * sum_AB * vec_y; cB[2] = 1; cA[3] = (segment_index == 2) ? -vec_y : vec_y; cB[3] = det; @@ -686,7 +690,7 @@ class voronoi_predicates { if (recompute_lower_x) { cB[0] = cB[0] * segm_len; cB[1] = cB[1] * segm_len; - cA[2] = sum_AB * (denom * denom + teta * teta); + cA[2] = sum_AB * (sqr(denom) + sqr(teta)); cB[2] = 1; cA[3] = (segment_index == 2) ? -teta : teta; cB[3] = det; @@ -720,7 +724,7 @@ class voronoi_predicates { big_int_type orientation = a[1] * b[0] - a[0] * b[1]; if (is_zero(orientation)) { fpt_type denom = to_fpt(2.0) * to_fpt( - static_cast(a[0] * a[0] + b[0] * b[0])); + static_cast(sqr(a[0]) + sqr(b[0]))); c[0] = b[0] * (static_cast(segm_start2.x()) - static_cast(segm_start1.x())) - a[0] * (static_cast(segm_start2.y()) - @@ -738,24 +742,24 @@ class voronoi_predicates { if (recompute_c_y) { cA[0] = b[0] * ((point_index == 2) ? 2 : -2); - cA[1] = a[0] * a[0] * (static_cast(segm_start1.y()) + + cA[1] = a[0].sqr() * (static_cast(segm_start1.y()) + static_cast(segm_start2.y())) - a[0] * b[0] * (static_cast(segm_start1.x()) + static_cast(segm_start2.x()) - static_cast(site1.x()) * 2) + - b[0] * b[0] * (static_cast(site1.y()) * 2); + b[0].sqr() * (static_cast(site1.y()) * 2); fpt_type c_y = to_fpt(sqrt_expr_.eval2(cA, cB)); c_event.y(c_y / denom); } if (recompute_c_x || recompute_lower_x) { cA[0] = a[0] * ((point_index == 2) ? 2 : -2); - cA[1] = b[0] * b[0] * (static_cast(segm_start1.x()) + + cA[1] = b[0].sqr() * (static_cast(segm_start1.x()) + static_cast(segm_start2.x())) - a[0] * b[0] * (static_cast(segm_start1.y()) + static_cast(segm_start2.y()) - static_cast(site1.y()) * 2) + - a[0] * a[0] * (static_cast(site1.x()) * 2); + a[0].sqr() * (static_cast(site1.x()) * 2); if (recompute_c_x) { fpt_type c_x = to_fpt(sqrt_expr_.eval2(cA, cB)); @@ -764,7 +768,7 @@ class voronoi_predicates { if (recompute_lower_x) { cA[2] = is_neg(c[0]) ? -c[0] : c[0]; - cB[2] = a[0] * a[0] + b[0] * b[0]; + cB[2] = a[0].sqr() + b[0].sqr(); fpt_type lower_x = to_fpt(sqrt_expr_.eval3(cA, cB)); c_event.lower_x(lower_x / denom); } @@ -791,8 +795,8 @@ class voronoi_predicates { cA[1] = a[0] * -dx + b[0] * -dy; cA[2] = sign; cA[3] = 0; - cB[0] = a[0] * a[0] + b[0] * b[0]; - cB[1] = a[1] * a[1] + b[1] * b[1]; + cB[0] = a[0].sqr() + b[0].sqr(); + cB[1] = a[1].sqr() + b[1].sqr(); cB[2] = a[0] * a[1] + b[0] * b[1]; cB[3] = (a[0] * dy - b[0] * dx) * (a[1] * dy - b[1] * dx) * -2; fpt_type temp = to_fpt( @@ -800,8 +804,8 @@ class voronoi_predicates { fpt_type denom = temp * to_fpt(orientation); if (recompute_c_y) { - cA[0] = b[1] * (dx * dx + dy * dy) - iy * (dx * a[1] + dy * b[1]); - cA[1] = b[0] * (dx * dx + dy * dy) - iy * (dx * a[0] + dy * b[0]); + cA[0] = b[1] * (dx.sqr() + dy.sqr()) - iy * (dx * a[1] + dy * b[1]); + cA[1] = b[0] * (dx.sqr() + dy.sqr()) - iy * (dx * a[0] + dy * b[0]); cA[2] = iy * sign; fpt_type cy = to_fpt( sqrt_expr_evaluator_pss4(cA, cB)); @@ -809,8 +813,8 @@ class voronoi_predicates { } if (recompute_c_x || recompute_lower_x) { - cA[0] = a[1] * (dx * dx + dy * dy) - ix * (dx * a[1] + dy * b[1]); - cA[1] = a[0] * (dx * dx + dy * dy) - ix * (dx * a[0] + dy * b[0]); + cA[0] = a[1] * (dx.sqr() + dy.sqr()) - ix * (dx * a[1] + dy * b[1]); + cA[1] = a[0] * (dx.sqr() + dy.sqr()) - ix * (dx * a[0] + dy * b[0]); cA[2] = ix * sign; if (recompute_c_x) { @@ -820,7 +824,7 @@ class voronoi_predicates { } if (recompute_lower_x) { - cA[3] = orientation * (dx * dx + dy * dy) * (is_neg(temp) ? -1 : 1); + cA[3] = orientation * (dx.sqr() + dy.sqr()) * (is_neg(temp) ? -1 : 1); fpt_type lower_x = to_fpt( sqrt_expr_evaluator_pss4(cA, cB)); c_event.lower_x(lower_x / denom); @@ -867,7 +871,7 @@ class voronoi_predicates { static_cast(site3.x1()); for (int i = 0; i < 3; ++i) - cB[i] = a[i] * a[i] + b[i] * b[i]; + cB[i] = a[i].sqr() + b[i].sqr(); for (int i = 0; i < 3; ++i) { int j = (i+1) % 3; @@ -926,10 +930,10 @@ class voronoi_predicates { get_sqrt(sqrt_expr_.eval2(cA, cB)); if ((!is_neg(lh) && !is_neg(rh)) || (!is_pos(lh) && !is_pos(rh))) return lh + rh; - cA[0] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] - - A[2] * A[2] * B[3] * B[2]; + cA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - + A[2].sqr() * B[3] * B[2]; cB[0] = 1; - cA[1] = A[0] * A[1] * 2 - A[2] * A[2] * B[3]; + cA[1] = A[0] * A[1] * 2 - A[2].sqr() * B[3]; cB[1] = B[0] * B[1]; _fpt numer = sqrt_expr_.eval2(cA, cB); return numer / (lh - rh); @@ -950,11 +954,11 @@ class voronoi_predicates { return lh + rh; cA[0] = A[3] * A[0] * 2; cA[1] = A[3] * A[1] * 2; - cA[2] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] + - A[3] * A[3] - A[2] * A[2] * B[2] * B[3]; - cA[3] = A[0] * A[1] * 2 - A[2] * A[2] * B[3]; + cA[2] = A[0].sqr() * B[0] + A[1].sqr() * B[1] + + A[3].sqr() - A[2].sqr() * B[2] * B[3]; + cA[3] = A[0] * A[1] * 2 - A[2].sqr() * B[3]; cB[3] = B[0] * B[1]; - _fpt numer = sqrt_expr_evaluator_pss3<_int, _fpt>(cA, cB); + _fpt numer = sqrt_expr_evaluator_pss3ext<_int, _fpt>(cA, cB); return numer / (lh - rh); } @@ -968,8 +972,8 @@ class voronoi_predicates { _fpt rh = sqrt_expr_.eval2(A+2, B+2); if ((!is_neg(lh) && !is_neg(rh)) || (!is_pos(lh) && !is_pos(rh))) return lh + rh; - cA[0] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] - - A[2] * A[2] - A[3] * A[3] * B[0] * B[1]; + cA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - + A[2].sqr() - A[3].sqr() * B[0] * B[1]; cB[0] = 1; cA[1] = (A[0] * A[1] - A[2] * A[3]) * 2; cB[1] = B[3]; @@ -977,6 +981,25 @@ class voronoi_predicates { return numer / (lh - rh); } + template + // Evaluates A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) + + // A[2] + A[3] * sqrt(B[0] * B[1]). + // B[3] = B[0] * B[1]. + _fpt sqrt_expr_evaluator_pss3ext(_int *A, _int *B) { + _int cA[2], cB[2]; + _fpt lh = sqrt_expr_.eval2(A, B); + _fpt rh = sqrt_expr_.eval2(A+2, B+2); + if ((!is_neg(lh) && !is_neg(rh)) || (!is_pos(lh) && !is_pos(rh))) + return lh + rh; + cA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - + A[2].sqr() - A[3].sqr() * B[0] * B[1]; + cB[0] = 1; + cA[1] = (A[0] * A[1] - A[2] * A[3]) * 2; + cB[1] = B[3]; + _fpt numer = sqrt_expr_.eval2ext(cA, cB); + return numer / (lh - rh); + } + robust_sqrt_expr_type sqrt_expr_; to_fpt_converter to_fpt; }; @@ -1027,9 +1050,9 @@ class voronoi_predicates { c_y -= robust_fpt_type(dif_y1 * sum_y1 * dif_x2, to_fpt(2.0)); robust_dif_type lower_x(c_x); lower_x -= robust_fpt_type(get_sqrt( - (dif_x1 * dif_x1 + dif_y1 * dif_y1) * - (dif_x2 * dif_x2 + dif_y2 * dif_y2) * - (dif_x3 * dif_x3 + dif_y3 * dif_y3)), to_fpt(5.0)); + (sqr(dif_x1) + sqr(dif_y1)) * + (sqr(dif_x2) + sqr(dif_y2)) * + (sqr(dif_x3) + sqr(dif_y3))), to_fpt(5.0)); c_event = circle_type( c_x.dif().fpv() * inv_orientation.fpv(), c_y.dif().fpv() * inv_orientation.fpv(), @@ -1090,19 +1113,19 @@ class voronoi_predicates { static_cast(site3.x1()) - static_cast(site3.x0())), to_fpt(1.0)); robust_fpt_type inv_segm_len(to_fpt(1.0) / - get_sqrt(line_a * line_a + line_b * line_b), to_fpt(3.0)); + get_sqrt(sqr(line_a) + sqr(line_b)), to_fpt(3.0)); robust_dif_type t; if (ot::eval(denom) == ot::COLLINEAR) { t += teta / (robust_fpt_type(to_fpt(8.0)) * A); t -= A / (robust_fpt_type(to_fpt(2.0)) * teta); } else { - robust_fpt_type det = ((teta * teta + denom * denom) * A * B).sqrt(); + robust_fpt_type det = ((sqr(teta) + sqr(denom)) * A * B).sqrt(); if (segment_index == 2) { - t -= det / (denom * denom); + t -= det / sqr(denom); } else { - t += det / (denom * denom); + t += det / sqr(denom); } - t += teta * (A + B) / (robust_fpt_type(to_fpt(2.0)) * denom * denom); + t += teta * (A + B) / (robust_fpt_type(to_fpt(2.0)) * sqr(denom)); } robust_dif_type c_x, c_y; c_x += robust_fpt_type(to_fpt(0.5) * @@ -1155,7 +1178,7 @@ class voronoi_predicates { static_cast(segm_end2.x()) - static_cast(segm_start2.x())), to_fpt(1.0)); if (ot::eval(orientation) == ot::COLLINEAR) { - robust_fpt_type a(a1 * a1 + b1 * b1, to_fpt(2.0)); + robust_fpt_type a(sqr(a1) + sqr(b1), to_fpt(2.0)); robust_fpt_type c(robust_cross_product( static_cast(segm_end1.y()) - static_cast(segm_start1.y()), @@ -1217,8 +1240,8 @@ class voronoi_predicates { c_event = circle_type(c_x.dif().fpv(), c_y.dif().fpv(), lower_x.dif().fpv()); } else { - robust_fpt_type sqr_sum1(get_sqrt(a1 * a1 + b1 * b1), to_fpt(2.0)); - robust_fpt_type sqr_sum2(get_sqrt(a2 * a2 + b2 * b2), to_fpt(2.0)); + robust_fpt_type sqr_sum1(get_sqrt(sqr(a1) + sqr(b1)), to_fpt(2.0)); + robust_fpt_type sqr_sum2(get_sqrt(sqr(a2) + sqr(b2)), to_fpt(2.0)); robust_fpt_type a(robust_cross_product( static_cast(segm_end1.x()) - static_cast(segm_start1.x()), @@ -1231,7 +1254,7 @@ class voronoi_predicates { if (!is_neg(a)) { a += sqr_sum1 * sqr_sum2; } else { - a = (orientation * orientation) / (sqr_sum1 * sqr_sum2 - a); + a = sqr(orientation) / (sqr_sum1 * sqr_sum2 - a); } robust_fpt_type or1(robust_cross_product( static_cast(segm_end1.y()) - @@ -1298,7 +1321,7 @@ class voronoi_predicates { } else { t -= det.sqrt(); } - t /= (a * a); + t /= sqr(a); robust_dif_type c_x(ix), c_y(iy); c_x += t * (robust_fpt_type(a1) * sqr_sum2); c_x += t * (robust_fpt_type(a2) * sqr_sum1); @@ -1348,9 +1371,9 @@ class voronoi_predicates { site3.x0(), site3.y0(), site3.x1(), site3.y1()), to_fpt(1.0)); - robust_fpt_type len1 = (a1 * a1 + b1 * b1).sqrt(); - robust_fpt_type len2 = (a2 * a2 + b2 * b2).sqrt(); - robust_fpt_type len3 = (a3 * a3 + b3 * b3).sqrt(); + robust_fpt_type len1 = (sqr(a1) + sqr(b1)).sqrt(); + robust_fpt_type len2 = (sqr(a2) + sqr(b2)).sqrt(); + robust_fpt_type len3 = (sqr(a3) + sqr(b3)).sqrt(); robust_fpt_type cross_12(robust_cross_product( static_cast(site1.x1()) - static_cast(site1.x0()), diff --git a/include/boost/polygon/detail/voronoi_robust_fpt.hpp b/include/boost/polygon/detail/voronoi_robust_fpt.hpp index ce72ba8c..d8a4b7b3 100644 --- a/include/boost/polygon/detail/voronoi_robust_fpt.hpp +++ b/include/boost/polygon/detail/voronoi_robust_fpt.hpp @@ -201,6 +201,12 @@ class robust_fpt { return robust_fpt(fpv, re); } + robust_fpt sqr() const { + floating_point_type fpv = this->fpv_ * this->fpv_; + relative_error_type re = this->re_ * 2 + ROUNDING_ERROR; + return robust_fpt(fpv, re); + } + robust_fpt sqrt() const { return robust_fpt(get_sqrt(fpv_), re_ * static_cast(0.5) + @@ -212,6 +218,11 @@ class robust_fpt { relative_error_type re_; }; +template +robust_fpt sqr(const robust_fpt& that) { + return that.sqr(); +} + template robust_fpt get_sqrt(const robust_fpt& that) { return that.sqrt(); @@ -451,7 +462,63 @@ class robust_sqrt_expr { if ((!is_neg(a) && !is_neg(b)) || (!is_pos(a) && !is_pos(b))) return a + b; - return convert(A[0] * A[0] * B[0] - A[1] * A[1] * B[1]) / (a - b); + return convert(A[0].sqr() * B[0] - A[1].sqr() * B[1]) / (a - b); + } + + // Evaluates expression A^2 * B, where the expression may overflow the _int type. + // Returns the expression + exponent. + std::pair<_int, int> eval2ext_a2b(const _int &A, const _int &B) { + int A_size = int(A.size()); + int B_size = int(B.size()); + if (A_size && B_size) { + int rshift = std::max(0, A_size * 2 - int(_int::N)); + _int u = rshift ? A.sqrext() : A.sqr(); + int rshift2 = std::max(0, int(u.size()) + B_size - int(_int::N)); + return std::make_pair(rshift2 ? u.mulext(B) : (u * B), rshift + rshift2); + } else { + return std::make_pair(int(0), 0); + } + } + + // Evaluates expression (re = 7 EPS): + // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]). + // This variant of eval2() may be called with very large A and B, + // where the expression A[0].sqr() * B[0] - A[1].sqr() * B[1] + // could overflow. + _fpt eval2ext(_int* A, _int* B) { + _fpt a = eval1(A, B); + _fpt b = eval1(A + 1, B + 1); + if ((!is_neg(a) && !is_neg(b)) || + (!is_pos(a) && !is_pos(b))) + return a + b; + + std::pair<_int, int> u = eval2ext_a2b(A[0], B[0]); + std::pair<_int, int> v = eval2ext_a2b(A[1], B[1]); + // Make sure the top most limb is empty, so the successive addition + // will fit the result. + if (u.first.size() == _int::N) { + u.first.rshiftme(1); + ++ u.second; + } + if (v.first.size() == _int::N) { + v.first.rshiftme(1); + ++ v.second; + } + assert(u.first.size() < _int::N); + assert(v.first.size() < _int::N); + + // Normalize the two numbers to a common exponent. + int exp; + if (u.second > v.second) { + v.first.rshiftme(u.second - v.second); + exp = u.second; + } else { + u.first.rshiftme(v.second - u.second); + exp = v.second; + } + + // Finally calculate eval2() + return convert(u.first - v.first).addexp(exp << _int::chunks_to_fptexp) / (a - b); } // Evaluates expression (re = 16 EPS): @@ -462,13 +529,27 @@ class robust_sqrt_expr { if ((!is_neg(a) && !is_neg(b)) || (!is_pos(a) && !is_pos(b))) return a + b; - tA[3] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] - A[2] * A[2] * B[2]; + tA[3] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - A[2].sqr() * B[2]; tB[3] = 1; tA[4] = A[0] * A[1] * 2; tB[4] = B[0] * B[1]; return eval2(tA + 3, tB + 3) / (a - b); } + // Evaluates expression (re = 16 EPS): + // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) + A[2] * sqrt(B[2]). + _fpt eval3ext(_int* A, _int* B) { + _fpt a = eval2(A, B); + _fpt b = eval1(A + 2, B + 2); + if ((!is_neg(a) && !is_neg(b)) || + (!is_pos(a) && !is_pos(b))) + return a + b; + tA[3] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - A[2].sqr() * B[2]; + tB[3] = 1; + tA[4] = A[0] * A[1] * 2; + tB[4] = B[0] * B[1]; + return eval2ext(tA + 3, tB + 3) / (a - b); + } // Evaluates expression (re = 25 EPS): // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) + @@ -479,8 +560,8 @@ class robust_sqrt_expr { if ((!is_neg(a) && !is_neg(b)) || (!is_pos(a) && !is_pos(b))) return a + b; - tA[0] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] - - A[2] * A[2] * B[2] - A[3] * A[3] * B[3]; + tA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - + A[2].sqr() * B[2] - A[3].sqr() * B[3]; tB[0] = 1; tA[1] = A[0] * A[1] * 2; tB[1] = B[0] * B[1]; @@ -489,6 +570,25 @@ class robust_sqrt_expr { return eval3(tA, tB) / (a - b); } + // Evaluates expression (re = 25 EPS): + // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) + + // A[2] * sqrt(B[2]) + A[3] * sqrt(B[3]). + _fpt eval4ext(_int* A, _int* B) { + _fpt a = eval2(A, B); + _fpt b = eval2(A + 2, B + 2); + if ((!is_neg(a) && !is_neg(b)) || + (!is_pos(a) && !is_pos(b))) + return a + b; + tA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - + A[2].sqr() * B[2] - A[3].sqr() * B[3]; + tB[0] = 1; + tA[1] = A[0] * A[1] * 2; + tB[1] = B[0] * B[1]; + tA[2] = A[2] * A[3] * -2; + tB[2] = B[2] * B[3]; + return eval3ext(tA, tB) / (a - b); + } + private: _int tA[5]; _int tB[5];