diff --git a/include/boost/polygon/detail/voronoi_ctypes.hpp b/include/boost/polygon/detail/voronoi_ctypes.hpp
index 078d9351..efc53427 100644
--- a/include/boost/polygon/detail/voronoi_ctypes.hpp
+++ b/include/boost/polygon/detail/voronoi_ctypes.hpp
@@ -11,13 +11,26 @@
 #define BOOST_POLYGON_DETAIL_VORONOI_CTYPES
 
 #include <boost/cstdint.hpp>
+#include <boost/multiprecision/cpp_int.hpp>
 
 #include <algorithm>
+#include <cassert>
 #include <cmath>
+#include <cstdint>
 #include <cstring>
 #include <utility>
 #include <vector>
 
+#if BOOST_VORONOI_USE_GMP
+#include <gmp.h>
+#if GMP_NAIL_BITS != 0
+  #error boost::voronoi expects GMP_LIMB_BITS == 0
+#endif
+#if GMP_NUMB_BITS != 32 && GMP_NUMB_BITS != 64
+  #error boost::voronoi expects GMP_NUMB_BITS == 32 or 64
+#endif
+#endif /* BOOST_VORONOI_USE_GMP */
+
 namespace boost {
 namespace polygon {
 namespace detail {
@@ -182,6 +195,10 @@ class extended_exponent_fpt {
     return *this = *this / that;
   }
 
+  extended_exponent_fpt sqr() const {
+    return extended_exponent_fpt(this->val_ * this->val_, this->exp_ * 2);
+  }
+
   extended_exponent_fpt sqrt() const {
     fpt_type val = val_;
     exp_type exp = exp_;
@@ -192,6 +209,11 @@ class extended_exponent_fpt {
     return extended_exponent_fpt(std::sqrt(val), exp >> 1);
   }
 
+  // Add to the exponent.
+  extended_exponent_fpt addexp(int n) const {
+    return extended_exponent_fpt(this->val_, this->exp_ + n);
+  }
+
   fpt_type d() const {
     return std::ldexp(val_, exp_);
   }
@@ -202,6 +224,11 @@ class extended_exponent_fpt {
 };
 typedef extended_exponent_fpt<double> efpt64;
 
+template <typename _fpt>
+extended_exponent_fpt<_fpt> sqr(const extended_exponent_fpt<_fpt>& that) {
+  return that.sqr();
+}
+
 template <typename _fpt>
 extended_exponent_fpt<_fpt> get_sqrt(const extended_exponent_fpt<_fpt>& that) {
   return that.sqrt();
@@ -222,11 +249,79 @@ bool is_zero(const extended_exponent_fpt<_fpt>& that) {
   return that.is_zero();
 }
 
+#if BOOST_VORONOI_USE_GMP
+  #if GMP_NUMB_BITS == 32
+    #define BOOST_VORONOI_64_T 0
+  #elif GMP_NUMB_BITS == 64
+    #define BOOST_VORONOI_64_T 1
+  #else
+    #error BOOST_VORONOI_USE_GMP: mp_limb_t is expected to be either 32 or 64bit integer type.
+  #endif
+#else
+  #if INTPTR_MAX == INT64_MAX
+    // Multi-precision using 64bit ints on a 64bit compiler.
+    #define BOOST_VORONOI_64_T 1
+  #else
+    // Multi-precision using 32bit ints otherwise.
+    #define BOOST_VORONOI_64_T 0
+  #endif
+#endif /* BOOST_VORONOI_USE_GMP */
+
 // Very efficient stack allocated big integer class.
 // Supports next set of arithmetic operations: +, -, *.
-template<std::size_t N>
+template<std::size_t AN>
 class extended_int {
  public:
+
+  static constexpr std::size_t N = AN;
+
+  using chunk_type  = 
+#if BOOST_VORONOI_USE_GMP
+    mp_limb_t
+#elif BOOST_VORONOI_64_T
+    uint64
+#else
+    uint32
+#endif
+    ;
+
+  using chunk_type2 = 
+#if BOOST_VORONOI_64_T
+    #ifdef _MSC_VER
+        // Visual studio does not support 128bit type natively. boost::multiprecision::uint128_t utilizes 32bit limbs for multiplication,
+        // which is not the best we can do on a 64bit system. Give the caller an option to replace the 128bit uint with its own optimized solution.
+        #ifdef BOOST_VORONOI_UINT128T
+            BOOST_VORONOI_UINT128T
+        #else
+            boost::multiprecision::uint128_t
+        #endif
+    #else
+        unsigned __int128
+    #endif
+#else
+    uint64
+#endif
+    ;
+
+  static constexpr int chunk_shift =
+#if BOOST_VORONOI_USE_GMP
+    GMP_NUMB_BITS
+#elif BOOST_VORONOI_64_T
+    64
+#else
+    32
+#endif
+    ;
+
+  // Exponent conversion between this type and the extended floating point type.
+  static constexpr int chunks_to_fptexp =
+#if BOOST_VORONOI_64_T
+    10
+#else
+    5
+#endif
+    ;
+
   extended_int() {}
 
   extended_int(int32 that) {
@@ -243,31 +338,33 @@ class extended_int {
 
   extended_int(int64 that) {
     if (that > 0) {
-      this->chunks_[0] = static_cast<uint32>(that);
+#if BOOST_VORONOI_64_T
+      this->chunks_[0] = that;
+      this->count_ = 1;
+#else
+      this->chunks_[0] = static_cast<chunk_type>(that);
       this->chunks_[1] = that >> 32;
       this->count_ = this->chunks_[1] ? 2 : 1;
+#endif
     } else if (that < 0) {
+#if BOOST_VORONOI_64_T
+      this->chunks_[0] = -that;
+      this->count_ = -1;
+#else
       that = -that;
-      this->chunks_[0] = static_cast<uint32>(that);
+      this->chunks_[0] = static_cast<chunk_type>(that);
       this->chunks_[1] = that >> 32;
       this->count_ = this->chunks_[1] ? -2 : -1;
+#endif
     } else {
       this->count_ = 0;
     }
   }
 
-  extended_int(const std::vector<uint32>& chunks, bool plus = true) {
-    this->count_ = static_cast<int32>((std::min)(N, chunks.size()));
-    for (int i = 0; i < this->count_; ++i)
-      this->chunks_[i] = chunks[chunks.size() - i - 1];
-    if (!plus)
-      this->count_ = -this->count_;
-  }
-
   template<std::size_t M>
   extended_int(const extended_int<M>& that) {
     this->count_ = that.count();
-    std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(uint32));
+    std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(chunk_type));
   }
 
   extended_int& operator=(int32 that) {
@@ -285,14 +382,24 @@ class extended_int {
 
   extended_int& operator=(int64 that) {
     if (that > 0) {
-      this->chunks_[0] = static_cast<uint32>(that);
+#if BOOST_VORONOI_64_T
+      this->chunks_[0] = that;
+      this->count_ = 1;
+#else
+      this->chunks_[0] = static_cast<chunk_type>(that);
       this->chunks_[1] = that >> 32;
       this->count_ = this->chunks_[1] ? 2 : 1;
+#endif
     } else if (that < 0) {
+#if BOOST_VORONOI_64_T
+      this->chunks_[0] = -that;
+      this->count_ = -1;
+#else
       that = -that;
-      this->chunks_[0] = static_cast<uint32>(that);
+      this->chunks_[0] = static_cast<chunk_type>(that);
       this->chunks_[1] = that >> 32;
       this->count_ = this->chunks_[1] ? -2 : -1;
+#endif
     } else {
       this->count_ = 0;
     }
@@ -302,7 +409,7 @@ class extended_int {
   template<std::size_t M>
   extended_int& operator=(const extended_int<M>& that) {
     this->count_ = that.count();
-    std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(uint32));
+    std::memcpy(this->chunks_, that.chunks(), that.size() * sizeof(chunk_type));
     return *this;
   }
 
@@ -432,6 +539,26 @@ class extended_int {
     return ret_val;
   }
 
+  extended_int sqr() const {
+    extended_int ret_val;
+    if (! this->count_)
+      ret_val.count_ = 0;
+    else
+      ret_val.mksqr(this->chunks_, this->size());
+    return ret_val;
+  }
+
+  // Square of a positive non-zero value with more than half the limbs utilized.
+  // Such a number does not fit the result, thus it is right shifted.
+  // The caller shall be aware of the right shift performed by this function
+  // on the result.
+  extended_int sqrext() const {
+    extended_int ret_val;
+    assert(this->size() * 2 > N);
+    ret_val.mksqrext(this->chunks_, this->size());
+    return ret_val;
+  }
+
   void mul(const extended_int& e1, const extended_int& e2) {
     if (!e1.count() || !e2.count()) {
       this->count_ = 0;
@@ -442,7 +569,36 @@ class extended_int {
       this->count_ = -this->count_;
   }
 
-  const uint32* chunks() const {
+  // Multiple of two nonzero values, result of which does not fit the resulting type,
+  // thus it is right shifted.
+  // The caller shall be aware of the right shift performed by this function
+  // on the result.
+  extended_int mulext(const extended_int& rhs) const {
+    assert(this->size() > 0);
+    assert(rhs.size() > 0);
+    assert(this->size() + rhs.size() > N);
+    extended_int ret_val;
+    ret_val.mulext_(this->chunks(), this->size(), rhs.chunks(), rhs.size());
+    if ((this->count() > 0) ^ (rhs.count() > 0))
+      ret_val.count_ = -ret_val.count_;
+    return ret_val;
+  }
+
+  // Shift the limbs of this object by nshift positions right.
+  void rshiftme(std::size_t nshift) {
+    std::size_t m = this->size();
+    if (nshift >= m) {
+      // Underflow to zero.
+      this->count_ = 0;
+    } else {
+      m -= nshift;
+      for (std::size_t i = 0; i < m; ++ i)
+        this->chunks_[i] = this->chunks_[i + nshift];
+      this->count_ = this->count_ > 0 ? m : - m;
+    }
+  }
+
+  const chunk_type* chunks() const {
     return chunks_;
   }
 
@@ -460,6 +616,47 @@ class extended_int {
     if (!sz) {
       return ret_val;
     } else {
+#if BOOST_VORONOI_64_T
+      if (sz == 1) {
+        auto   l0 = this->chunks_[0];
+        uint32 l, h;
+        memcpy(&l, &l0, sizeof(uint32));
+        memcpy(&h, reinterpret_cast<const char*>(&l0) + sizeof(uint32), sizeof(uint32));
+        if (h == 0) {
+          ret_val.first = static_cast<fpt64>(static_cast<uint32>(l));
+        } else {
+          ret_val.first = static_cast<fpt64>(h) *
+                          static_cast<fpt64>(0x100000000LL) +
+                          static_cast<fpt64>(l);
+        }
+      } else {
+        assert(sz >= 2);
+        auto last = this->chunks_[sz - 1];
+        auto prev = this->chunks_[sz - 2];
+        uint32 l, h;
+        memcpy(&l, &last, sizeof(uint32));
+        memcpy(&h, reinterpret_cast<const char*>(&last) + sizeof(uint32), sizeof(uint32));
+        sz *= 2;
+        if (h == 0) {
+          -- sz;
+          ret_val.first  = static_cast<fpt64>(l);
+          memcpy(&l, &prev, sizeof(uint32));
+          memcpy(&h, reinterpret_cast<const char*>(&prev) + sizeof(uint32), sizeof(uint32));          
+          ret_val.first *= static_cast<fpt64>(0x100000000LL);
+          ret_val.first += static_cast<fpt64>(h);
+          ret_val.first *= static_cast<fpt64>(0x100000000LL);
+          ret_val.first += static_cast<fpt64>(l);
+        } else {
+          ret_val.first  = static_cast<fpt64>(h);
+          ret_val.first *= static_cast<fpt64>(0x100000000LL);
+          ret_val.first += static_cast<fpt64>(l);
+          memcpy(&h, reinterpret_cast<const char*>(&prev) + sizeof(uint32), sizeof(uint32));
+          ret_val.first *= static_cast<fpt64>(0x100000000LL);
+          ret_val.first += static_cast<fpt64>(h);
+        }
+        ret_val.second = static_cast<int>((sz - 3) << (chunks_to_fptexp / 2));
+      }
+#else
       if (sz == 1) {
         ret_val.first = static_cast<fpt64>(this->chunks_[0]);
       } else if (sz == 2) {
@@ -471,8 +668,9 @@ class extended_int {
           ret_val.first *= static_cast<fpt64>(0x100000000LL);
           ret_val.first += static_cast<fpt64>(this->chunks_[sz - i]);
         }
-        ret_val.second = static_cast<int>((sz - 3) << 5);
+        ret_val.second = static_cast<int>((sz - 3) << chunks_to_fptexp);
       }
+#endif
     }
     if (this->count_ < 0)
       ret_val.first = -ret_val.first;
@@ -485,38 +683,50 @@ class extended_int {
   }
 
  private:
-  void add(const uint32* c1, std::size_t sz1,
-           const uint32* c2, std::size_t sz2) {
+
+  void add(const chunk_type* c1, std::size_t sz1,
+           const chunk_type* c2, std::size_t sz2) {
     if (sz1 < sz2) {
       add(c2, sz2, c1, sz1);
       return;
     }
+
+    assert(sz1 >= sz2);
+    assert(sz2 > 0);
     this->count_ = static_cast<int32>(sz1);
-    uint64 temp = 0;
+#if BOOST_VORONOI_USE_GMP
+    mp_limb_t temp = mpn_add(this->chunks_, c1, sz1, c2, sz2);
+#else
+    chunk_type2 temp = 0;
     for (std::size_t i = 0; i < sz2; ++i) {
-      temp += static_cast<uint64>(c1[i]) + static_cast<uint64>(c2[i]);
-      this->chunks_[i] = static_cast<uint32>(temp);
-      temp >>= 32;
+      temp += static_cast<chunk_type2>(c1[i]) + static_cast<chunk_type2>(c2[i]);
+      this->chunks_[i] = static_cast<chunk_type>(temp);
+      temp >>= chunk_shift;
     }
     for (std::size_t i = sz2; i < sz1; ++i) {
-      temp += static_cast<uint64>(c1[i]);
-      this->chunks_[i] = static_cast<uint32>(temp);
-      temp >>= 32;
+      temp += static_cast<chunk_type2>(c1[i]);
+      this->chunks_[i] = static_cast<chunk_type>(temp);
+      temp >>= chunk_shift;
     }
+#endif
+    // One should never trim the most significant limb.
+    assert(temp == 0 || this->count_ < N);
     if (temp && (this->count_ != N)) {
-      this->chunks_[this->count_] = static_cast<uint32>(temp);
+      this->chunks_[this->count_] = static_cast<chunk_type>(temp);
       ++this->count_;
     }
   }
 
-  void dif(const uint32* c1, std::size_t sz1,
-           const uint32* c2, std::size_t sz2,
+  void dif(const chunk_type* c1, std::size_t sz1,
+           const chunk_type* c2, std::size_t sz2,
            bool rec = false) {
     if (sz1 < sz2) {
       dif(c2, sz2, c1, sz1, true);
       this->count_ = -this->count_;
       return;
-    } else if ((sz1 == sz2) && !rec) {
+    }
+
+    if ((sz1 == sz2) && !rec) {
       do {
         --sz1;
         if (c1[sz1] < c2[sz1]) {
@@ -535,6 +745,14 @@ class extended_int {
       }
       sz2 = sz1;
     }
+#if BOOST_VORONOI_USE_GMP
+    assert(sz1 > sz2 || (sz1 == sz2 && mpn_cmp(c1, c2, sz1) >= 0));
+    mp_limb_t borrow = mpn_sub(this->chunks_, c1, sz1, c2, sz2);
+    assert(! borrow);
+    this->count_ = sz1;
+    if (! this->chunks_[this->count_ - 1])
+      -- this->count_;
+#else
     this->count_ = static_cast<int32>(sz1-1);
     bool flag = false;
     for (std::size_t i = 0; i < sz2; ++i) {
@@ -547,14 +765,62 @@ class extended_int {
     }
     if (this->chunks_[this->count_])
       ++this->count_;
-  }
-
-  void mul(const uint32* c1, std::size_t sz1,
-           const uint32* c2, std::size_t sz2) {
-    uint64 cur = 0, nxt, tmp;
-    this->count_ = static_cast<int32>((std::min)(N, sz1 + sz2 - 1));
-    for (std::size_t shift = 0; shift < static_cast<std::size_t>(this->count_);
-         ++shift) {
+#endif
+  }
+
+  // multiple of two positive nonzero values
+  void mul(const chunk_type* c1, std::size_t sz1,
+           const chunk_type* c2, std::size_t sz2) {
+    assert(sz1 > 0);
+    assert(sz2 > 0);
+    assert(this->chunks_ != c1);
+    assert(this->chunks_ != c2);
+    assert(sz1 + sz2 <= N);
+#if BOOST_VORONOI_USE_GMP
+    this->count_ = sz1 + sz2;
+    auto msb = (sz1 >= sz2) ? mpn_mul(this->chunks_, c1, sz1, c2, sz2) : mpn_mul(this->chunks_, c2, sz2, c1, sz1);
+    assert(msb == this->chunks_[this->count_ - 1]);
+    if (msb == 0)
+      -- this->count_;
+#else
+    chunk_type2 cur = 0, nxt, tmp;
+    // This stinks. One only calculates maximum N of the most significant limbs
+    // without letting the caller know.
+    this->count_ = sz1 + sz2 - 1;
+    for (std::size_t shift = 0; shift < static_cast<std::size_t>(this->count_); ++shift) {
+      nxt = 0;
+      for (std::size_t first = 0; first <= shift; ++first) {
+        if (first >= sz1)
+          break;
+        std::size_t second = shift - first;
+        if (second >= sz2)
+          continue;
+        tmp = static_cast<chunk_type2>(c1[first]) * static_cast<chunk_type2>(c2[second]);
+        cur += static_cast<chunk_type>(tmp);
+        nxt += tmp >> chunk_shift;
+      }
+      this->chunks_[shift] = static_cast<chunk_type>(cur);
+      cur = nxt + (cur >> chunk_shift);
+    }
+    // One should never trim the most significant limb.
+    assert(cur == 0 || this->count_ < N);
+    if (cur && (this->count_ != N)) {
+      this->chunks_[this->count_] = static_cast<chunk_type>(cur);
+      ++this->count_;
+    }
+#endif
+  }
+
+  // Multiple of two positive nonzero values, result of which does not fit the resulting type,
+  // thus it is right shifted.
+  void mulext_(const chunk_type* c1, std::size_t sz1,
+               const chunk_type* c2, std::size_t sz2) {
+    assert(sz1 > 0 && sz2 > 0 && sz1 + sz2 > N);
+    chunk_type2 cur = 0, nxt, tmp;
+    // This stinks. One only calculates maximum N of the most significant limbs
+    // without letting the caller know.
+    this->count_ = N-1;
+    for (std::size_t shift = 0; shift < static_cast<std::size_t>(this->count_); ++shift) {
       nxt = 0;
       for (std::size_t first = 0; first <= shift; ++first) {
         if (first >= sz1)
@@ -562,36 +828,108 @@ class extended_int {
         std::size_t second = shift - first;
         if (second >= sz2)
           continue;
-        tmp = static_cast<uint64>(c1[first]) * static_cast<uint64>(c2[second]);
-        cur += static_cast<uint32>(tmp);
-        nxt += tmp >> 32;
+        tmp = static_cast<chunk_type2>(c1[first]) * static_cast<chunk_type2>(c2[second]);
+        cur += static_cast<chunk_type>(tmp);
+        nxt += tmp >> chunk_shift;
+      }
+      this->chunks_[shift] = static_cast<chunk_type>(cur);
+      cur = nxt + (cur >> chunk_shift);
+    }
+    // One should never trim the most significant limb.
+    assert(cur == 0 || this->count_ < N);
+    if (cur && (this->count_ != N)) {
+      this->chunks_[this->count_] = static_cast<chunk_type>(cur);
+      ++this->count_;
+    }
+  }
+
+  // square of a positive non-zero value
+  void mksqr(const chunk_type* c, std::size_t sz) {
+    assert(2 * sz <= N);
+#if BOOST_VORONOI_USE_GMP
+    assert(sz > 0);
+    assert(this->chunks_ != c);
+    this->count_ = sz * 2;
+    mpn_sqr(this->chunks_, c, sz);
+    if (this->chunks_[this->count_ - 1] == 0)
+      -- this->count_;
+#else
+    chunk_type2 cur = 0, nxt, tmp;
+    this->count_ = sz * 2 - 1;
+    for (std::size_t shift = 0; shift < static_cast<std::size_t>(this->count_); ++shift) {
+      nxt = 0;
+      for (std::size_t first = 0; first <= shift; ++first) {
+        if (first >= sz)
+          break;
+        std::size_t second = shift - first;
+        if (second >= sz)
+          continue;
+        tmp = static_cast<chunk_type2>(c[first]) * static_cast<chunk_type2>(c[second]);
+        cur += static_cast<chunk_type>(tmp);
+        nxt += tmp >> chunk_shift;
+      }
+      this->chunks_[shift] = static_cast<chunk_type>(cur);
+      cur = nxt + (cur >> chunk_shift);
+    }
+    // One should never trim the most significant limb.
+    assert(cur == 0 || this->count_ < N);
+    if (cur && (this->count_ != N)) {
+      this->chunks_[this->count_] = static_cast<chunk_type>(cur);
+      ++this->count_;
+    }
+#endif
+  }
+
+  // Square of a positive non-zero value with more than half the limbs utilized.
+  // Such a number does not fit the result, thus it is right shifted.
+  void mksqrext(const chunk_type* c, std::size_t sz) {
+    chunk_type2 cur = 0, nxt, tmp;
+    this->count_ = N-1;
+    for (std::size_t shift = 0; shift < static_cast<std::size_t>(this->count_); ++shift) {
+      nxt = 0;
+      for (std::size_t first = 0; first <= shift; ++first) {
+        if (first >= sz)
+          break;
+        std::size_t second = shift - first;
+        if (second >= sz)
+          continue;
+        tmp = static_cast<chunk_type2>(c[first]) * static_cast<chunk_type2>(c[second]);
+        cur += static_cast<chunk_type>(tmp);
+        nxt += tmp >> chunk_shift;
       }
-      this->chunks_[shift] = static_cast<uint32>(cur);
-      cur = nxt + (cur >> 32);
+      this->chunks_[shift] = static_cast<chunk_type>(cur);
+      cur = nxt + (cur >> chunk_shift);
     }
+    // One should never trim the most significant limb.
+    assert(cur == 0 || this->count_ < N);
     if (cur && (this->count_ != N)) {
-      this->chunks_[this->count_] = static_cast<uint32>(cur);
+      this->chunks_[this->count_] = static_cast<chunk_type>(cur);
       ++this->count_;
     }
   }
 
-  uint32 chunks_[N];
+  chunk_type chunks_[N];
   int32 count_;
 };
 
+template <std::size_t N>
+extended_int<N> sqr(const extended_int<N>& that) {
+  return that.sqr();
+}
+
 template <std::size_t N>
 bool is_pos(const extended_int<N>& that) {
-  return that.count() > 0;
+  return that.is_pos();
 }
 
 template <std::size_t N>
 bool is_neg(const extended_int<N>& that) {
-  return that.count() < 0;
+  return that.is_neg();
 }
 
 template <std::size_t N>
 bool is_zero(const extended_int<N>& that) {
-  return !that.count();
+  return that.is_zero();
 }
 
 struct type_converter_fpt {
@@ -629,7 +967,13 @@ struct voronoi_ctype_traits<int32> {
   typedef int32 int_type;
   typedef int64 int_x2_type;
   typedef uint64 uint_x2_type;
+#if BOOST_VORONOI_64_T
+  // using uint64 
+  typedef extended_int<33> big_int_type;
+#else
+  // using uint32
   typedef extended_int<64> big_int_type;
+#endif
   typedef fpt64 fpt_type;
   typedef extended_exponent_fpt<fpt_type> efpt_type;
   typedef ulp_comparison<fpt_type> ulp_cmp_type;
diff --git a/include/boost/polygon/detail/voronoi_predicates.hpp b/include/boost/polygon/detail/voronoi_predicates.hpp
index b6314d35..a25662c8 100644
--- a/include/boost/polygon/detail/voronoi_predicates.hpp
+++ b/include/boost/polygon/detail/voronoi_predicates.hpp
@@ -18,6 +18,10 @@ namespace boost {
 namespace polygon {
 namespace detail {
 
+inline double sqr(const double d) {
+  return d * d;
+}
+
 // Predicate utilities. Operates with the coordinate types that could
 // be converted to the 32-bit signed integer without precision loss.
 template <typename CTYPE_TRAITS>
@@ -283,7 +287,7 @@ class voronoi_predicates {
       fpt_type dx = to_fpt(site.x()) - to_fpt(point.x());
       fpt_type dy = to_fpt(site.y()) - to_fpt(point.y());
       // The relative error is at most 3EPS.
-      return (dx * dx + dy * dy) / (to_fpt(2.0) * dx);
+      return (sqr(dx) + sqr(dy)) / (to_fpt(2.0) * dx);
     }
 
     fpt_type find_distance_to_segment_arc(
@@ -295,12 +299,12 @@ class voronoi_predicates {
         const point_type& segment1 = site.point1();
         fpt_type a1 = to_fpt(segment1.x()) - to_fpt(segment0.x());
         fpt_type b1 = to_fpt(segment1.y()) - to_fpt(segment0.y());
-        fpt_type k = get_sqrt(a1 * a1 + b1 * b1);
+        fpt_type k = get_sqrt(sqr(a1) + sqr(b1));
         // Avoid subtraction while computing k.
         if (!is_neg(b1)) {
           k = to_fpt(1.0) / (b1 + k);
         } else {
-          k = (k - b1) / (a1 * a1);
+          k = (k - b1) / sqr(a1);
         }
         // The relative error is at most 7EPS.
         return k * robust_cross_product(
@@ -571,9 +575,9 @@ class voronoi_predicates {
 
         if (recompute_lower_x) {
           // Evaluate radius of the circle.
-          big_int_type sqr_r = (dif_x[0] * dif_x[0] + dif_y[0] * dif_y[0]) *
-                               (dif_x[1] * dif_x[1] + dif_y[1] * dif_y[1]) *
-                               (dif_x[2] * dif_x[2] + dif_y[2] * dif_y[2]);
+          big_int_type sqr_r = (sqr(dif_x[0]) + sqr(dif_y[0])) *
+                               (sqr(dif_x[1]) + sqr(dif_y[1])) *
+                               (sqr(dif_x[2]) + sqr(dif_y[2]));
           fpt_type r = get_sqrt(to_fpt(sqr_r));
 
           // If c_x >= 0 then lower_x = c_x + r,
@@ -586,7 +590,7 @@ class voronoi_predicates {
               circle.lower_x(circle.x() - r * inv_denom);
             }
           } else {
-            big_int_type numer = c_x * c_x - sqr_r;
+            big_int_type numer = sqr(c_x) - sqr_r;
             fpt_type lower_x = to_fpt(numer) * inv_denom / (to_fpt(c_x) + r);
             circle.lower_x(lower_x);
           }
@@ -613,7 +617,7 @@ class voronoi_predicates {
                             static_cast<int_x2_type>(site3.y0());
       big_int_type line_b = static_cast<int_x2_type>(site3.x0()) -
                             static_cast<int_x2_type>(site3.x1());
-      big_int_type segm_len = line_a * line_a + line_b * line_b;
+      big_int_type segm_len = sqr(line_a) + sqr(line_b);
       big_int_type vec_x = static_cast<int_x2_type>(site2.y()) -
                            static_cast<int_x2_type>(site1.y());
       big_int_type vec_y = static_cast<int_x2_type>(site1.x()) -
@@ -638,7 +642,7 @@ class voronoi_predicates {
       big_int_type sum_AB = A + B;
 
       if (is_zero(denom)) {
-        big_int_type numer = teta * teta - sum_AB * sum_AB;
+        big_int_type numer = sqr(teta) - sqr(sum_AB);
         denom = teta * sum_AB;
         cA[0] = denom * sum_x * 2 + numer * vec_x;
         cB[0] = segm_len;
@@ -657,12 +661,12 @@ class voronoi_predicates {
         return;
       }
 
-      big_int_type det = (teta * teta + denom * denom) * A * B * 4;
+      big_int_type det = (sqr(teta) + sqr(denom)) * A * B * 4;
       fpt_type inv_denom_sqr = to_fpt(1.0) / to_fpt(denom);
       inv_denom_sqr *= inv_denom_sqr;
 
       if (recompute_c_x || recompute_lower_x) {
-        cA[0] = sum_x * denom * denom + teta * sum_AB * vec_x;
+        cA[0] = sum_x * sqr(denom) + teta * sum_AB * vec_x;
         cB[0] = 1;
         cA[1] = (segment_index == 2) ? -vec_x : vec_x;
         cB[1] = det;
@@ -673,7 +677,7 @@ class voronoi_predicates {
       }
 
       if (recompute_c_y || recompute_lower_x) {
-        cA[2] = sum_y * denom * denom + teta * sum_AB * vec_y;
+        cA[2] = sum_y * sqr(denom) + teta * sum_AB * vec_y;
         cB[2] = 1;
         cA[3] = (segment_index == 2) ? -vec_y : vec_y;
         cB[3] = det;
@@ -686,7 +690,7 @@ class voronoi_predicates {
       if (recompute_lower_x) {
         cB[0] = cB[0] * segm_len;
         cB[1] = cB[1] * segm_len;
-        cA[2] = sum_AB * (denom * denom + teta * teta);
+        cA[2] = sum_AB * (sqr(denom) + sqr(teta));
         cB[2] = 1;
         cA[3] = (segment_index == 2) ? -teta : teta;
         cB[3] = det;
@@ -720,7 +724,7 @@ class voronoi_predicates {
       big_int_type orientation = a[1] * b[0] - a[0] * b[1];
       if (is_zero(orientation)) {
         fpt_type denom = to_fpt(2.0) * to_fpt(
-            static_cast<big_int_type>(a[0] * a[0] + b[0] * b[0]));
+            static_cast<big_int_type>(sqr(a[0]) + sqr(b[0])));
         c[0] = b[0] * (static_cast<int_x2_type>(segm_start2.x()) -
                        static_cast<int_x2_type>(segm_start1.x())) -
                a[0] * (static_cast<int_x2_type>(segm_start2.y()) -
@@ -738,24 +742,24 @@ class voronoi_predicates {
 
         if (recompute_c_y) {
           cA[0] = b[0] * ((point_index == 2) ? 2 : -2);
-          cA[1] = a[0] * a[0] * (static_cast<int_x2_type>(segm_start1.y()) +
+          cA[1] = a[0].sqr()  * (static_cast<int_x2_type>(segm_start1.y()) +
                                  static_cast<int_x2_type>(segm_start2.y())) -
                   a[0] * b[0] * (static_cast<int_x2_type>(segm_start1.x()) +
                                  static_cast<int_x2_type>(segm_start2.x()) -
                                  static_cast<int_x2_type>(site1.x()) * 2) +
-                  b[0] * b[0] * (static_cast<int_x2_type>(site1.y()) * 2);
+                  b[0].sqr()  * (static_cast<int_x2_type>(site1.y()) * 2);
           fpt_type c_y = to_fpt(sqrt_expr_.eval2(cA, cB));
           c_event.y(c_y / denom);
         }
 
         if (recompute_c_x || recompute_lower_x) {
           cA[0] = a[0] * ((point_index == 2) ? 2 : -2);
-          cA[1] = b[0] * b[0] * (static_cast<int_x2_type>(segm_start1.x()) +
+          cA[1] = b[0].sqr()  * (static_cast<int_x2_type>(segm_start1.x()) +
                                  static_cast<int_x2_type>(segm_start2.x())) -
                   a[0] * b[0] * (static_cast<int_x2_type>(segm_start1.y()) +
                                  static_cast<int_x2_type>(segm_start2.y()) -
                                  static_cast<int_x2_type>(site1.y()) * 2) +
-                  a[0] * a[0] * (static_cast<int_x2_type>(site1.x()) * 2);
+                  a[0].sqr()  * (static_cast<int_x2_type>(site1.x()) * 2);
 
           if (recompute_c_x) {
             fpt_type c_x = to_fpt(sqrt_expr_.eval2(cA, cB));
@@ -764,7 +768,7 @@ class voronoi_predicates {
 
           if (recompute_lower_x) {
             cA[2] = is_neg(c[0]) ? -c[0] : c[0];
-            cB[2] = a[0] * a[0] + b[0] * b[0];
+            cB[2] = a[0].sqr() + b[0].sqr();
             fpt_type lower_x = to_fpt(sqrt_expr_.eval3(cA, cB));
             c_event.lower_x(lower_x / denom);
           }
@@ -791,8 +795,8 @@ class voronoi_predicates {
       cA[1] = a[0] * -dx + b[0] * -dy;
       cA[2] = sign;
       cA[3] = 0;
-      cB[0] = a[0] * a[0] + b[0] * b[0];
-      cB[1] = a[1] * a[1] + b[1] * b[1];
+      cB[0] = a[0].sqr() + b[0].sqr();
+      cB[1] = a[1].sqr() + b[1].sqr();
       cB[2] = a[0] * a[1] + b[0] * b[1];
       cB[3] = (a[0] * dy - b[0] * dx) * (a[1] * dy - b[1] * dx) * -2;
       fpt_type temp = to_fpt(
@@ -800,8 +804,8 @@ class voronoi_predicates {
       fpt_type denom = temp * to_fpt(orientation);
 
       if (recompute_c_y) {
-        cA[0] = b[1] * (dx * dx + dy * dy) - iy * (dx * a[1] + dy * b[1]);
-        cA[1] = b[0] * (dx * dx + dy * dy) - iy * (dx * a[0] + dy * b[0]);
+        cA[0] = b[1] * (dx.sqr() + dy.sqr()) - iy * (dx * a[1] + dy * b[1]);
+        cA[1] = b[0] * (dx.sqr() + dy.sqr()) - iy * (dx * a[0] + dy * b[0]);
         cA[2] = iy * sign;
         fpt_type cy = to_fpt(
             sqrt_expr_evaluator_pss4<big_int_type, efpt_type>(cA, cB));
@@ -809,8 +813,8 @@ class voronoi_predicates {
       }
 
       if (recompute_c_x || recompute_lower_x) {
-        cA[0] = a[1] * (dx * dx + dy * dy) - ix * (dx * a[1] + dy * b[1]);
-        cA[1] = a[0] * (dx * dx + dy * dy) - ix * (dx * a[0] + dy * b[0]);
+        cA[0] = a[1] * (dx.sqr() + dy.sqr()) - ix * (dx * a[1] + dy * b[1]);
+        cA[1] = a[0] * (dx.sqr() + dy.sqr()) - ix * (dx * a[0] + dy * b[0]);
         cA[2] = ix * sign;
 
         if (recompute_c_x) {
@@ -820,7 +824,7 @@ class voronoi_predicates {
         }
 
         if (recompute_lower_x) {
-          cA[3] = orientation * (dx * dx + dy * dy) * (is_neg(temp) ? -1 : 1);
+          cA[3] = orientation * (dx.sqr() + dy.sqr()) * (is_neg(temp) ? -1 : 1);
           fpt_type lower_x = to_fpt(
               sqrt_expr_evaluator_pss4<big_int_type, efpt_type>(cA, cB));
           c_event.lower_x(lower_x / denom);
@@ -867,7 +871,7 @@ class voronoi_predicates {
              static_cast<int_x2_type>(site3.x1());
 
       for (int i = 0; i < 3; ++i)
-        cB[i] = a[i] * a[i] + b[i] * b[i];
+        cB[i] = a[i].sqr() + b[i].sqr();
 
       for (int i = 0; i < 3; ++i) {
         int j = (i+1) % 3;
@@ -926,10 +930,10 @@ class voronoi_predicates {
             get_sqrt(sqrt_expr_.eval2(cA, cB));
         if ((!is_neg(lh) && !is_neg(rh)) || (!is_pos(lh) && !is_pos(rh)))
           return lh + rh;
-        cA[0] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] -
-                A[2] * A[2] * B[3] * B[2];
+        cA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] -
+                A[2].sqr() * B[3] * B[2];
         cB[0] = 1;
-        cA[1] = A[0] * A[1] * 2 - A[2] * A[2] * B[3];
+        cA[1] = A[0] * A[1] * 2 - A[2].sqr() * B[3];
         cB[1] = B[0] * B[1];
         _fpt numer = sqrt_expr_.eval2(cA, cB);
         return numer / (lh - rh);
@@ -950,11 +954,11 @@ class voronoi_predicates {
         return lh + rh;
       cA[0] = A[3] * A[0] * 2;
       cA[1] = A[3] * A[1] * 2;
-      cA[2] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] +
-              A[3] * A[3] - A[2] * A[2] * B[2] * B[3];
-      cA[3] = A[0] * A[1] * 2 - A[2] * A[2] * B[3];
+      cA[2] = A[0].sqr() * B[0] + A[1].sqr() * B[1] +
+              A[3].sqr() - A[2].sqr() * B[2] * B[3];
+      cA[3] = A[0] * A[1] * 2 - A[2].sqr() * B[3];
       cB[3] = B[0] * B[1];
-      _fpt numer = sqrt_expr_evaluator_pss3<_int, _fpt>(cA, cB);
+      _fpt numer = sqrt_expr_evaluator_pss3ext<_int, _fpt>(cA, cB);
       return numer / (lh - rh);
     }
 
@@ -968,8 +972,8 @@ class voronoi_predicates {
       _fpt rh = sqrt_expr_.eval2(A+2, B+2);
       if ((!is_neg(lh) && !is_neg(rh)) || (!is_pos(lh) && !is_pos(rh)))
         return lh + rh;
-      cA[0] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] -
-              A[2] * A[2] - A[3] * A[3] * B[0] * B[1];
+      cA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] -
+              A[2].sqr() - A[3].sqr() * B[0] * B[1];
       cB[0] = 1;
       cA[1] = (A[0] * A[1] - A[2] * A[3]) * 2;
       cB[1] = B[3];
@@ -977,6 +981,25 @@ class voronoi_predicates {
       return numer / (lh - rh);
     }
 
+    template <typename _int, typename _fpt>
+    // Evaluates A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) +
+    //           A[2] + A[3] * sqrt(B[0] * B[1]).
+    // B[3] = B[0] * B[1].
+    _fpt sqrt_expr_evaluator_pss3ext(_int *A, _int *B) {
+      _int cA[2], cB[2];
+      _fpt lh = sqrt_expr_.eval2(A, B);
+      _fpt rh = sqrt_expr_.eval2(A+2, B+2);
+      if ((!is_neg(lh) && !is_neg(rh)) || (!is_pos(lh) && !is_pos(rh)))
+        return lh + rh;
+      cA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] -
+              A[2].sqr() - A[3].sqr() * B[0] * B[1];
+      cB[0] = 1;
+      cA[1] = (A[0] * A[1] - A[2] * A[3]) * 2;
+      cB[1] = B[3];
+      _fpt numer = sqrt_expr_.eval2ext(cA, cB);
+      return numer / (lh - rh);
+    }
+
     robust_sqrt_expr_type sqrt_expr_;
     to_fpt_converter to_fpt;
   };
@@ -1027,9 +1050,9 @@ class voronoi_predicates {
       c_y -= robust_fpt_type(dif_y1 * sum_y1 * dif_x2, to_fpt(2.0));
       robust_dif_type lower_x(c_x);
       lower_x -= robust_fpt_type(get_sqrt(
-          (dif_x1 * dif_x1 + dif_y1 * dif_y1) *
-          (dif_x2 * dif_x2 + dif_y2 * dif_y2) *
-          (dif_x3 * dif_x3 + dif_y3 * dif_y3)), to_fpt(5.0));
+          (sqr(dif_x1) + sqr(dif_y1)) *
+          (sqr(dif_x2) + sqr(dif_y2)) *
+          (sqr(dif_x3) + sqr(dif_y3))), to_fpt(5.0));
       c_event = circle_type(
           c_x.dif().fpv() * inv_orientation.fpv(),
           c_y.dif().fpv() * inv_orientation.fpv(),
@@ -1090,19 +1113,19 @@ class voronoi_predicates {
           static_cast<int_x2_type>(site3.x1()) -
           static_cast<int_x2_type>(site3.x0())), to_fpt(1.0));
       robust_fpt_type inv_segm_len(to_fpt(1.0) /
-          get_sqrt(line_a * line_a + line_b * line_b), to_fpt(3.0));
+          get_sqrt(sqr(line_a) + sqr(line_b)), to_fpt(3.0));
       robust_dif_type t;
       if (ot::eval(denom) == ot::COLLINEAR) {
         t += teta / (robust_fpt_type(to_fpt(8.0)) * A);
         t -= A / (robust_fpt_type(to_fpt(2.0)) * teta);
       } else {
-        robust_fpt_type det = ((teta * teta + denom * denom) * A * B).sqrt();
+        robust_fpt_type det = ((sqr(teta) + sqr(denom)) * A * B).sqrt();
         if (segment_index == 2) {
-          t -= det / (denom * denom);
+          t -= det / sqr(denom);
         } else {
-          t += det / (denom * denom);
+          t += det / sqr(denom);
         }
-        t += teta * (A + B) / (robust_fpt_type(to_fpt(2.0)) * denom * denom);
+        t += teta * (A + B) / (robust_fpt_type(to_fpt(2.0)) * sqr(denom));
       }
       robust_dif_type c_x, c_y;
       c_x += robust_fpt_type(to_fpt(0.5) *
@@ -1155,7 +1178,7 @@ class voronoi_predicates {
         static_cast<int_x2_type>(segm_end2.x()) -
         static_cast<int_x2_type>(segm_start2.x())), to_fpt(1.0));
       if (ot::eval(orientation) == ot::COLLINEAR) {
-        robust_fpt_type a(a1 * a1 + b1 * b1, to_fpt(2.0));
+        robust_fpt_type a(sqr(a1) + sqr(b1), to_fpt(2.0));
         robust_fpt_type c(robust_cross_product(
             static_cast<int_x2_type>(segm_end1.y()) -
             static_cast<int_x2_type>(segm_start1.y()),
@@ -1217,8 +1240,8 @@ class voronoi_predicates {
         c_event =
             circle_type(c_x.dif().fpv(), c_y.dif().fpv(), lower_x.dif().fpv());
       } else {
-        robust_fpt_type sqr_sum1(get_sqrt(a1 * a1 + b1 * b1), to_fpt(2.0));
-        robust_fpt_type sqr_sum2(get_sqrt(a2 * a2 + b2 * b2), to_fpt(2.0));
+        robust_fpt_type sqr_sum1(get_sqrt(sqr(a1) + sqr(b1)), to_fpt(2.0));
+        robust_fpt_type sqr_sum2(get_sqrt(sqr(a2) + sqr(b2)), to_fpt(2.0));
         robust_fpt_type a(robust_cross_product(
           static_cast<int_x2_type>(segm_end1.x()) -
           static_cast<int_x2_type>(segm_start1.x()),
@@ -1231,7 +1254,7 @@ class voronoi_predicates {
         if (!is_neg(a)) {
           a += sqr_sum1 * sqr_sum2;
         } else {
-          a = (orientation * orientation) / (sqr_sum1 * sqr_sum2 - a);
+          a = sqr(orientation) / (sqr_sum1 * sqr_sum2 - a);
         }
         robust_fpt_type or1(robust_cross_product(
             static_cast<int_x2_type>(segm_end1.y()) -
@@ -1298,7 +1321,7 @@ class voronoi_predicates {
         } else {
           t -= det.sqrt();
         }
-        t /= (a * a);
+        t /= sqr(a);
         robust_dif_type c_x(ix), c_y(iy);
         c_x += t * (robust_fpt_type(a1) * sqr_sum2);
         c_x += t * (robust_fpt_type(a2) * sqr_sum1);
@@ -1348,9 +1371,9 @@ class voronoi_predicates {
           site3.x0(), site3.y0(),
           site3.x1(), site3.y1()), to_fpt(1.0));
 
-      robust_fpt_type len1 = (a1 * a1 + b1 * b1).sqrt();
-      robust_fpt_type len2 = (a2 * a2 + b2 * b2).sqrt();
-      robust_fpt_type len3 = (a3 * a3 + b3 * b3).sqrt();
+      robust_fpt_type len1 = (sqr(a1) + sqr(b1)).sqrt();
+      robust_fpt_type len2 = (sqr(a2) + sqr(b2)).sqrt();
+      robust_fpt_type len3 = (sqr(a3) + sqr(b3)).sqrt();
       robust_fpt_type cross_12(robust_cross_product(
           static_cast<int_x2_type>(site1.x1()) -
           static_cast<int_x2_type>(site1.x0()),
diff --git a/include/boost/polygon/detail/voronoi_robust_fpt.hpp b/include/boost/polygon/detail/voronoi_robust_fpt.hpp
index ce72ba8c..d8a4b7b3 100644
--- a/include/boost/polygon/detail/voronoi_robust_fpt.hpp
+++ b/include/boost/polygon/detail/voronoi_robust_fpt.hpp
@@ -201,6 +201,12 @@ class robust_fpt {
     return robust_fpt(fpv, re);
   }
 
+  robust_fpt sqr() const {
+    floating_point_type fpv = this->fpv_ * this->fpv_;
+    relative_error_type re = this->re_ * 2 + ROUNDING_ERROR;
+    return robust_fpt(fpv, re);
+  }
+
   robust_fpt sqrt() const {
     return robust_fpt(get_sqrt(fpv_),
                       re_ * static_cast<relative_error_type>(0.5) +
@@ -212,6 +218,11 @@ class robust_fpt {
   relative_error_type re_;
 };
 
+template <typename T>
+robust_fpt<T> sqr(const robust_fpt<T>& that) {
+  return that.sqr();
+}
+
 template <typename T>
 robust_fpt<T> get_sqrt(const robust_fpt<T>& that) {
   return that.sqrt();
@@ -451,7 +462,63 @@ class robust_sqrt_expr {
     if ((!is_neg(a) && !is_neg(b)) ||
         (!is_pos(a) && !is_pos(b)))
       return a + b;
-    return convert(A[0] * A[0] * B[0] - A[1] * A[1] * B[1]) / (a - b);
+    return convert(A[0].sqr() * B[0] - A[1].sqr() * B[1]) / (a - b);
+  }
+
+  // Evaluates expression A^2 * B, where the expression may overflow the _int type.
+  // Returns the expression + exponent.
+  std::pair<_int, int> eval2ext_a2b(const _int &A, const _int &B) {
+    int A_size = int(A.size());
+    int B_size = int(B.size());
+    if (A_size && B_size) {
+      int rshift  = std::max(0, A_size * 2 - int(_int::N));
+      _int u = rshift ? A.sqrext() : A.sqr();
+      int rshift2 = std::max(0, int(u.size()) + B_size - int(_int::N));
+      return std::make_pair(rshift2 ? u.mulext(B) : (u * B), rshift + rshift2);
+    } else {
+      return std::make_pair(int(0), 0);
+    }
+  }
+
+  // Evaluates expression (re = 7 EPS):
+  // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]).
+  // This variant of eval2() may be called with very large A and B,
+  // where the expression A[0].sqr() * B[0] - A[1].sqr() * B[1]
+  // could overflow.
+  _fpt eval2ext(_int* A, _int* B) {
+    _fpt a = eval1(A, B);
+    _fpt b = eval1(A + 1, B + 1);
+    if ((!is_neg(a) && !is_neg(b)) ||
+        (!is_pos(a) && !is_pos(b)))
+      return a + b;
+
+    std::pair<_int, int> u = eval2ext_a2b(A[0], B[0]);
+    std::pair<_int, int> v = eval2ext_a2b(A[1], B[1]);
+    // Make sure the top most limb is empty, so the successive addition
+    // will fit the result.
+    if (u.first.size() == _int::N) {
+      u.first.rshiftme(1);
+      ++ u.second;
+    }
+    if (v.first.size() == _int::N) {
+      v.first.rshiftme(1);
+      ++ v.second;
+    }
+    assert(u.first.size() < _int::N);
+    assert(v.first.size() < _int::N);
+
+    // Normalize the two numbers to a common exponent.
+    int exp;
+    if (u.second > v.second) {
+      v.first.rshiftme(u.second - v.second);
+      exp = u.second;
+    } else {
+      u.first.rshiftme(v.second - u.second);
+      exp = v.second;
+    }
+
+    // Finally calculate eval2()
+    return convert(u.first - v.first).addexp(exp << _int::chunks_to_fptexp) / (a - b);
   }
 
   // Evaluates expression (re = 16 EPS):
@@ -462,13 +529,27 @@ class robust_sqrt_expr {
     if ((!is_neg(a) && !is_neg(b)) ||
         (!is_pos(a) && !is_pos(b)))
       return a + b;
-    tA[3] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] - A[2] * A[2] * B[2];
+    tA[3] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - A[2].sqr() * B[2];
     tB[3] = 1;
     tA[4] = A[0] * A[1] * 2;
     tB[4] = B[0] * B[1];
     return eval2(tA + 3, tB + 3) / (a - b);
   }
 
+  // Evaluates expression (re = 16 EPS):
+  // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) + A[2] * sqrt(B[2]).
+  _fpt eval3ext(_int* A, _int* B) {
+    _fpt a = eval2(A, B);
+    _fpt b = eval1(A + 2, B + 2);
+    if ((!is_neg(a) && !is_neg(b)) ||
+        (!is_pos(a) && !is_pos(b)))
+      return a + b;
+    tA[3] = A[0].sqr() * B[0] + A[1].sqr() * B[1] - A[2].sqr() * B[2];
+    tB[3] = 1;
+    tA[4] = A[0] * A[1] * 2;
+    tB[4] = B[0] * B[1];
+    return eval2ext(tA + 3, tB + 3) / (a - b);
+  }
 
   // Evaluates expression (re = 25 EPS):
   // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) +
@@ -479,8 +560,8 @@ class robust_sqrt_expr {
     if ((!is_neg(a) && !is_neg(b)) ||
         (!is_pos(a) && !is_pos(b)))
       return a + b;
-    tA[0] = A[0] * A[0] * B[0] + A[1] * A[1] * B[1] -
-            A[2] * A[2] * B[2] - A[3] * A[3] * B[3];
+    tA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] -
+            A[2].sqr() * B[2] - A[3].sqr() * B[3];
     tB[0] = 1;
     tA[1] = A[0] * A[1] * 2;
     tB[1] = B[0] * B[1];
@@ -489,6 +570,25 @@ class robust_sqrt_expr {
     return eval3(tA, tB) / (a - b);
   }
 
+  // Evaluates expression (re = 25 EPS):
+  // A[0] * sqrt(B[0]) + A[1] * sqrt(B[1]) +
+  // A[2] * sqrt(B[2]) + A[3] * sqrt(B[3]).
+  _fpt eval4ext(_int* A, _int* B) {
+    _fpt a = eval2(A, B);
+    _fpt b = eval2(A + 2, B + 2);
+    if ((!is_neg(a) && !is_neg(b)) ||
+        (!is_pos(a) && !is_pos(b)))
+      return a + b;
+    tA[0] = A[0].sqr() * B[0] + A[1].sqr() * B[1] -
+            A[2].sqr() * B[2] - A[3].sqr() * B[3];
+    tB[0] = 1;
+    tA[1] = A[0] * A[1] * 2;
+    tB[1] = B[0] * B[1];
+    tA[2] = A[2] * A[3] * -2;
+    tB[2] = B[2] * B[3];
+    return eval3ext(tA, tB) / (a - b);
+  }
+
  private:
   _int tA[5];
   _int tB[5];