python · lpereira · Feb 26, 2022 · Feb 26, 2022 · Feb 26, 2022 · Feb 26, 2022
diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h
@@ -61,19 +61,52 @@ typedef long stwodigits; /* signed variant of twodigits */
 #define PyLong_BASE     ((digit)1 << PyLong_SHIFT)
 #define PyLong_MASK     ((digit)(PyLong_BASE - 1))
 
-/* Long integer representation.
-   The absolute value of a number is equal to
-        SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
-   Negative numbers are represented with ob_size < 0;
-   zero is represented by ob_size == 0.
-   In a normalized number, ob_digit[abs(ob_size)-1] (the most significant
-   digit) is never zero.  Also, in all cases, for all valid i,
-        0 <= ob_digit[i] <= MASK.
-   The allocation function takes care of allocating extra memory
-   so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available.
+/* Long Integer Representation
+   ---------------------------
+
+   There are two representations of long objects: the inlined
+   representation, where the sign and value are stored within the ob_size
+   field, and the bignum representation, where ob_size stores both the sign
+   and the number of digits in the ob_digit field.
+
+   To distinguish between the two representations, one looks at the least
+   significant bit of the ob_size field: if it's set, the value is inlined in
+   that field; if it's unset, the field encodes the sign and the number of
+   digits in ob_digit.
+
+   For inlined longs, their value can be obtained with this expression:
+
+        ob_size >> 1
+
+   For inlined longs:
+       * These integers have a capacity of 62 bits on 64-bit architectures:
+         one bit is used for the "is inlined" flag, and one for the sign.
+         This is 30 bits on 32-bit architectures (for the same reasons).
+       * Inlined longs are always normalized, as they use the machine
+         representation for integers.
+       * Allocation functions won't allocate the space for the ob_digit buffer,
+         because these are never used with this representation.
+       * As a consequence of the previous point, the width of a digit for
+         long longs can be either 15 or 30, and this doesn't affect the
+         representation of inlined longs.
+
+   For bignum longs, their absolute value can be obtained with this expression:
+
+        SUM(for i=0 through abs(ob_size >> 1)-1) ob_digit[i] * 2**(SHIFT*i)
+
+   In this representation:
+       * These numbers can also be normalized.  In a normalized number,
+         ob_digit[abs(ob_size >> 1)-1] (the most significant digit) is never
+         zero.
+       * Also, in all cases, for all valid i, 0 <= ob_digit[i] <= MASK.
+       * The allocation function takes care of allocating extra memory
+         so that ob_digit[0] ... ob_digit[abs(ob_size >> 1)-1] are actually
+         available.
+
+   In either case:
+       * Negative numbers are represented by (ob_size >> 1) < 0
+       * Zero is represented by (ob_size >> 1) == 0
 
    CAUTION:  Generic code manipulating subtypes of PyVarObject has to
-   aware that ints abuse  ob_size's sign bit.
+   be aware that ints abuse ob_size's sign bit and its least significant
+   bit.
 */
 
 struct _longobject {

diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h
@@ -139,33 +139,46 @@ _Py_popcount32(uint32_t x)
 #endif
 }
 
+// Count the number of set bits in the 64-bit value 'x'.
+static inline int
+_Py_popcount64(uint64_t x)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    // Pick whichever builtin operates on a type exactly as wide as uint64_t.
+    if (sizeof(uint64_t) == sizeof(long long)) {
+        return __builtin_popcountll(x);
+    }
+    if (sizeof(uint64_t) == sizeof(long)) {
+        return __builtin_popcountl(x);
+    }
+#endif
+    // Portable path: add up the bit counts of the two 32-bit halves.
+    const uint32_t high_half = (uint32_t)(x >> 32);
+    const uint32_t low_half = (uint32_t)x;
+    return _Py_popcount32(high_half) + _Py_popcount32(low_half);
+}
+
+// Count the number of set bits in 'x', dispatching on the width of 'long'
+// to the 32-bit or the 64-bit implementation.
+static inline int
+_Py_popcount(long x)
+{
+    switch (sizeof(long)) {
+    case sizeof(uint32_t):
+        return _Py_popcount32(x);
+    case sizeof(uint64_t):
+        return _Py_popcount64(x);
+    default:
+        break;
+    }
+    // Only reached when 'long' is neither 32 nor 64 bits wide.
+    _Py_UNREACHABLE();
+}
 
 // Return the index of the most significant 1 bit in 'x'. This is the smallest
 // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
 static inline int
-_Py_bit_length(unsigned long x)
+_Py_bit_length32(uint32_t x)
 {
 #if (defined(__clang__) || defined(__GNUC__))
-    if (x != 0) {
-        // __builtin_clzl() is available since GCC 3.4.
-        // Undefined behavior for x == 0.
-        return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
-    }
-    else {
-        return 0;
-    }
+    // __builtin_clzl() is undefined for x = 0.
+    Py_BUILT_ASSERT(sizeof(long) <= sizeof(uint32_t));
+    return x == 0 ? 0 : 32 - __builtin_clzl(x);
 #elif defined(_MSC_VER)
-    // _BitScanReverse() is documented to search 32 bits.
-    Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
     unsigned long msb;
-    if (_BitScanReverse(&msb, x)) {
-        return (int)msb + 1;
-    }
-    else {
-        return 0;
-    }
+    return _BitScanReverse(&msb, x) ? msb + 1 : 0;
 #else
-    const int BIT_LENGTH_TABLE[32] = {
+    static const int BIT_LENGTH_TABLE[32] = {
         0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
     };
@@ -180,6 +193,106 @@ _Py_bit_length(unsigned long x)
 }
 
 
+// Return the index of the most significant 1 bit in 'x'. This is the smallest
+// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
+// Returns 0 for x == 0.
+// (Same as _Py_bit_length(), but works for 64-bit integers.)
+static inline int
+_Py_bit_length64(uint64_t x)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    /* __builtin_clzll() is undefined for x = 0 */
+    return x == 0 ? 0 : 64 - __builtin_clzll(x);
+#elif defined(_MSC_VER) && defined(_WIN64)
+    // FIXME(lpereira): Is _WIN64 sufficient to test for Aarch64 and x86-64?
+    // _BitScanReverse64() is only defined for 64-bit Windows, either on x86,
+    // or on ARM:
+    //    https://docs.microsoft.com/en-us/cpp/intrinsics/bitscanreverse-bitscanreverse64
+    unsigned long msb;
+    // 'msb' is a zero-based bit index; convert explicitly to the int result.
+    return _BitScanReverse64(&msb, x) ? (int)msb + 1 : 0;
+#else
+    // Portable path: if any bit of the upper half is set, the answer is the
+    // bit length of that half shifted up by 32; the lower half is then
+    // irrelevant. Otherwise only the lower half contributes. (Adding the two
+    // halves' lengths together would be wrong, e.g. for 0x100000001.)
+    const uint32_t upper = (uint32_t)(x >> 32);
+    if (upper != 0) {
+        return 32 + _Py_bit_length32(upper);
+    }
+    return _Py_bit_length32((uint32_t)x);
+#endif
+}
+
+// Return the bit length of 'x' (0 for x == 0), dispatching on the width of
+// 'unsigned long' to the 32-bit or the 64-bit implementation.
+static inline int _Py_bit_length(unsigned long x)
+{
+    // Compile-time check that 'unsigned long' is one of the two supported
+    // widths. Py_BUILD_ASSERT is the spelling this header already uses.
+    Py_BUILD_ASSERT(sizeof(x) == sizeof(uint32_t) || sizeof(x) == sizeof(uint64_t));
+
+    if (sizeof(x) == sizeof(uint32_t)) {
+        return _Py_bit_length32((uint32_t)x);
+    }
+    if (sizeof(x) == sizeof(uint64_t)) {
+        return _Py_bit_length64((uint64_t)x);
+    }
+
+    _Py_UNREACHABLE();
+}
+
+// Compute a + b, store the (wrapped) 32-bit result in *result, and return
+// true if the mathematical sum does not fit in an int32_t.
+static inline bool _Py_add_overflow32(int32_t a, int32_t b, int32_t *result)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    return __builtin_add_overflow(a, b, result);
+#else
+    /* Add as unsigned so that wrap-around is well defined. */
+    const uint32_t ua = (uint32_t)a;
+    const uint32_t ub = (uint32_t)b;
+    const uint32_t sum = (uint32_t)(ua + ub);
+    *result = (int32_t)sum;
+    /* When adding, signed overflow only happens if the result has different
+     * sign when both inputs have the same sign.  The test is done on unsigned
+     * values so the shift never operates on a negative number. */
+    return ((uint32_t)((sum ^ ua) & ~(ua ^ ub)) >> 31) != 0;
+#endif
+}
+
+// Compute a + b, store the (wrapped) 64-bit result in *result, and return
+// true if the mathematical sum does not fit in an int64_t.
+static inline bool _Py_add_overflow64(int64_t a, int64_t b, int64_t *result)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    return __builtin_add_overflow(a, b, result);
+#else
+    const uint64_t lhs = (uint64_t)a;
+    const uint64_t rhs = (uint64_t)b;
+    const uint64_t wrapped = lhs + rhs;
+    *result = (int64_t)wrapped;
+    /* Overflow iff both operands share a sign that the result does not. */
+    const uint64_t sign_flip = (wrapped ^ lhs) & ~(lhs ^ rhs);
+    return (sign_flip >> 63) != 0;
+#endif
+}
+
+// Compute a - b, store the (wrapped) 32-bit result in *result, and return
+// true if the mathematical difference does not fit in an int32_t.
+static inline bool _Py_sub_overflow32(int32_t a, int32_t b, int32_t *result)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    return __builtin_sub_overflow(a, b, result);
+#else
+    const uint32_t minuend = (uint32_t)a;
+    const uint32_t subtrahend = (uint32_t)b;
+    const uint32_t wrapped = (uint32_t)(minuend - subtrahend);
+    *result = (int32_t)wrapped;
+    /* Overflow iff the operands differ in sign and the result's sign
+     * differs from that of 'a'. */
+    const uint32_t sign_flip = (wrapped ^ minuend) & (minuend ^ subtrahend);
+    return (sign_flip >> 31) != 0;
+#endif
+}
+
+// Compute a - b, store the (wrapped) 64-bit result in *result, and return
+// true if the mathematical difference does not fit in an int64_t.
+static inline bool _Py_sub_overflow64(int64_t a, int64_t b, int64_t *result)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    return __builtin_sub_overflow(a, b, result);
+#else
+    const uint64_t minuend = (uint64_t)a;
+    const uint64_t subtrahend = (uint64_t)b;
+    const uint64_t wrapped = minuend - subtrahend;
+    *result = (int64_t)wrapped;
+    /* Overflow iff the operands differ in sign and the result's sign
+     * differs from that of 'a'. */
+    const uint64_t sign_flip = (wrapped ^ minuend) & (minuend ^ subtrahend);
+    return (sign_flip >> 63) != 0;
+#endif
+}
+
+// Compute a * b, store the (wrapped) 32-bit result in *result, and return
+// true if the mathematical product does not fit in an int32_t.
+static inline bool _Py_mul_overflow32(int32_t a, int32_t b, int32_t *result)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    return __builtin_mul_overflow(a, b, result);
+#else
+    /* The product of two 32-bit values always fits in 64 bits, so compute it
+     * exactly in a signed 64-bit integer and range-check that. */
+    const int64_t wide = (int64_t)a * (int64_t)b;
+    /* Truncate through uint32_t to keep the low 32 bits of the product. */
+    *result = (int32_t)(uint32_t)wide;
+    return wide < INT32_MIN || wide > INT32_MAX;
+#endif
+}
+
+// Compute a * b, store the (wrapped) 64-bit result in *result, and return
+// true if the mathematical product does not fit in an int64_t.
+static inline bool _Py_mul_overflow64(int64_t a, int64_t b, int64_t *result)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    return __builtin_mul_overflow(a, b, result);
+#else
+    /* Multiply as unsigned so that wrap-around is well defined. */
+    *result = (int64_t)((uint64_t)a * (uint64_t)b);
+
+    /* No wider type is available, so detect overflow by dividing the limit
+     * by one operand, split by operand sign.  None of the divisions below
+     * can divide by zero or evaluate INT64_MIN / -1. */
+    if (a > 0) {
+        if (b > 0) {
+            return a > INT64_MAX / b;
+        }
+        /* a positive, b non-positive: product is <= 0. */
+        return b < INT64_MIN / a;
+    }
+    if (b > 0) {
+        /* a non-positive, b positive: product is <= 0. */
+        return a < INT64_MIN / b;
+    }
+    /* Both non-positive: product is >= 0. */
+    return a != 0 && b < INT64_MAX / a;
+#endif
+}
+
 #ifdef __cplusplus
 }
 #endif

@@ -31,7 +31,7 @@ struct _Py_global_objects {
          * The integers that are preallocated are those in the range
          * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive).
          */
-        PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
+        PyVarObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
 
         PyBytesObject bytes_empty;
         struct {

diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h
@@ -43,7 +43,7 @@ PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right);
 
 /* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
    _PyBytes_DecodeEscape(), etc. */
-PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
+PyAPI_DATA(const unsigned char) _PyLong_DigitValue[256];
 
 /* Format the object based on the format_spec, as defined in PEP 3101
    (Advanced String Formatting). */