diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 435d09259ce99..5242739f8f846 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -139,6 +139,7 @@ jobs: timeout-minutes: 60 env: TOX_SKIP_MISSING_INTERPRETERS: False + VIRTUALENV_SYSTEM_SITE_PACKAGES: ${{ matrix.test_mypyc && 1 || 0 }} # Rich (pip) -- Disable color for windows + pytest FORCE_COLOR: ${{ !(startsWith(matrix.os, 'windows-') && startsWith(matrix.toxenv, 'py')) && 1 || 0 }} # Tox @@ -209,8 +210,10 @@ jobs: - name: Compiled with mypyc if: ${{ matrix.test_mypyc }} + # Use local version of librt during self-compilation in tests. run: | pip install -r test-requirements.txt + pip install -U mypyc/lib-rt CC=clang MYPYC_OPT_LEVEL=0 MYPY_USE_MYPYC=1 pip install -e . - name: Setup tox environment diff --git a/mypy-requirements.txt b/mypy-requirements.txt index c6fd8f38948ab..6949405d9a9c9 100644 --- a/mypy-requirements.txt +++ b/mypy-requirements.txt @@ -4,4 +4,4 @@ typing_extensions>=4.6.0 mypy_extensions>=1.0.0 pathspec>=1.0.0 tomli>=1.1.0; python_version<'3.11' -librt>=0.8.0; platform_python_implementation != 'PyPy' +librt>=0.9.0; platform_python_implementation != 'PyPy' diff --git a/mypy/cache.py b/mypy/cache.py index 0adc7affb8cb4..c4b74230c4c28 100644 --- a/mypy/cache.py +++ b/mypy/cache.py @@ -239,6 +239,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None: # Always use this type alias to refer to type tags. Tag = u8 +# Note: all tags should be kept in sync with lib-rt/internal/librt_internal.c. # Primitives. LITERAL_FALSE: Final[Tag] = 0 LITERAL_TRUE: Final[Tag] = 1 @@ -264,6 +265,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None: # Four integers representing source file (line, column) range. 
LOCATION: Final[Tag] = 152 +RESERVED: Final[Tag] = 254 END_TAG: Final[Tag] = 255 diff --git a/mypy/nodes.py b/mypy/nodes.py index 22de1223dea30..2b0e5940d6a99 100644 --- a/mypy/nodes.py +++ b/mypy/nodes.py @@ -21,6 +21,7 @@ ) from librt.internal import ( + extract_symbol, read_float as read_float_bare, read_int as read_int_bare, read_str as read_str_bare, @@ -4953,8 +4954,15 @@ def read(cls, data: ReadBuffer) -> SymbolTableNode: sym.plugin_generated = read_bool(data) cross_ref = read_str_opt(data) if cross_ref is None: - sym._node = read_symbol(data) - if not isinstance(sym._node, TypeInfo): + tag = read_tag(data) + if tag == TYPE_INFO: + sym._node = TypeInfo.read(data) + else: + # This logic is temporary, to make sure we don't introduce + # regressions until we have proper lazy deserialization. + # It has negligible performance impact. + node_bytes = extract_symbol(data) + sym._node = read_symbol(ReadBuffer(node_bytes), tag) sym.unfixed = True else: sym.cross_ref = cross_ref @@ -5374,8 +5382,7 @@ def set_info(node: SymbolNode, info: TypeInfo) -> None: TSTRING_EXPR: Final[Tag] = 229 -def read_symbol(data: ReadBuffer) -> SymbolNode: - tag = read_tag(data) +def read_symbol(data: ReadBuffer, tag: Tag) -> SymbolNode: # The branches here are ordered manually by type "popularity". if tag == VAR: return Var.read(data) @@ -5383,8 +5390,6 @@ def read_symbol(data: ReadBuffer) -> SymbolNode: return FuncDef.read(data) if tag == DECORATOR: return Decorator.read(data) - if tag == TYPE_INFO: - return TypeInfo.read(data) if tag == OVERLOADED_FUNC_DEF: return OverloadedFuncDef.read(data) if tag == TYPE_VAR_EXPR: diff --git a/mypy/typeshed/stubs/librt/librt/internal.pyi b/mypy/typeshed/stubs/librt/librt/internal.pyi index 72ed5a7ee76f4..8e597c4edbd70 100644 --- a/mypy/typeshed/stubs/librt/librt/internal.pyi +++ b/mypy/typeshed/stubs/librt/librt/internal.pyi @@ -19,3 +19,4 @@ def read_int(data: ReadBuffer, /) -> int: ... def write_tag(data: WriteBuffer, value: u8, /) -> None: ... 
def read_tag(data: ReadBuffer, /) -> u8: ... def cache_version() -> u8: ... +def extract_symbol(data: ReadBuffer, /) -> bytes: ... diff --git a/mypyc/lib-rt/internal/librt_internal.c b/mypyc/lib-rt/internal/librt_internal.c index ca8bc8bbea99a..04de7610736c0 100644 --- a/mypyc/lib-rt/internal/librt_internal.c +++ b/mypyc/lib-rt/internal/librt_internal.c @@ -804,7 +804,7 @@ _write_long_int(PyObject *data, CPyTagged value) { } // Write absolute integer value as byte array in a variable-length little endian format. - int i; + Py_ssize_t i; for (i = len; i > 1; i -= 2) { if (write_tag_internal( data, hex_to_int(str[i - 1]) | (hex_to_int(str[i - 2]) << 4)) == CPY_NONE_ERROR) @@ -920,6 +920,275 @@ write_tag(PyObject *self, PyObject *const *args, size_t nargs) { return Py_None; } +// All tags must be kept in sync with cache.py, nodes.py, and types.py. +// Primitive types. +#define LITERAL_FALSE 0 +#define LITERAL_TRUE 1 +#define LITERAL_NONE 2 +#define LITERAL_INT 3 +#define LITERAL_STR 4 +#define LITERAL_BYTES 5 +#define LITERAL_FLOAT 6 +#define LITERAL_COMPLEX 7 + +// Supported builtin collections. +#define LIST_GEN 20 +#define LIST_INT 21 +#define LIST_STR 22 +#define LIST_BYTES 23 +#define TUPLE_GEN 24 +#define DICT_STR_GEN 30 + +// This is the smallest custom class tag. +#define MYPY_FILE 50 + +// Instance class has special formats. +#define INSTANCE 80 +#define INSTANCE_SIMPLE 81 +#define INSTANCE_GENERIC 82 +#define INSTANCE_STR 83 +#define INSTANCE_FUNCTION 84 +#define INSTANCE_INT 85 +#define INSTANCE_BOOL 86 +#define INSTANCE_OBJECT 87 + +#define RESERVED 254 +#define END_TAG 255 + +// Forward declaration. +static char _skip_object(PyObject *data, uint8_t tag); + +static inline char +_skip(PyObject *data, Py_ssize_t size) { + // We are careful about error conditions, so all + // _skip_xxx() functions can return an error value. 
+ _CHECK_READ(data, size, CPY_NONE_ERROR) + ((ReadBufferObject *)data)->ptr += size; + return CPY_NONE; +} + +static inline char +_skip_short_int(PyObject *data, uint8_t first) { + if ((first & TWO_BYTES_INT_BIT) == 0) + return CPY_NONE; + if ((first & FOUR_BYTES_INT_BIT) == 0) + return _skip(data, 1); + return _skip(data, 3); +} + +static inline char +_skip_int(PyObject *data) { + _CHECK_READ(data, 1, CPY_NONE_ERROR) + + uint8_t first; + _READ(&first, data, uint8_t); + if (likely(first != LONG_INT_TRAILER)) { + return _skip_short_int(data, first); + } + + _CHECK_READ(data, 1, CPY_NONE_ERROR) + _READ(&first, data, uint8_t); + Py_ssize_t size_and_sign = _read_short_int(data, first); + if (size_and_sign == CPY_INT_TAG) + return CPY_NONE_ERROR; + if ((Py_ssize_t)size_and_sign < 0) { + PyErr_SetString(PyExc_ValueError, "invalid int data"); + return CPY_NONE_ERROR; + } + Py_ssize_t size = size_and_sign >> 2; + return _skip(data, size); +} + +// This is essentially a wrapper around _read_short_int() that makes +// sure the result is valid. +static inline Py_ssize_t +_read_size(PyObject *data) { + _CHECK_READ(data, 1, -1) + uint8_t first; + _READ(&first, data, uint8_t); + // We actually allow serializing lists/dicts with over 4 billion items, + // but we don't really need to support them here, so fail with ValueError just in case.
+ if (unlikely(first == LONG_INT_TRAILER)) { + PyErr_SetString(PyExc_ValueError, "unsupported size"); + return -1; + } + CPyTagged tagged_size = _read_short_int(data, first); + if (tagged_size == CPY_INT_TAG) + return -1; + if ((Py_ssize_t)tagged_size < 0) { + PyErr_SetString(PyExc_ValueError, "invalid size"); + return -1; + } + Py_ssize_t size = tagged_size >> 1; + return size; +} + +static inline char +_skip_str_bytes(PyObject *data) { + Py_ssize_t size = _read_size(data); + if (size < 0) + return CPY_NONE_ERROR; + return _skip(data, size); +} + +// The list/dict skipping logic below should be kept in sync with mypy/cache.py. +static inline char +_skip_list_gen(PyObject *data) { + Py_ssize_t size = _read_size(data); + if (size < 0) + return CPY_NONE_ERROR; + Py_ssize_t i; + for (i = 0; i < size; i++) { + uint8_t tag = read_tag_internal(data); + if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) { + return CPY_NONE_ERROR; + } + if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR)) + return CPY_NONE_ERROR; + } + return CPY_NONE; +} + +static inline char +_skip_list_int(PyObject *data) { + Py_ssize_t size = _read_size(data); + if (size < 0) + return CPY_NONE_ERROR; + Py_ssize_t i; + for (i = 0; i < size; i++) { + if (unlikely(_skip_int(data) == CPY_NONE_ERROR)) + return CPY_NONE_ERROR; + } + return CPY_NONE; +} + +static inline char +_skip_list_str_bytes(PyObject *data) { + Py_ssize_t size = _read_size(data); + if (size < 0) + return CPY_NONE_ERROR; + Py_ssize_t i; + for (i = 0; i < size; i++) { + if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR)) + return CPY_NONE_ERROR; + } + return CPY_NONE; +} + +static inline char +_skip_dict_str_gen(PyObject *data) { + Py_ssize_t size = _read_size(data); + if (size < 0) + return CPY_NONE_ERROR; + Py_ssize_t i; + for (i = 0; i < size; i++) { + // Each entry is a bare (untagged) string key followed by a tagged value.
+ if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR)) + return CPY_NONE_ERROR; + uint8_t tag = read_tag_internal(data); + if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) { + return CPY_NONE_ERROR; + } + if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR)) + return CPY_NONE_ERROR; + } + return CPY_NONE; +} + +// Similar to mypy/cache.py, the convention is that the caller reads +// the opening tag for custom classes; the fields up to END_TAG are skipped here. +static inline char +_skip_class(PyObject *data) { + while (1) { + uint8_t tag = read_tag_internal(data); + if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) { + return CPY_NONE_ERROR; + } + if (tag == END_TAG) { + return CPY_NONE; + } + if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR)) { + return CPY_NONE_ERROR; + } + } +} + +// Instance has a special compact layout (as an important optimization). +static inline char +_skip_instance(PyObject *data) { + uint8_t second_tag = read_tag_internal(data); + if (unlikely(second_tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) { + return CPY_NONE_ERROR; + } + if (second_tag >= INSTANCE_STR && second_tag <= INSTANCE_OBJECT) { + return CPY_NONE; + } + if (second_tag == INSTANCE_SIMPLE) { + return _skip_str_bytes(data); + } + if (second_tag == INSTANCE_GENERIC) { + return _skip_class(data); + } + PyErr_Format(PyExc_ValueError, "Unexpected instance tag: %d", second_tag); + return CPY_NONE_ERROR; +} + +// This is the main dispatch point. Branches are ordered manually +// based roughly on frequency in self-check. +static char +_skip_object(PyObject *data, uint8_t tag) { + if (tag == LITERAL_STR || tag == LITERAL_BYTES) + return _skip_str_bytes(data); + if (tag == LITERAL_NONE || tag == LITERAL_FALSE || tag == LITERAL_TRUE) + return CPY_NONE; + if (tag == LIST_GEN || tag == TUPLE_GEN) + return _skip_list_gen(data); + if (tag == LITERAL_INT) + return _skip_int(data); + if (tag == INSTANCE) + return _skip_instance(data); + // We intentionally exclude MypyFile as a sanity check.
Module symbols should + // always be handled via cross_ref, and never appear in a symbol table as is. + if (tag > MYPY_FILE && tag < RESERVED) + return _skip_class(data); + if (tag == LIST_INT) + return _skip_list_int(data); + if (tag == LIST_STR || tag == LIST_BYTES) + return _skip_list_str_bytes(data); + if (tag == DICT_STR_GEN) + return _skip_dict_str_gen(data); + if (tag == LITERAL_FLOAT) + return _skip(data, 8); + if (tag == LITERAL_COMPLEX) + return _skip(data, 16); + PyErr_Format(PyExc_ValueError, "Unsupported tag: %d", tag); + return CPY_NONE_ERROR; +} + +static PyObject* +extract_symbol_internal(PyObject *data) { + char *ptr = ((ReadBufferObject *)data)->ptr; + if (unlikely(_skip_class(data) == CPY_NONE_ERROR)) + return NULL; + Py_ssize_t size = ((ReadBufferObject *)data)->ptr - ptr; + PyObject *res = PyBytes_FromStringAndSize(ptr, size); + if (unlikely(res == NULL)) + return NULL; + return res; +} + +static PyObject* +extract_symbol(PyObject *self, PyObject *const *args, size_t nargs) { + if (unlikely(nargs != 1)) { + PyErr_Format(PyExc_TypeError, + "extract_symbol() takes exactly 1 argument (%zu given)", nargs); + return NULL; + } + PyObject *data = args[0]; + _CHECK_READ_BUFFER(data, NULL) + return extract_symbol_internal(data); +} + static uint8_t cache_version_internal(void) { return 0; @@ -954,6 +1223,7 @@ static PyMethodDef librt_internal_module_methods[] = { {"write_tag", (PyCFunction)write_tag, METH_FASTCALL, PyDoc_STR("write a short int")}, {"read_tag", (PyCFunction)read_tag, METH_FASTCALL, PyDoc_STR("read a short int")}, {"cache_version", (PyCFunction)cache_version, METH_NOARGS, PyDoc_STR("cache format version")}, + {"extract_symbol", (PyCFunction)extract_symbol, METH_FASTCALL, PyDoc_STR("extract bytes for a mypy symbol")}, {NULL, NULL, 0, NULL} }; @@ -1005,6 +1275,7 @@ librt_internal_module_exec(PyObject *m) (void *)ReadBuffer_type_internal, (void *)WriteBuffer_type_internal, (void *)NativeInternal_API_Version, + (void *)extract_symbol_internal
}; PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL); if (PyModule_Add(m, "_C_API", c_api_object) < 0) { diff --git a/mypyc/lib-rt/internal/librt_internal.h b/mypyc/lib-rt/internal/librt_internal.h index 1b325b20d95bb..e6d85901cabd9 100644 --- a/mypyc/lib-rt/internal/librt_internal.h +++ b/mypyc/lib-rt/internal/librt_internal.h @@ -11,11 +11,11 @@ // API version -- more recent versions must maintain backward compatibility, i.e. // we can add new features but not remove or change existing features (unless // ABI version is changed, but see the comment above). - #define LIBRT_INTERNAL_API_VERSION 0 + #define LIBRT_INTERNAL_API_VERSION 1 // Number of functions in the capsule API. If you add a new function, also increase // LIBRT_INTERNAL_API_VERSION. -#define LIBRT_INTERNAL_API_LEN 20 +#define LIBRT_INTERNAL_API_LEN 21 #ifdef LIBRT_INTERNAL_MODULE @@ -41,6 +41,7 @@ static uint8_t cache_version_internal(void); static PyTypeObject *ReadBuffer_type_internal(void); static PyTypeObject *WriteBuffer_type_internal(void); static int NativeInternal_API_Version(void); +static PyObject *extract_symbol_internal(PyObject *data); #else @@ -66,6 +67,7 @@ static void *NativeInternal_API[LIBRT_INTERNAL_API_LEN]; #define ReadBuffer_type_internal (*(PyTypeObject* (*)(void)) NativeInternal_API[17]) #define WriteBuffer_type_internal (*(PyTypeObject* (*)(void)) NativeInternal_API[18]) #define NativeInternal_API_Version (*(int (*)(void)) NativeInternal_API[19]) +#define extract_symbol_internal (*(PyObject* (*)(PyObject *source)) NativeInternal_API[20]) static int import_librt_internal(void) diff --git a/mypyc/primitives/misc_ops.py b/mypyc/primitives/misc_ops.py index ae2cfdf43e9c5..6be74baff3d0b 100644 --- a/mypyc/primitives/misc_ops.py +++ b/mypyc/primitives/misc_ops.py @@ -503,6 +503,14 @@ error_kind=ERR_NEVER, ) +function_op( + name="librt.internal.extract_symbol", + arg_types=[object_rprimitive], + return_type=bytes_rprimitive, + 
c_function_name="extract_symbol_internal", + error_kind=ERR_MAGIC, +) + function_op( name="librt.base64.b64encode", arg_types=[bytes_rprimitive], diff --git a/pyproject.toml b/pyproject.toml index ac77889617cc0..8aa2a5b619cc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ requires = [ "mypy_extensions>=1.0.0", "pathspec>=1.0.0", "tomli>=1.1.0; python_version<'3.11'", - "librt>=0.8.0; platform_python_implementation != 'PyPy'", + "librt>=0.9.0; platform_python_implementation != 'PyPy'", # the following is from build-requirements.txt "types-psutil", "types-setuptools", @@ -53,7 +53,7 @@ dependencies = [ "mypy_extensions>=1.0.0", "pathspec>=1.0.0", "tomli>=1.1.0; python_version<'3.11'", - "librt>=0.8.0; platform_python_implementation != 'PyPy'", + "librt>=0.9.0; platform_python_implementation != 'PyPy'", ] dynamic = ["version"] diff --git a/test-requirements.txt b/test-requirements.txt index eac3524e5d7f7..ebb81ccd1d3d0 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -24,7 +24,7 @@ identify==2.6.15 # via pre-commit iniconfig==2.1.0 # via pytest -librt==0.8.0 ; platform_python_implementation != "PyPy" +librt==0.9.0 ; platform_python_implementation != "PyPy" # via -r mypy-requirements.txt lxml==6.0.2 ; python_version < "3.15" # via -r test-requirements.in