Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ jobs:
timeout-minutes: 60
env:
TOX_SKIP_MISSING_INTERPRETERS: False
VIRTUALENV_SYSTEM_SITE_PACKAGES: ${{ matrix.test_mypyc && 1 || 0 }}
# Rich (pip) -- Disable color for windows + pytest
FORCE_COLOR: ${{ !(startsWith(matrix.os, 'windows-') && startsWith(matrix.toxenv, 'py')) && 1 || 0 }}
# Tox
Expand Down Expand Up @@ -209,8 +210,10 @@ jobs:

- name: Compiled with mypyc
if: ${{ matrix.test_mypyc }}
# Use local version of librt during self-compilation in tests.
run: |
pip install -r test-requirements.txt
pip install -U mypyc/lib-rt
CC=clang MYPYC_OPT_LEVEL=0 MYPY_USE_MYPYC=1 pip install -e .

- name: Setup tox environment
Expand Down
2 changes: 1 addition & 1 deletion mypy-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ typing_extensions>=4.6.0
mypy_extensions>=1.0.0
pathspec>=1.0.0
tomli>=1.1.0; python_version<'3.11'
librt>=0.8.0; platform_python_implementation != 'PyPy'
librt>=0.9.0; platform_python_implementation != 'PyPy'
2 changes: 2 additions & 0 deletions mypy/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
# Always use this type alias to refer to type tags.
Tag = u8

# Note: all tags should be kept in sync with lib-rt/internal/librt_internal.c.
# Primitives.
LITERAL_FALSE: Final[Tag] = 0
LITERAL_TRUE: Final[Tag] = 1
Expand All @@ -264,6 +265,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
# Four integers representing source file (line, column) range.
LOCATION: Final[Tag] = 152

RESERVED: Final[Tag] = 254
END_TAG: Final[Tag] = 255


Expand Down
17 changes: 11 additions & 6 deletions mypy/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
)

from librt.internal import (
extract_symbol,
read_float as read_float_bare,
read_int as read_int_bare,
read_str as read_str_bare,
Expand Down Expand Up @@ -4953,8 +4954,15 @@ def read(cls, data: ReadBuffer) -> SymbolTableNode:
sym.plugin_generated = read_bool(data)
cross_ref = read_str_opt(data)
if cross_ref is None:
sym._node = read_symbol(data)
if not isinstance(sym._node, TypeInfo):
tag = read_tag(data)
if tag == TYPE_INFO:
sym._node = TypeInfo.read(data)
else:
# This logic is temporary, to make sure we don't introduce
# regressions until we have proper lazy deserialization.
# It has negligible performance impact.
node_bytes = extract_symbol(data)
sym._node = read_symbol(ReadBuffer(node_bytes), tag)
sym.unfixed = True
else:
sym.cross_ref = cross_ref
Expand Down Expand Up @@ -5374,17 +5382,14 @@ def set_info(node: SymbolNode, info: TypeInfo) -> None:
TSTRING_EXPR: Final[Tag] = 229


def read_symbol(data: ReadBuffer) -> SymbolNode:
tag = read_tag(data)
def read_symbol(data: ReadBuffer, tag: Tag) -> SymbolNode:
# The branches here are ordered manually by type "popularity".
if tag == VAR:
return Var.read(data)
if tag == FUNC_DEF:
return FuncDef.read(data)
if tag == DECORATOR:
return Decorator.read(data)
if tag == TYPE_INFO:
return TypeInfo.read(data)
if tag == OVERLOADED_FUNC_DEF:
return OverloadedFuncDef.read(data)
if tag == TYPE_VAR_EXPR:
Expand Down
1 change: 1 addition & 0 deletions mypy/typeshed/stubs/librt/librt/internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ def read_int(data: ReadBuffer, /) -> int: ...
def write_tag(data: WriteBuffer, value: u8, /) -> None: ...
def read_tag(data: ReadBuffer, /) -> u8: ...
def cache_version() -> u8: ...
def extract_symbol(data: ReadBuffer, /) -> bytes: ...
273 changes: 272 additions & 1 deletion mypyc/lib-rt/internal/librt_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@ _write_long_int(PyObject *data, CPyTagged value) {
}

// Write absolute integer value as byte array in a variable-length little endian format.
int i;
Py_ssize_t i;
for (i = len; i > 1; i -= 2) {
if (write_tag_internal(
data, hex_to_int(str[i - 1]) | (hex_to_int(str[i - 2]) << 4)) == CPY_NONE_ERROR)
Expand Down Expand Up @@ -920,6 +920,275 @@ write_tag(PyObject *self, PyObject *const *args, size_t nargs) {
return Py_None;
}

// All tags must be kept in sync with cache.py, nodes.py, and types.py.
// Each tag is a single byte that precedes a serialized value; the skip
// helpers in this file dispatch on it to find out how many bytes follow.
// Primitive types.
#define LITERAL_FALSE 0
#define LITERAL_TRUE 1
#define LITERAL_NONE 2
#define LITERAL_INT 3
#define LITERAL_STR 4
#define LITERAL_BYTES 5
#define LITERAL_FLOAT 6
#define LITERAL_COMPLEX 7

// Supported builtin collections.
#define LIST_GEN 20
#define LIST_INT 21
#define LIST_STR 22
#define LIST_BYTES 23
#define TUPLE_GEN 24
#define DICT_STR_GEN 30

// This is the smallest custom class tag.
// Note that _skip_object() only accepts custom class tags strictly greater
// than MYPY_FILE, so MYPY_FILE itself is rejected there.
#define MYPY_FILE 50

// Instance class has special formats.
// INSTANCE is followed by a second tag selecting the layout: INSTANCE_SIMPLE
// is followed by a sized string payload, INSTANCE_GENERIC by tagged objects
// up to END_TAG, and INSTANCE_STR..INSTANCE_OBJECT by nothing at all
// (see _skip_instance()).
#define INSTANCE 80
#define INSTANCE_SIMPLE 81
#define INSTANCE_GENERIC 82
#define INSTANCE_STR 83
#define INSTANCE_FUNCTION 84
#define INSTANCE_INT 85
#define INSTANCE_BOOL 86
#define INSTANCE_OBJECT 87

// RESERVED is the exclusive upper bound of the custom class tag range used
// by _skip_object(); END_TAG terminates the tagged field sequence of a
// custom class (see _skip_class()).
#define RESERVED 254
#define END_TAG 255

// Forward declaration.
static char _skip_object(PyObject *data, uint8_t tag);

// Move the read position of `data` forward by `size` bytes without decoding
// anything. Like every _skip_xxx() helper, this reports failure through its
// return value: _CHECK_READ() returns CPY_NONE_ERROR from this function when
// its check fails, otherwise the position is advanced and CPY_NONE returned.
static inline char
_skip(PyObject *data, Py_ssize_t size) {
    _CHECK_READ(data, size, CPY_NONE_ERROR)
    ReadBufferObject *buf = (ReadBufferObject *)data;
    buf->ptr += size;
    return CPY_NONE;
}

// Skip the remaining bytes of a short int whose leading byte `first` has
// already been consumed by the caller. The flag bits in `first` select the
// width: no extra bytes when TWO_BYTES_INT_BIT is clear, one extra byte when
// only TWO_BYTES_INT_BIT is set, and three extra bytes when
// FOUR_BYTES_INT_BIT is set as well.
// Returns CPY_NONE on success or CPY_NONE_ERROR if skipping fails.
static inline char
_skip_short_int(PyObject *data, uint8_t first) {
    if (first & TWO_BYTES_INT_BIT) {
        Py_ssize_t extra = (first & FOUR_BYTES_INT_BIT) ? 3 : 1;
        return _skip(data, extra);
    }
    // One-byte encoding: the value was fully contained in `first`.
    return CPY_NONE;
}

// Skip one serialized integer (short or long format) in `data`.
// Returns CPY_NONE on success and CPY_NONE_ERROR on failure.
static inline char
_skip_int(PyObject *data) {
    _CHECK_READ(data, 1, CPY_NONE_ERROR)

    uint8_t lead;
    _READ(&lead, data, uint8_t);
    if (unlikely(lead == LONG_INT_TRAILER)) {
        // Long format: a short-int header follows the trailer byte.
        _CHECK_READ(data, 1, CPY_NONE_ERROR)
        _READ(&lead, data, uint8_t);
        Py_ssize_t header = _read_short_int(data, lead);
        if (header == CPY_INT_TAG) {
            // _read_short_int() signalled an error.
            return CPY_NONE_ERROR;
        }
        if (header < 0) {
            PyErr_SetString(PyExc_ValueError, "invalid int data");
            return CPY_NONE_ERROR;
        }
        // The header is still a tagged value; dropping its two low bits
        // (tag bit plus, presumably, the sign flag) leaves the payload
        // length in bytes.
        return _skip(data, header >> 2);
    }
    return _skip_short_int(data, lead);
}

// Read a collection/string size from `data`. This is a validating wrapper
// around _read_short_int(): it returns the decoded non-negative size, or -1
// on failure. Sizes written in the long int format and negative sizes are
// rejected with ValueError.
static inline Py_ssize_t
_read_size(PyObject *data) {
    _CHECK_READ(data, 1, -1)
    uint8_t lead;
    _READ(&lead, data, uint8_t);
    if (unlikely(lead == LONG_INT_TRAILER)) {
        // Serialization does allow lists/dicts with over 4 billion items,
        // but that is never needed in practice, so reject it just in case.
        PyErr_SetString(PyExc_ValueError, "unsupported size");
        return -1;
    }
    CPyTagged tagged = _read_short_int(data, lead);
    if (tagged == CPY_INT_TAG) {
        // _read_short_int() signalled an error.
        return -1;
    }
    Py_ssize_t as_signed = (Py_ssize_t)tagged;
    if (as_signed < 0) {
        PyErr_SetString(PyExc_ValueError, "invalid size");
        return -1;
    }
    // Untag the value to obtain the plain size.
    return (Py_ssize_t)(tagged >> 1);
}

// Skip a bare (untagged) str/bytes value: a size followed by that many
// payload bytes. Returns CPY_NONE on success, CPY_NONE_ERROR on failure.
static inline char
_skip_str_bytes(PyObject *data) {
    Py_ssize_t length = _read_size(data);
    return length < 0 ? CPY_NONE_ERROR : _skip(data, length);
}

// List/dict logic should be kept in sync with mypy/cache.py
//
// Skip a generic sequence: an item count followed by that many tagged
// objects. Returns CPY_NONE on success, CPY_NONE_ERROR on failure.
static inline char
_skip_list_gen(PyObject *data) {
    Py_ssize_t remaining = _read_size(data);
    if (remaining < 0) {
        return CPY_NONE_ERROR;
    }
    while (remaining-- > 0) {
        uint8_t item_tag = read_tag_internal(data);
        if (unlikely(item_tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
            return CPY_NONE_ERROR;
        }
        if (unlikely(_skip_object(data, item_tag) == CPY_NONE_ERROR)) {
            return CPY_NONE_ERROR;
        }
    }
    return CPY_NONE;
}

// Skip a list of ints: an item count followed by that many bare
// (untagged) integers. Returns CPY_NONE on success, CPY_NONE_ERROR on
// failure.
static inline char
_skip_list_int(PyObject *data) {
    Py_ssize_t remaining = _read_size(data);
    if (remaining < 0) {
        return CPY_NONE_ERROR;
    }
    while (remaining-- > 0) {
        if (unlikely(_skip_int(data) == CPY_NONE_ERROR)) {
            return CPY_NONE_ERROR;
        }
    }
    return CPY_NONE;
}

// Skip a list of str or bytes values: an item count followed by that many
// bare (untagged) sized payloads. Returns CPY_NONE on success,
// CPY_NONE_ERROR on failure.
static inline char
_skip_list_str_bytes(PyObject *data) {
    Py_ssize_t remaining = _read_size(data);
    if (remaining < 0) {
        return CPY_NONE_ERROR;
    }
    while (remaining-- > 0) {
        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR)) {
            return CPY_NONE_ERROR;
        }
    }
    return CPY_NONE;
}

// Skip a dict with string keys: an entry count followed by that many
// (bare key, tagged value) pairs. Returns CPY_NONE on success,
// CPY_NONE_ERROR on failure.
static inline char
_skip_dict_str_gen(PyObject *data) {
    Py_ssize_t remaining = _read_size(data);
    if (remaining < 0) {
        return CPY_NONE_ERROR;
    }
    while (remaining-- > 0) {
        // The key is written without a tag...
        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR)) {
            return CPY_NONE_ERROR;
        }
        // ...while the value carries its own tag.
        uint8_t value_tag = read_tag_internal(data);
        if (unlikely(value_tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
            return CPY_NONE_ERROR;
        }
        if (unlikely(_skip_object(data, value_tag) == CPY_NONE_ERROR)) {
            return CPY_NONE_ERROR;
        }
    }
    return CPY_NONE;
}

// Skip the body of a custom class: a run of tagged objects terminated by
// END_TAG (which is consumed too). As in mypy/cache.py, the caller is
// expected to have read the opening class tag already.
// Returns CPY_NONE on success, CPY_NONE_ERROR on failure.
static inline char
_skip_class(PyObject *data) {
    for (;;) {
        uint8_t field_tag = read_tag_internal(data);
        if (unlikely(field_tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
            return CPY_NONE_ERROR;
        }
        if (field_tag == END_TAG) {
            break;
        }
        if (unlikely(_skip_object(data, field_tag) == CPY_NONE_ERROR)) {
            return CPY_NONE_ERROR;
        }
    }
    return CPY_NONE;
}

// Skip an Instance, which uses a special compact layout (an important
// optimization): a second tag selects what, if anything, follows.
// Returns CPY_NONE on success; on failure returns CPY_NONE_ERROR, raising
// ValueError for an unknown second tag.
static inline char
_skip_instance(PyObject *data) {
    uint8_t kind = read_tag_internal(data);
    if (unlikely(kind == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
        return CPY_NONE_ERROR;
    }
    if (kind == INSTANCE_SIMPLE) {
        // A single sized string payload follows.
        return _skip_str_bytes(data);
    }
    if (kind == INSTANCE_GENERIC) {
        // Tagged objects follow, up to END_TAG.
        return _skip_class(data);
    }
    if (kind < INSTANCE_STR || kind > INSTANCE_OBJECT) {
        PyErr_Format(PyExc_ValueError, "Unexpected instance tag: %d", kind);
        return CPY_NONE_ERROR;
    }
    // INSTANCE_STR..INSTANCE_OBJECT: the tag alone is the whole encoding.
    return CPY_NONE;
}

// This is the main dispatch point. Branches are ordered manually
// based roughly on frequency in self-check.
static char
_skip_object(PyObject *data, uint8_t tag) {
if (tag == LITERAL_STR || tag == LITERAL_BYTES)
return _skip_str_bytes(data);
if (tag == LITERAL_NONE || tag == LITERAL_FALSE || tag == LITERAL_TRUE)
return CPY_NONE;
if (tag == LIST_GEN || tag == TUPLE_GEN)
return _skip_list_gen(data);
if (tag == LITERAL_INT)
return _skip_int(data);
if (tag == INSTANCE)
return _skip_instance(data);
// We intentionally exclude MypyFile as a sanity check. Module symbols should
// be always handled via cross_ref, and never appear in a symbol table as is.
if (tag > MYPY_FILE && tag < RESERVED)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be >=?

Copy link
Copy Markdown
Member Author

@ilevkivskyi ilevkivskyi Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually intentional, a little sanity check. We know that we should only read symbol nodes using this function, and MypyFile should be always handled using cross_ref in cache, and never appear explicitly in symbol tables.

return _skip_class(data);
if (tag == LIST_INT)
return _skip_list_int(data);
if (tag == LIST_STR || tag == LIST_BYTES)
return _skip_list_str_bytes(data);
if (tag == DICT_STR_GEN)
return _skip_dict_str_gen(data);
if (tag == LITERAL_FLOAT)
return _skip(data, 8);
if (tag == LITERAL_COMPLEX)
return _skip(data, 16);
PyErr_Format(PyExc_ValueError, "Unsupported tag: %d", tag);
return CPY_NONE_ERROR;
}

// Copy out the raw bytes of one serialized symbol body without decoding it.
// Starting at the current read position of `data`, skip tagged objects up to
// and including the closing END_TAG (see _skip_class()), then return the
// skipped span as a new bytes object. The read position is left just past
// the span. Returns NULL with an exception set on failure.
static PyObject*
extract_symbol_internal(PyObject *data) {
    ReadBufferObject *buf = (ReadBufferObject *)data;
    char *start = buf->ptr;
    if (unlikely(_skip_class(data) == CPY_NONE_ERROR)) {
        return NULL;
    }
    Py_ssize_t length = buf->ptr - start;
    // PyBytes_FromStringAndSize() already returns NULL on failure, so its
    // result can be handed back directly.
    return PyBytes_FromStringAndSize(start, length);
}

// Python-level entry point for extract_symbol(data), using the fastcall
// convention. Validates the argument count and the buffer argument (via
// _CHECK_READ_BUFFER), then delegates to extract_symbol_internal().
// Returns a new bytes object, or NULL with an exception set.
static PyObject*
extract_symbol(PyObject *self, PyObject *const *args, size_t nargs) {
    if (likely(nargs == 1)) {
        PyObject *data = args[0];
        _CHECK_READ_BUFFER(data, NULL)
        return extract_symbol_internal(data);
    }
    PyErr_Format(PyExc_TypeError,
                 "extract_symbol() takes exactly 1 argument (%zu given)", nargs);
    return NULL;
}

static uint8_t
cache_version_internal(void) {
return 0;
Expand Down Expand Up @@ -954,6 +1223,7 @@ static PyMethodDef librt_internal_module_methods[] = {
{"write_tag", (PyCFunction)write_tag, METH_FASTCALL, PyDoc_STR("write a short int")},
{"read_tag", (PyCFunction)read_tag, METH_FASTCALL, PyDoc_STR("read a short int")},
{"cache_version", (PyCFunction)cache_version, METH_NOARGS, PyDoc_STR("cache format version")},
{"extract_symbol", (PyCFunction)extract_symbol, METH_FASTCALL, PyDoc_STR("extract bytes for a mypy symbol")},
{NULL, NULL, 0, NULL}
};

Expand Down Expand Up @@ -1005,6 +1275,7 @@ librt_internal_module_exec(PyObject *m)
(void *)ReadBuffer_type_internal,
(void *)WriteBuffer_type_internal,
(void *)NativeInternal_API_Version,
(void *)extract_symbol_internal
};
PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL);
if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
Expand Down
Loading
Loading