diff --git a/graalpython/com.oracle.graal.python.cext/src/capi.c b/graalpython/com.oracle.graal.python.cext/src/capi.c index a7517cd427..3c8e0c4afd 100644 --- a/graalpython/com.oracle.graal.python.cext/src/capi.c +++ b/graalpython/com.oracle.graal.python.cext/src/capi.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -272,9 +272,13 @@ PyObject* _Py_NotImplementedStructReference; */ THREAD_LOCAL PyThreadState *tstate_current = NULL; -static void initialize_globals() { - // store the thread state into a thread local variable - tstate_current = GraalPyPrivate_ThreadState_Get(&tstate_current); +PyAPI_FUNC(PyThreadState **) GraalPyPrivate_InitThreadStateCurrent(PyThreadState *tstate) { + tstate_current = tstate; + return &tstate_current; +} + +static void initialize_globals(PyThreadState *tstate) { + GraalPyPrivate_InitThreadStateCurrent(tstate); _Py_NoneStructReference = GraalPyPrivate_None(); _Py_NotImplementedStructReference = GraalPyPrivate_NotImplemented(); _Py_EllipsisObjectReference = GraalPyPrivate_Ellipsis(); @@ -667,7 +671,7 @@ Py_LOCAL_SYMBOL TruffleContext* TRUFFLE_CONTEXT; */ Py_LOCAL_SYMBOL int8_t *_graalpy_finalizing = NULL; -PyAPI_FUNC(void) initialize_graal_capi(TruffleEnv* env, void **builtin_closures, GCState *gc) { +PyAPI_FUNC(PyThreadState **) initialize_graal_capi(TruffleEnv* env, void **builtin_closures, GCState *gc, PyThreadState *tstate) { clock_t t = clock(); if (env) { @@ -706,7 +710,7 @@ PyAPI_FUNC(void) initialize_graal_capi(TruffleEnv* env, void **builtin_closures, initialize_builtin_types_and_structs(); // initialize global variables like '_Py_NoneStruct', etc. - initialize_globals(); + initialize_globals(tstate); initialize_exceptions(); initialize_hashes(); initialize_bufferprocs(); @@ -717,6 +721,7 @@ PyAPI_FUNC(void) initialize_graal_capi(TruffleEnv* env, void **builtin_closures, Py_FileSystemDefaultEncoding = "utf-8"; // strdup(PyUnicode_AsUTF8(GraalPyPrivate_FileSystemDefaultEncoding())); GraalPyPrivate_Log(PY_TRUFFLE_LOG_FINE, "initialize_graal_capi: %fs", ((double) (clock() - t)) / CLOCKS_PER_SEC); + return &tstate_current; } /* diff --git a/graalpython/com.oracle.graal.python.cext/src/pystate.c b/graalpython/com.oracle.graal.python.cext/src/pystate.c index b54a724306..9a5c1872d1 100644 --- a/graalpython/com.oracle.graal.python.cext/src/pystate.c +++ b/graalpython/com.oracle.graal.python.cext/src/pystate.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2024, 2025, Oracle and/or its affiliates. +/* Copyright (c) 2024, 2026, Oracle and/or its affiliates. * Copyright (C) 1996-2024 Python Software Foundation * * Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 @@ -84,9 +84,16 @@ static inline PyThreadState * _get_thread_state() { PyThreadState *ts = tstate_current; if (UNLIKELY(ts == NULL)) { - ts = GraalPyPrivate_ThreadState_Get(&tstate_current); - tstate_current = ts; + /* + * Very unlikely fallback: this can happen if another thread initializes the C API while + * the current thread is attached to Python but blocked and therefore misses eager + * initialization of its native 'tstate_current' TLS slot. + */ + ts = GraalPyPrivate_ThreadState_Get(&tstate_current); + assert(ts != NULL); + tstate_current = ts; } + assert(ts != NULL); return ts; } diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_datetime.py b/graalpython/com.oracle.graal.python.test/src/tests/test_datetime.py index 68835c0ea2..03d18c7258 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_datetime.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_datetime.py @@ -38,6 +38,11 @@ # SOFTWARE. import datetime +import os +import subprocess +import sys +import textwrap +import time import unittest class DateTest(unittest.TestCase): @@ -542,12 +547,29 @@ def test_strptime(self): actual = datetime.datetime.strptime("+00:00 GMT", "%z %Z") self.assertEqual(actual.tzinfo.tzname(None), "GMT") - import time timezone_name = time.localtime().tm_zone self.assertIsNotNone(timezone_name) actual = datetime.datetime.strptime(f"+00:00 {timezone_name}", "%z %Z") self.assertEqual(actual.tzinfo.tzname(None), timezone_name) + if hasattr(time, "tzset") and sys.executable: + proc = subprocess.run( + [sys.executable, "-c", textwrap.dedent("""\ + import datetime + import time + + time.tzset() + timezone_name = time.localtime().tm_zone + actual = datetime.datetime.strptime(f"+00:00 {timezone_name}", "%z %Z") + assert actual.tzinfo.tzname(None) == timezone_name + """)], + env={**os.environ, "TZ": "Etc/GMT-1"}, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + self.assertEqual(proc.returncode, 0, proc.stderr) + # time zone name without utc offset is ignored actual = datetime.datetime.strptime("UTC", "%Z") self.assertIsNone(actual.tzinfo) diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_socket.py b/graalpython/com.oracle.graal.python.test/src/tests/test_socket.py index b9a10bbb8b..c49686d2c3 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_socket.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_socket.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # The Universal Permissive License (UPL), Version 1.0 @@ -59,6 +59,13 @@ def test_inet_aton_errs(self): self.assertRaises(OSError, lambda : socket.inet_aton('255.255.256.1')) self.assertRaises(TypeError, lambda : socket.inet_aton(255)) + +class TestHostLookupErrors(unittest.TestCase): + def test_gethostbyname_ex_invalid_host_raises_gaierror(self): + with self.assertRaises(socket.gaierror): + socket.gethostbyname_ex("nonexistent.invalid") + + def test_get_name_info(): import socket try : diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java index 1d4752f59b..ff8e6742a2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ImpModuleBuiltins.java @@ -284,7 +284,7 @@ private static int doExec(Node node, PythonContext context, PythonModule extensi return 0; } - if (!context.hasCApiContext()) { + if (context.getCApiState() != PythonContext.CApiState.INITIALIZED) { throw PRaiseNode.raiseStatic(node, PythonBuiltinClassType.SystemError, ErrorMessages.CAPI_NOT_YET_INITIALIZED); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SocketModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SocketModuleBuiltins.java index 16980ff22e..1001fccd42 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SocketModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SocketModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -385,7 +385,7 @@ static Object get(VirtualFrame frame, Object nameObj, addrInfoCursorLib.release(cursor); } } catch (GetAddrInfoException e) { - throw constructAndRaiseNode.get(inliningTarget).executeWithArgsOnly(frame, SocketHError, new Object[]{e.getMessageAsTruffleString()}); + throw constructAndRaiseNode.get(inliningTarget).executeWithArgsOnly(frame, SocketGAIError, new Object[]{e.getErrorCode(), e.getMessageAsTruffleString()}); } catch (PosixException e) { throw constructAndRaiseNode.get(inliningTarget).raiseOSErrorFromPosixException(frame, e); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextPyStateBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextPyStateBuiltins.java index 571a273492..872840159d 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextPyStateBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextPyStateBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -71,7 +71,6 @@ import com.oracle.graal.python.runtime.GilNode; import com.oracle.graal.python.runtime.PythonContext; import com.oracle.graal.python.runtime.PythonContext.PythonThreadState; -import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.graal.python.util.OverflowException; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.ThreadLocalAction; @@ -79,8 +78,6 @@ import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.interop.InteropLibrary; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.nodes.RootNode; @@ -116,19 +113,39 @@ static Object restore( } } - @CApiBuiltin(ret = PyThreadState, args = {Pointer}, call = Ignored) + /** + * Very unlikely fallback for threads that were already attached when another thread initialized + * the C API, but were blocked at that time and therefore could not process the thread-local + * action that eagerly initializes their native 'tstate_current' TLS slot. + */ + @CApiBuiltin(ret = PyThreadState, args = {Pointer}, acquireGil = false, call = Ignored) abstract static class GraalPyPrivate_ThreadState_Get extends CApiUnaryBuiltinNode { + private static final TruffleLogger LOGGER = CApiContext.getLogger(GraalPyPrivate_ThreadState_Get.class); - @Specialization(limit = "1") - static Object get(Object tstateCurrentPtr, - @Bind Node inliningTarget, - @Bind PythonContext context, - @CachedLibrary("tstateCurrentPtr") InteropLibrary lib) { - PythonThreadState pythonThreadState = context.getThreadState(context.getLanguage(inliningTarget)); - if (!lib.isNull(tstateCurrentPtr)) { - pythonThreadState.setNativeThreadLocalVarPointer(tstateCurrentPtr); + @Specialization + @TruffleBoundary + static Object get(Object tstateCurrentPtr) { + PythonContext context = PythonContext.get(null); + PythonThreadState threadState = context.getThreadState(context.getLanguage()); + + /* + * The C caller may have observed 'tstate_current == NULL' before entering this upcall. + * While entering this builtin, the same thread may process a queued thread-local action + * from C API initialization and initialize its native thread state eagerly. So the + * fallback decision made in C can be stale by the time we get here. + */ + if (threadState.isNativeThreadStateInitialized()) { + LOGGER.fine(() -> String.format("Lazy initialization attempt of native thread state for thread %s aborted. Was initialized in the meantime.", Thread.currentThread())); + Object nativeThreadState = PThreadState.getNativeThreadState(threadState); + assert nativeThreadState != null; + return nativeThreadState; } - return PThreadState.getOrCreateNativeThreadState(pythonThreadState); + + LOGGER.fine(() -> "Lazy (fallback) initialization of native thread state for thread " + Thread.currentThread()); + assert PThreadState.getNativeThreadState(threadState) == null; + Object nativeThreadState = PThreadState.getOrCreateNativeThreadState(threadState); + threadState.setNativeThreadLocalVarPointer(tstateCurrentPtr); + return nativeThreadState; } } @@ -151,12 +168,7 @@ static PDict get( @Bind Node inliningTarget, @Bind PythonContext context) { PythonThreadState threadState = context.getThreadState(context.getLanguage(inliningTarget)); - PDict threadStateDict = threadState.getDict(); - if (threadStateDict == null) { - threadStateDict = PFactory.createDict(context.getLanguage()); - threadState.setDict(threadStateDict); - } - return threadStateDict; + return PThreadState.getOrCreateThreadStateDict(context, threadState); } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/datetime/DateTimeBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/datetime/DateTimeBuiltins.java index 2e9ba79e03..a801017003 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/datetime/DateTimeBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/datetime/DateTimeBuiltins.java @@ -2310,22 +2310,14 @@ private static Object parse(String string, String format, PythonContext context, TimeZone timeZone = TimeModuleBuiltins.getGlobalTimeZone(context); String zoneName = timeZone.getDisplayName(false, TimeZone.SHORT); String zoneNameDaylightSaving = timeZone.getDisplayName(true, TimeZone.SHORT); + String matchedZoneName = matchTimeZoneName(string, i, zoneName, zoneNameDaylightSaving, "UTC", "GMT"); - if (string.startsWith("UTC", i)) { - builder.setTimeZoneName("UTC"); - i += 3; - } else if (string.startsWith("GMT", i)) { - builder.setTimeZoneName("GMT"); - i += 3; - } else if (string.startsWith(zoneName, i)) { - builder.setTimeZoneName(zoneName); - i += zoneName.length(); - } else if (string.startsWith(zoneNameDaylightSaving, i)) { - builder.setTimeZoneName(zoneNameDaylightSaving); - i += zoneNameDaylightSaving.length(); - } else { + if (matchedZoneName == null) { throw PRaiseNode.raiseStatic(inliningTarget, ValueError, ErrorMessages.TIME_DATA_S_DOES_NOT_MATCH_FORMAT_S, string, format); } + + builder.setTimeZoneName(matchedZoneName); + i += matchedZoneName.length(); } case 'j' -> { var pos = new ParsePosition(i); @@ -2487,6 +2479,16 @@ private static Integer parseDigits(String source, int from, int digitsCount) { return result; } + private static String matchTimeZoneName(String string, int from, String... candidates) { + String matched = null; + for (String candidate : candidates) { + if (candidate != null && string.startsWith(candidate, from) && (matched == null || candidate.length() > matched.length())) { + matched = candidate; + } + } + return matched; + } + @TruffleBoundary private static Integer parseDigitsUpTo(String source, ParsePosition from, int maxDigitsCount) { int result = 0; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java index dbc5401ef9..22ae518194 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CApiContext.java @@ -127,6 +127,7 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.CompilerDirectives.ValueType; import com.oracle.truffle.api.RootCallTarget; +import com.oracle.truffle.api.ThreadLocalAction; import com.oracle.truffle.api.TruffleFile; import com.oracle.truffle.api.TruffleLanguage.Env; import com.oracle.truffle.api.TruffleLogger; @@ -794,7 +795,7 @@ public static CApiContext ensureCapiWasLoaded(Node node, PythonContext context, assert PythonContext.get(null).ownsGil(); // unsafe lazy initialization // The initialization may run Python code (e.g., module import in // GraalPyPrivate_InitBuiltinTypesAndStructs), so just holding the GIL is not enough - if (!context.isCApiInitialized()) { + if (context.getCApiState() != PythonContext.CApiState.INITIALIZED) { // We import those modules ahead of the initialization without the initialization lock // to avoid deadlocks. We would have imported them in the initialization anyway, this @@ -808,17 +809,37 @@ public static CApiContext ensureCapiWasLoaded(Node node, PythonContext context, TruffleSafepoint.setBlockedThreadInterruptible(node, ReentrantLock::lockInterruptibly, initLock); } try { - if (!context.isCApiInitialized()) { - // loadCApi must set C API context half-way through its execution so that it can - // run internal Java code that needs C API context - TruffleSafepoint safepoint = TruffleSafepoint.getCurrent(); - boolean prevAllowSideEffects = safepoint.setAllowSideEffects(false); + PythonContext.CApiState state = context.getCApiState(); + if (state == PythonContext.CApiState.INITIALIZED || state == PythonContext.CApiState.INITIALIZING) { + return context.getCApiContext(); + } + if (state == PythonContext.CApiState.FAILED) { + throw new ApiInitException(toTruffleStringUncached("The C API initialization has previously failed.")); + } + + assert state == PythonContext.CApiState.UNINITIALIZED : state; + // loadCApi must set C API context half-way through its execution so that it can + // run internal Java code that needs C API context + TruffleSafepoint safepoint = TruffleSafepoint.getCurrent(); + boolean prevAllowSideEffects = safepoint.setAllowSideEffects(false); + try { + CApiContext cApiContext = loadCApi(node, context, name, path, reason); + assert context.getCApiState() == PythonContext.CApiState.INITIALIZING; + initializeThreadStateCurrentForAttachedThreads(context); + CApiTransitions.initializeReferenceQueuePolling(context.nativeContext); + context.runCApiHooks(); + context.setCApiState(PythonContext.CApiState.INITIALIZED); // volatile write try { - loadCApi(node, context, name, path, reason); - context.setCApiInitialized(); // volatile write - } finally { - safepoint.setAllowSideEffects(prevAllowSideEffects); + cApiContext.runBackgroundGCTask(context); + } catch (RuntimeException e) { + // This can happen when other languages restrict multithreading + LOGGER.warning(() -> "didn't start the background GC task due to: " + e.getMessage()); } + } catch (Throwable t) { + context.setCApiState(PythonContext.CApiState.FAILED); + throw t; + } finally { + safepoint.setAllowSideEffects(prevAllowSideEffects); } } finally { initLock.unlock(); @@ -827,6 +848,31 @@ public static CApiContext ensureCapiWasLoaded(Node node, PythonContext context, return context.getCApiContext(); } + private static void initializeThreadStateCurrentForAttachedThreads(PythonContext context) { + Thread[] threads = getOtherAliveAttachedThreads(context); + if (threads.length == 0) { + return; + } + ThreadLocalAction action = new ThreadLocalAction(true, false) { + @Override + protected void perform(ThreadLocalAction.Access access) { + context.initializeNativeThreadState(); + } + }; + context.getEnv().submitThreadLocal(threads, action); + } + + private static Thread[] getOtherAliveAttachedThreads(PythonContext context) { + Thread currentThread = Thread.currentThread(); + ArrayList threads = new ArrayList<>(); + for (Thread thread : context.getThreads()) { + if (thread != currentThread && thread.isAlive()) { + threads.add(thread); + } + } + return threads.toArray(Thread[]::new); + } + private static CApiContext loadCApi(Node node, PythonContext context, TruffleString name, TruffleString path, String reason) throws IOException, ImportException, ApiInitException { Env env = context.getEnv(); InteropLibrary U = InteropLibrary.getUncached(); @@ -872,6 +918,7 @@ private static CApiContext loadCApi(Node node, PythonContext context, TruffleStr Object initFunction = U.readMember(capiLibrary, "initialize_graal_capi"); CApiContext cApiContext = new CApiContext(context, capiLibrary, loc); context.setCApiContext(cApiContext); + context.setCApiState(PythonContext.CApiState.INITIALIZING); try (BuiltinArrayWrapper builtinArrayWrapper = new BuiltinArrayWrapper()) { /* @@ -880,14 +927,18 @@ private static CApiContext loadCApi(Node node, PythonContext context, TruffleStr * then already require the GC state. */ Object gcState = cApiContext.createGCState(); - Object signature = env.parseInternal(Source.newBuilder(J_NFI_LANGUAGE, "(ENV,POINTER,POINTER):VOID", "exec").build()).call(); + PythonThreadState currentThreadState = context.getThreadState(context.getLanguage()); + Object nativeThreadState = PThreadState.getOrCreateNativeThreadState(currentThreadState); + Object signature = env.parseInternal(Source.newBuilder(J_NFI_LANGUAGE, "(ENV,POINTER,POINTER,POINTER):POINTER", "exec").build()).call(); initFunction = SignatureLibrary.getUncached().bind(signature, initFunction); - U.execute(initFunction, builtinArrayWrapper, gcState); + Object nativeThreadLocalVarPointer = U.execute(initFunction, builtinArrayWrapper, gcState, nativeThreadState); + assert U.isPointer(nativeThreadLocalVarPointer); + assert !U.isNull(nativeThreadLocalVarPointer); + currentThreadState.setNativeThreadLocalVarPointer(nativeThreadLocalVarPointer); } assert PythonCApiAssertions.assertBuiltins(capiLibrary); cApiContext.pyDateTimeCAPICapsule = PyDateTimeCAPIWrapper.initWrapper(context, cApiContext); - context.runCApiHooks(); /* * C++ libraries sometimes declare global objects that have destructors that call @@ -902,7 +953,6 @@ private static CApiContext loadCApi(Node node, PythonContext context, TruffleStr Object finalizingPointer = SignatureLibrary.getUncached().call(finalizeSignature, finalizeFunction); try { cApiContext.addNativeFinalizer(context, finalizingPointer); - cApiContext.runBackgroundGCTask(context); } catch (RuntimeException e) { // This can happen when other languages restrict multithreading LOGGER.warning(() -> "didn't register a native finalizer due to: " + e.getMessage()); @@ -1106,7 +1156,7 @@ public void finalizeCApi() { * allocated resources (e.g. native object stubs). Calling * 'CApiTransitions.pollReferenceQueue' could then lead to a double-free. */ - CApiTransitions.disableReferenceQueuePolling(handleContext); + CApiTransitions.disableReferenceQueuePollingPermanently(handleContext); TruffleSafepoint sp = TruffleSafepoint.getCurrent(); boolean prev = sp.setAllowActions(false); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java index 089ac42070..b2fcbd59ad 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java @@ -849,7 +849,8 @@ static Object doWithoutContext(NativeCAPISymbol symbol, Object[] args, @Cached EnsureTruffleStringNode ensureTruffleStringNode) { try { PythonContext pythonContext = PythonContext.get(inliningTarget); - if (!pythonContext.hasCApiContext()) { + PythonContext.CApiState capiState = pythonContext.getCApiState(); + if (capiState != PythonContext.CApiState.INITIALIZING && capiState != PythonContext.CApiState.INITIALIZED) { CompilerDirectives.transferToInterpreterAndInvalidate(); CApiContext.ensureCapiWasLoaded("call internal native GraalPy function"); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/NativeCAPISymbol.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/NativeCAPISymbol.java index a842633b53..a45b84c36b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/NativeCAPISymbol.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/NativeCAPISymbol.java @@ -134,6 +134,7 @@ public enum NativeCAPISymbol implements NativeCExtSymbol { FUN_TRUFFLE_CHECK_TYPE_READY("GraalPyPrivate_CheckTypeReady", ArgDescriptor.Void, PyTypeObject), FUN_GRAALPY_GC_COLLECT("GraalPyPrivate_GC_Collect", Py_ssize_t, Int), FUN_SUBTYPE_TRAVERSE("GraalPyPrivate_SubtypeTraverse", Int, PyObject, Pointer, Pointer), + FUN_INIT_THREAD_STATE_CURRENT("GraalPyPrivate_InitThreadStateCurrent", Pointer, PyThreadState), /* PyDateTime_CAPI */ diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/PThreadState.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/PThreadState.java index 08fc5d4735..a3a0cb8613 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/PThreadState.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/PThreadState.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -41,16 +41,18 @@ package com.oracle.graal.python.builtins.objects.cext.capi; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.builtins.objects.PNone; import com.oracle.graal.python.builtins.objects.cext.capi.PythonNativeWrapper.PythonStructNativeWrapper; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PythonToNativeNode; -import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitionsFactory.PythonToNativeNodeGen; import com.oracle.graal.python.builtins.objects.cext.common.NativePointer; import com.oracle.graal.python.builtins.objects.cext.structs.CFields; import com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess; +import com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess.ReadObjectNode; import com.oracle.graal.python.builtins.objects.cext.structs.CStructs; import com.oracle.graal.python.builtins.objects.dict.PDict; import com.oracle.graal.python.runtime.PythonContext; +import com.oracle.graal.python.runtime.PythonContext.CApiState; import com.oracle.graal.python.runtime.PythonContext.PythonThreadState; import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.truffle.api.CompilerDirectives; @@ -78,7 +80,7 @@ public final class PThreadState extends PythonStructNativeWrapper { @TruffleBoundary private PThreadState(PythonThreadState threadState) { super(threadState); - long ptr = allocateCLayout(threadState); + long ptr = allocateCLayout(); CApiTransitions.createReference(this, ptr, true); // TODO: wrap in NativePointer for NFI replacement = new NativePointer(ptr); @@ -110,18 +112,51 @@ public PythonThreadState getThreadState() { } @TruffleBoundary - private static long allocateCLayout(PythonThreadState threadState) { - PythonToNativeNode toNative = PythonToNativeNodeGen.getUncached(); + public static PDict getOrCreateThreadStateDict(PythonContext context, PythonThreadState threadState) { + /* + * C API initialization must be finished at that time. This implies that there is already a + * native thread state. + */ + assert context.getCApiState() == CApiState.INITIALIZED; + Object nativeThreadState = PThreadState.getNativeThreadState(threadState); + assert nativeThreadState != null; + + PDict threadStateDict = threadState.getDict(); + if (threadStateDict != null) { + assert threadStateDict == ReadObjectNode.getUncached().read(nativeThreadState, CFields.PyThreadState__dict); + return threadStateDict; + } + + threadStateDict = PFactory.createDict(context.getLanguage()); + threadState.setDict(threadStateDict); + assert ReadObjectNode.getUncached().read(nativeThreadState, CFields.PyThreadState__dict) == PNone.NO_VALUE; + CStructAccess.WritePointerNode.writeUncached(nativeThreadState, CFields.PyThreadState__dict, PythonToNativeNode.executeUncached(threadStateDict)); + + return threadStateDict; + } + + /** + * This method runs on a critical bootstrap path when creating the native thread state. It may + * execute while the C API state is still INITIALIZING and before the current thread has + * installed its native 'tstate_current' TLS slot. So, this code must stay very restricted: only + * use bootstrap-safe allocation and raw struct writes here. + * + * In particular, do not introduce conversions such as PythonToNative(NewRef)Node or any other + * code paths that may poll the native reference queue, materialize additional native wrappers, + * or otherwise assume that the native thread state is already fully initialized. + */ + @TruffleBoundary + private static long allocateCLayout() { long ptr = CStructAccess.AllocateNode.allocUncachedPointer(CStructs.PyThreadState.size()); CStructAccess.WritePointerNode writePtrNode = CStructAccess.WritePointerNode.getUncached(); PythonContext pythonContext = PythonContext.get(null); - PDict threadStateDict = threadState.getDict(); - if (threadStateDict == null) { - threadStateDict = PFactory.createDict(pythonContext.getLanguage()); - threadState.setDict(threadStateDict); - } - writePtrNode.write(ptr, CFields.PyThreadState__dict, toNative.execute(threadStateDict)); + /* + * As in CPython, the thread state dict is initialized lazily. This is necessary to avoid + * cycles in the bootstrapping process because creating the dict will need the GC state + * which needs the thread state. + */ + writePtrNode.write(ptr, CFields.PyThreadState__dict, pythonContext.getNativeNull()); CApiContext cApiContext = pythonContext.getCApiContext(); Object smallInts = CStructAccess.AllocateNode.allocUncached((PY_NSMALLNEGINTS + PY_NSMALLPOSINTS) * CStructAccess.POINTER_SIZE); writePtrNode.write(ptr, CFields.PyThreadState__small_ints, smallInts); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java index 6fac3e14ce..dac761033b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/transitions/CApiTransitions.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -42,6 +42,11 @@ import static com.oracle.graal.python.builtins.objects.cext.capi.PythonNativeWrapper.PythonAbstractObjectNativeWrapper.IMMORTAL_REFCNT; import static com.oracle.graal.python.builtins.objects.cext.capi.PythonNativeWrapper.PythonAbstractObjectNativeWrapper.MANAGED_REFCNT; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PollingState.RQ_DISABLED_PERMANENT; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PollingState.RQ_DISABLED_TEMP; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PollingState.RQ_POLLING; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PollingState.RQ_READY; +import static com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PollingState.RQ_UNINITIALIZED; import java.lang.ref.ReferenceQueue; import java.lang.ref.WeakReference; @@ -149,6 +154,23 @@ public abstract class CApiTransitions { private static final TruffleLogger LOGGER = CApiContext.getLogger(CApiTransitions.class); + enum PollingState { + /** startup barrier not finished yet, polling must not run */ + RQ_UNINITIALIZED, + + /** normal steady state, polling allowed */ + RQ_READY, + + /** one thread is currently polling */ + RQ_POLLING, + + /** temporarily disabled by GraalPyPrivate_DisableReferneceQueuePolling */ + RQ_DISABLED_TEMP, + + /** shutdown/finalization, end state */ + RQ_DISABLED_PERMANENT + } + private CApiTransitions() { } @@ -183,7 +205,7 @@ public HandleContext(boolean useShadowTable) { public final ReferenceQueue referenceQueue = new ReferenceQueue<>(); - volatile boolean referenceQueuePollActive = false; + volatile PollingState referenceQueuePollingState = RQ_UNINITIALIZED; @TruffleBoundary public static T putShadowTable(HashMap table, long pointer, T ref) { @@ -429,29 +451,48 @@ public static int pollReferenceQueue() { PythonContext context = PythonContext.get(null); HandleContext handleContext = context.nativeContext; int manuallyCollected = 0; - if (!handleContext.referenceQueuePollActive) { - try (GilNode.UncachedAcquire ignored = GilNode.uncachedAcquire()) { - ReferenceQueue queue = handleContext.referenceQueue; - int count = 0; - long start = 0; - ArrayList referencesToBeFreed = handleContext.referencesToBeFreed; + if (handleContext.referenceQueuePollingState != RQ_READY) { + return manuallyCollected; + } + /* + * Polling the reference queue may deallocate native GC objects and therefore re-enter + * native code paths that use '_PyThreadState_GET()' to obtain the current thread's GC + * state. So, we may only poll once the current thread has installed its native + * 'tstate_current' pointer. + */ + if (!context.getThreadState(context.getLanguage()).isNativeThreadStateInitialized()) { + return manuallyCollected; + } + try (GilNode.UncachedAcquire ignored = GilNode.uncachedAcquire()) { + if (handleContext.referenceQueuePollingState != RQ_READY) { + return manuallyCollected; + } + if (!context.getThreadState(context.getLanguage()).isNativeThreadStateInitialized()) { + return manuallyCollected; + } + ReferenceQueue queue = handleContext.referenceQueue; + int count = 0; + long start = 0; + boolean polling = false; + ArrayList referencesToBeFreed = handleContext.referencesToBeFreed; + try { while (true) { Object entry = queue.poll(); if (entry == null) { if (count > 0) { - assert handleContext.referenceQueuePollActive; + assert handleContext.referenceQueuePollingState == RQ_POLLING || handleContext.referenceQueuePollingState == RQ_DISABLED_PERMANENT; releaseNativeObjects(context, referencesToBeFreed); - handleContext.referenceQueuePollActive = false; LOGGER.fine("collected " + count + " references from native reference queue in " + ((System.nanoTime() - start) / 1000000) + "ms"); } return manuallyCollected; } if (count == 0) { - assert !handleContext.referenceQueuePollActive; - handleContext.referenceQueuePollActive = true; + assert handleContext.referenceQueuePollingState == RQ_READY; + handleContext.referenceQueuePollingState = RQ_POLLING; + polling = true; start = System.nanoTime(); } else { - assert handleContext.referenceQueuePollActive; + assert handleContext.referenceQueuePollingState == RQ_POLLING; } count++; LOGGER.fine(() -> PythonUtils.formatJString("releasing %s, no remaining managed references", entry)); @@ -529,9 +570,12 @@ public static int pollReferenceQueue() { processPyCapsuleReference(reference); } } + } finally { + if (polling && handleContext.referenceQueuePollingState == RQ_POLLING) { + handleContext.referenceQueuePollingState = RQ_READY; + } } } - return manuallyCollected; } /** @@ -698,15 +742,26 @@ public static void freeNativeReplacementStructs(PythonContext context, HandleCon } public static boolean disableReferenceQueuePolling(HandleContext handleContext) { - if (!handleContext.referenceQueuePollActive) { - handleContext.referenceQueuePollActive = true; + if (handleContext.referenceQueuePollingState == RQ_READY) { + handleContext.referenceQueuePollingState = RQ_DISABLED_TEMP; return false; } return true; } public static void enableReferenceQueuePolling(HandleContext handleContext) { - handleContext.referenceQueuePollActive = false; + if (handleContext.referenceQueuePollingState == RQ_DISABLED_TEMP) { + handleContext.referenceQueuePollingState = RQ_READY; + } + } + + public static void initializeReferenceQueuePolling(HandleContext handleContext) { + assert handleContext.referenceQueuePollingState == RQ_UNINITIALIZED : handleContext.referenceQueuePollingState; + handleContext.referenceQueuePollingState = RQ_READY; + } + + public static void disableReferenceQueuePollingPermanently(HandleContext handleContext) { + handleContext.referenceQueuePollingState = RQ_DISABLED_PERMANENT; } private static void freeNativeStub(PythonObjectReference ref) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java index c420e5b03c..e07389ca40 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/PythonContext.java @@ -40,11 +40,11 @@ import static com.oracle.graal.python.nodes.BuiltinNames.T_SHA3; import static com.oracle.graal.python.nodes.BuiltinNames.T_STDERR; import static com.oracle.graal.python.nodes.BuiltinNames.T_STDOUT; -import static com.oracle.graal.python.nodes.BuiltinNames.T___STDOUT__; import static com.oracle.graal.python.nodes.BuiltinNames.T_SYS; import static com.oracle.graal.python.nodes.BuiltinNames.T_THREADING; import static com.oracle.graal.python.nodes.BuiltinNames.T___BUILTINS__; import static com.oracle.graal.python.nodes.BuiltinNames.T___MAIN__; +import static com.oracle.graal.python.nodes.BuiltinNames.T___STDOUT__; import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___ANNOTATIONS__; import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___FILE__; import static com.oracle.graal.python.nodes.SpecialMethodNames.T_INSERT; @@ -112,6 +112,8 @@ import com.oracle.graal.python.builtins.objects.PNone; import com.oracle.graal.python.builtins.objects.cext.PythonNativeClass; import com.oracle.graal.python.builtins.objects.cext.capi.CApiContext; +import com.oracle.graal.python.builtins.objects.cext.capi.CExtNodes.PCallCapiFunction; +import com.oracle.graal.python.builtins.objects.cext.capi.NativeCAPISymbol; import com.oracle.graal.python.builtins.objects.cext.capi.PThreadState; import com.oracle.graal.python.builtins.objects.cext.capi.PythonNativeWrapper.PythonAbstractObjectNativeWrapper; import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions; @@ -590,6 +592,14 @@ public void setNativeThreadLocalVarPointer(Object ptr) { String.format("ptr = %s; nativeThreadLocalVarPointer = %s", ptr, nativeThreadLocalVarPointer); this.nativeThreadLocalVarPointer = ptr; } + + public Object getNativeThreadLocalVarPointer() { + return nativeThreadLocalVarPointer; + } + + public boolean isNativeThreadStateInitialized() { + return nativeThreadLocalVarPointer != null; + } } private static final class AtExitHook { @@ -722,12 +732,21 @@ PythonThreadState getThreadState(Node n) { private OutputStream out; private OutputStream err; private InputStream in; + + public enum CApiState { + UNINITIALIZED, + INITIALIZING, + INITIALIZED, + FAILED + } + + /** Initialization state of the C API context. */ + private volatile CApiState cApiState = CApiState.UNINITIALIZED; private final ReentrantLock cApiInitializationLock = new ReentrantLock(false); - private volatile boolean cApiWasInitialized = false; @CompilationFinal private CApiContext cApiContext; @CompilationFinal private boolean nativeAccessAllowed; - private TruffleString soABI; // cache for soAPI + private TruffleString soABI; private static final class GlobalInterpreterLock extends ReentrantLock { private static final long serialVersionUID = 1L; @@ -1967,7 +1986,7 @@ public void clearAtexitHooks() { } public void registerCApiHook(Runnable hook) { - if (hasCApiContext()) { + if (getCApiState() == CApiState.INITIALIZED) { hook.run(); } else { capiHooks.add(hook); @@ -2566,7 +2585,42 @@ public void initializeMultiThreading() { public synchronized void attachThread(Thread thread, ContextThreadLocal threadState) { CompilerAsserts.neverPartOfCompilation(); - threadStateMapping.put(thread, threadState.get(thread)); + PythonThreadState pythonThreadState = threadState.get(thread); + threadStateMapping.put(thread, pythonThreadState); + ReentrantLock initLock = getcApiInitializationLock(); + /* + * Synchronize with C API initialization so that we do not miss eager initialization of this + * thread's 'tstate_current'. Otherwise, a thread could attach while another thread is + * sweeping all already-attached threads during C API initialization, observe + * 'INITIALIZING', skip eager initialization here, and then also miss the initialization + * sweep because it was not yet part of the thread snapshot. + */ + initLock.lock(); + try { + if (getCApiState() == CApiState.INITIALIZED) { + // initialize this thread's native TLS slot eagerly instead of on first use + initializeNativeThreadState(pythonThreadState); + } + } finally { + initLock.unlock(); + } + } + + @TruffleBoundary + public void initializeNativeThreadState() { + LOGGER.fine(() -> "Initializing native thread state for thread " + Thread.currentThread()); + initializeNativeThreadState(getThreadState(getLanguage())); + } + + @SuppressWarnings("try") + public void initializeNativeThreadState(PythonThreadState pythonThreadState) { + CompilerAsserts.neverPartOfCompilation(); + try (GilNode.UncachedAcquire ignored = GilNode.uncachedAcquire()) { + assert getCApiContext() != null; + Object nativeThreadState = PThreadState.getOrCreateNativeThreadState(pythonThreadState); + Object nativeThreadLocalVarPointer = PCallCapiFunction.callUncached(NativeCAPISymbol.FUN_INIT_THREAD_STATE_CURRENT, nativeThreadState); + pythonThreadState.setNativeThreadLocalVarPointer(nativeThreadLocalVarPointer); + } } public synchronized void disposeThread(Thread thread, boolean canRunGuestCode) { @@ -2594,25 +2648,29 @@ private static void releaseSentinelLock(WeakReference sentinelLockWeakref } } - public boolean hasCApiContext() { - // This may be called during C API initialization, we have a context so that we can finish - // the initialization, but the C API is not fully initialized yet - assert (cApiContext != null) || !cApiWasInitialized; - return cApiContext != null; + public CApiState getCApiState() { + assert cApiContext != null || cApiState == CApiState.UNINITIALIZED || cApiState == CApiState.FAILED : cApiState; + return cApiState; } - public boolean isCApiInitialized() { - assert (cApiContext != null) || !cApiWasInitialized; - return cApiWasInitialized; - } - - public void setCApiInitialized() { - assert cApiContext != null; - cApiWasInitialized = true; + public void setCApiState(CApiState state) { + /*- Allowed transitions: + * UNINITIALIZED -> INITIALIZING, FAILED + * INITIALIZING -> INITIALIZED, FAILED + */ + assert state != CApiState.UNINITIALIZED; + assert cApiInitializationLock.isHeldByCurrentThread(); + assert state != CApiState.INITIALIZING || cApiContext != null; + assert state != CApiState.INITIALIZED || cApiContext != null; + assert cApiState != CApiState.UNINITIALIZED || state == CApiState.INITIALIZING || state == CApiState.FAILED; + assert cApiState != CApiState.INITIALIZING || state == CApiState.INITIALIZED || state == CApiState.FAILED; + assert cApiState != CApiState.INITIALIZED; + assert cApiState != CApiState.FAILED; + cApiState = state; } public CApiContext getCApiContext() { - assert (cApiContext != null) || !cApiWasInitialized; + assert cApiContext != null || cApiState == CApiState.UNINITIALIZED || cApiState == CApiState.FAILED; return cApiContext; } @@ -2622,6 +2680,7 @@ public ReentrantLock getcApiInitializationLock() { public void setCApiContext(CApiContext capiContext) { assert this.cApiContext == null : "tried to create new C API context but it was already created"; + assert getCApiState() == CApiState.UNINITIALIZED; this.cApiContext = capiContext; }