From 71967051bbb5894a1b26f6023838bea14263940a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 15:58:56 -0600 Subject: [PATCH 01/26] docs: Add thread-safe mode specification Simple spec for blocking global state access in multi-tenant environments. Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 67 +++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 docs/design/thread-safe-mode.md diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md new file mode 100644 index 000000000..1b6c15cb8 --- /dev/null +++ b/docs/design/thread-safe-mode.md @@ -0,0 +1,67 @@ +# Thread-Safe Mode Specification + +## Problem + +DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. Multi-tenant applications (web servers, async workers) need isolated connections per request/task. + +## Solution + +Add `thread_safe` mode that blocks global state access and requires explicit connection configuration. + +## API + +### Enable Thread-Safe Mode + +Set via environment variable or config file (read-only after initialization): + +```bash +export DJ_THREAD_SAFE=true +``` + +```json +// datajoint.json +{"thread_safe": true} +``` + +### Create Connections + +```python +conn = dj.Connection.from_config( + host="localhost", + user="user", + password="password" +) +schema = dj.Schema("my_schema", connection=conn) +``` + +## Behavior + +| Operation | `thread_safe=False` | `thread_safe=True` | +|-----------|--------------------|--------------------| +| `dj.config.X` | Works | Raises `ThreadSafetyError` | +| `dj.conn()` | Works | Raises `ThreadSafetyError` | +| `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | +| `Connection.from_config()` | Works | Works | +| `Schema(..., connection=conn)` | Works | Works | + +## Implementation + +1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias +2. 
Make `thread_safe` read-only after `Config` initialization +3. Add guards to `Config.__getattr__`, `Config.__setattr__`, `Config.__getitem__`, `Config.__setitem__` +4. Add guard to `dj.conn()` +5. Add guard to `Schema.__init__` when `connection=None` +6. Add `Connection.from_config()` class method +7. Add `ThreadSafetyError` exception + +## Exceptions + +```python +class ThreadSafetyError(DataJointError): + """Raised when accessing global state in thread-safe mode.""" +``` + +Error messages: +- Config access: `"Global config is inaccessible in thread-safe mode. Use Connection.from_config() with explicit configuration."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection.from_config() with explicit configuration."` +- Schema without connection: `"Schema requires explicit connection in thread-safe mode. Use Schema(..., connection=conn)."` From 477d36585d86e1674478d7f3d74b9f488c270a44 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:30:24 -0600 Subject: [PATCH 02/26] chore: Remove unused settings Remove dead code: - filepath_checksum_size_limit (never used) - enable_python_native_blobs (never used) - cache (only query_cache is used) - init_function/init_command (database init command) Co-Authored-By: Claude Opus 4.5 --- src/datajoint/adapters/mysql.py | 3 --- src/datajoint/connection.py | 12 +----------- src/datajoint/settings.py | 11 ++--------- tests/integration/test_jobs.py | 5 ++--- tests/unit/test_settings.py | 24 ++++++++++++------------ 5 files changed, 17 insertions(+), 38 deletions(-) diff --git a/src/datajoint/adapters/mysql.py b/src/datajoint/adapters/mysql.py index 88339335f..21aab2908 100644 --- a/src/datajoint/adapters/mysql.py +++ b/src/datajoint/adapters/mysql.py @@ -75,7 +75,6 @@ def connect( Password for authentication. 
**kwargs : Any Additional MySQL-specific parameters: - - init_command: SQL initialization command - ssl: TLS/SSL configuration dict (deprecated, use use_tls) - use_tls: bool or dict - DataJoint's SSL parameter (preferred) - charset: Character set (default from kwargs) @@ -85,7 +84,6 @@ def connect( pymysql.Connection MySQL connection object. """ - init_command = kwargs.get("init_command") # Handle both ssl (old) and use_tls (new) parameter names ssl_config = kwargs.get("use_tls", kwargs.get("ssl")) # Convert boolean True to dict for PyMySQL (PyMySQL expects dict or SSLContext) @@ -99,7 +97,6 @@ def connect( "port": port, "user": user, "passwd": password, - "init_command": init_command, "sql_mode": "NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO," "STRICT_ALL_TABLES,NO_ENGINE_SUBSTITUTION,ONLY_FULL_GROUP_BY", "charset": charset, diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 21b48e638..488a26e7d 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -55,7 +55,6 @@ def conn( user: str | None = None, password: str | None = None, *, - init_fun: Callable | None = None, reset: bool = False, use_tls: bool | dict | None = None, ) -> Connection: @@ -73,8 +72,6 @@ def conn( Database username. Required if not set in config. password : str, optional Database password. Required if not set in config. - init_fun : callable, optional - Initialization function called after connection. reset : bool, optional If True, reset existing connection. Default False. use_tls : bool or dict, optional @@ -103,9 +100,8 @@ def conn( raise errors.DataJointError( "Database password not configured. Set datajoint.config['database.password'] or pass password= argument." 
) - init_fun = init_fun if init_fun is not None else config["connection.init_function"] use_tls = use_tls if use_tls is not None else config["database.use_tls"] - conn.connection = Connection(host, user, password, None, init_fun, use_tls) + conn.connection = Connection(host, user, password, None, use_tls) return conn.connection @@ -150,8 +146,6 @@ class Connection: Database password. port : int, optional Port number. Overridden if specified in host. - init_fun : str, optional - SQL initialization command. use_tls : bool or dict, optional TLS encryption option. @@ -169,7 +163,6 @@ def __init__( user: str, password: str, port: int | None = None, - init_fun: str | None = None, use_tls: bool | dict | None = None, ) -> None: if ":" in host: @@ -190,7 +183,6 @@ def __init__( # use_tls=True: enable SSL with default settings self.conn_info["ssl"] = True self.conn_info["ssl_input"] = use_tls - self.init_fun = init_fun self._conn = None self._query_cache = None self._is_closed = True # Mark as closed until connect() succeeds @@ -227,7 +219,6 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - init_command=self.init_fun, charset=config["connection.charset"], use_tls=self.conn_info.get("ssl"), ) @@ -244,7 +235,6 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - init_command=self.init_fun, charset=config["connection.charset"], use_tls=False, # Explicitly disable SSL for fallback ) diff --git a/src/datajoint/settings.py b/src/datajoint/settings.py index e373ca38f..7019d8345 100644 --- a/src/datajoint/settings.py +++ b/src/datajoint/settings.py @@ -224,7 +224,6 @@ class ConnectionSettings(BaseSettings): model_config = SettingsConfigDict(extra="forbid", validate_assignment=True) - init_function: str | None = None charset: str = "" # pymysql uses '' as default @@ -341,11 +340,8 @@ class Config(BaseSettings): # Top-level settings 
loglevel: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default="INFO", validation_alias="DJ_LOG_LEVEL") safemode: bool = True - enable_python_native_blobs: bool = True - filepath_checksum_size_limit: int | None = None - # Cache paths - cache: Path | None = None + # Cache path for query results query_cache: Path | None = None # Download path for attachments and filepaths @@ -362,7 +358,7 @@ def set_logger_level(cls, v: str) -> str: logger.setLevel(v) return v - @field_validator("cache", "query_cache", mode="before") + @field_validator("query_cache", mode="before") @classmethod def convert_path(cls, v: Any) -> Path | None: """Convert string paths to Path objects.""" @@ -819,7 +815,6 @@ def save_template( "use_tls": None, }, "connection": { - "init_function": None, "charset": "", }, "display": { @@ -844,8 +839,6 @@ def save_template( }, "loglevel": "INFO", "safemode": True, - "enable_python_native_blobs": True, - "cache": None, "query_cache": None, "download_path": ".", } diff --git a/tests/integration/test_jobs.py b/tests/integration/test_jobs.py index 20fa3233d..5a9203dca 100644 --- a/tests/integration/test_jobs.py +++ b/tests/integration/test_jobs.py @@ -108,10 +108,9 @@ def test_sigterm(clean_jobs, schema_any): def test_suppress_dj_errors(clean_jobs, schema_any): - """Test that DataJoint errors are suppressible without native py blobs.""" + """Test that DataJoint errors are suppressible.""" error_class = schema.ErrorClass() - with dj.config.override(enable_python_native_blobs=False): - error_class.populate(reserve_jobs=True, suppress_errors=True) + error_class.populate(reserve_jobs=True, suppress_errors=True) assert len(schema.DjExceptionName()) == len(error_class.jobs.errors) > 0 diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py index af5718503..475d96df9 100644 --- a/tests/unit/test_settings.py +++ b/tests/unit/test_settings.py @@ -504,23 +504,23 @@ def test_display_limit(self): class TestCachePaths: """Test cache path 
settings.""" - def test_cache_path_string(self): - """Test setting cache path as string.""" - original = dj.config.cache + def test_query_cache_path_string(self): + """Test setting query_cache path as string.""" + original = dj.config.query_cache try: - dj.config.cache = "/tmp/cache" - assert dj.config.cache == Path("/tmp/cache") + dj.config.query_cache = "/tmp/cache" + assert dj.config.query_cache == Path("/tmp/cache") finally: - dj.config.cache = original + dj.config.query_cache = original - def test_cache_path_none(self): - """Test cache path can be None.""" - original = dj.config.cache + def test_query_cache_path_none(self): + """Test query_cache path can be None.""" + original = dj.config.query_cache try: - dj.config.cache = None - assert dj.config.cache is None + dj.config.query_cache = None + assert dj.config.query_cache is None finally: - dj.config.cache = original + dj.config.query_cache = original class TestSaveTemplate: From 6a5a309a7e79d97e34424bb51bf0784d6c67d741 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:32:21 -0600 Subject: [PATCH 03/26] docs: Clarify that all settings are connection-scoped - All settings can be passed to Connection.from_config() - Only thread_safe is read-only after initialization Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 1b6c15cb8..29aa89310 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -25,11 +25,16 @@ export DJ_THREAD_SAFE=true ### Create Connections +All settings can be passed to `Connection.from_config()`: + ```python conn = dj.Connection.from_config( host="localhost", user="user", - password="password" + password="password", + safemode=False, + display_limit=25, + # ... 
any other settings ) schema = dj.Schema("my_schema", connection=conn) ``` @@ -44,6 +49,12 @@ schema = dj.Schema("my_schema", connection=conn) | `Connection.from_config()` | Works | Works | | `Schema(..., connection=conn)` | Works | Works | +## Read-Only Settings + +Only `thread_safe` is read-only after initialization. It can only be set via: +- Environment variable `DJ_THREAD_SAFE` +- Config file `datajoint.json` + ## Implementation 1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias From c0598f4ecda57f7683472927dbcfbc74cf19e89b Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:40:09 -0600 Subject: [PATCH 04/26] docs: Specify Connection.from_config() behavior - Parameters and defaults - Connection-scoped settings via conn.config - Never accesses global dj.config Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 29aa89310..50c55ce79 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -23,9 +23,9 @@ export DJ_THREAD_SAFE=true {"thread_safe": true} ``` -### Create Connections +### Connection.from_config() -All settings can be passed to `Connection.from_config()`: +Creates a connection with explicit configuration. Works in both `thread_safe=True` and `thread_safe=False` modes. ```python conn = dj.Connection.from_config( @@ -34,11 +34,29 @@ conn = dj.Connection.from_config( password="password", safemode=False, display_limit=25, - # ... 
any other settings ) schema = dj.Schema("my_schema", connection=conn) ``` +**Parameters:** +- `host` (required): Database hostname +- `user` (required): Database username +- `password` (required): Database password +- `port`: Database port (default: 3306) +- Any other setting from `dj.config` (e.g., `safemode`, `display_limit`, `stores`) + +**Defaults:** Settings not explicitly provided use hardcoded defaults (same as `dj.config` defaults). Global `dj.config` is never accessed. + +**Connection-scoped settings:** Stored on `conn.config` and accessed as `conn.config.safemode`, `conn.config.display_limit`, etc. + +```python +conn = dj.Connection.from_config(host="localhost", user="u", password="p") +conn.config.safemode # True (default) +conn.config.display_limit # 12 (default) + +conn.config.safemode = False # Modify for this connection only +``` + ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | @@ -62,7 +80,10 @@ Only `thread_safe` is read-only after initialization. It can only be set via: 3. Add guards to `Config.__getattr__`, `Config.__setattr__`, `Config.__getitem__`, `Config.__setitem__` 4. Add guard to `dj.conn()` 5. Add guard to `Schema.__init__` when `connection=None` -6. Add `Connection.from_config()` class method +6. Add `Connection.from_config()` class method that: + - Accepts all connection params and settings as kwargs + - Uses hardcoded defaults (never accesses global config) + - Creates `conn.config` object to store connection-scoped settings 7. 
Add `ThreadSafetyError` exception ## Exceptions From 697ec6d0302899d0d7c6c5e4e4cbb789bbf99598 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:51:07 -0600 Subject: [PATCH 05/26] docs: conn.config uses same Config class, connection settings read-only - Connection.from_config() creates a Config instance for conn.config - Database connection settings (host, port, user, password, use_tls, backend) become read-only after connection is established - Other settings remain mutable per-connection Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 50c55ce79..d5bc0ecf9 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -45,16 +45,21 @@ schema = dj.Schema("my_schema", connection=conn) - `port`: Database port (default: 3306) - Any other setting from `dj.config` (e.g., `safemode`, `display_limit`, `stores`) -**Defaults:** Settings not explicitly provided use hardcoded defaults (same as `dj.config` defaults). Global `dj.config` is never accessed. +**Config creation:** Uses the same `Config` class as global `dj.config`. Each connection gets its own `Config` instance via `conn.config`. -**Connection-scoped settings:** Stored on `conn.config` and accessed as `conn.config.safemode`, `conn.config.display_limit`, etc. +**Read-only after connection:** Database connection settings become read-only after connection is established: +- `host`, `port`, `user`, `password`, `use_tls`, `backend` + +**Mutable settings:** All other settings remain mutable per-connection: +- `safemode`, `display_limit`, `stores`, etc. 
```python conn = dj.Connection.from_config(host="localhost", user="u", password="p") conn.config.safemode # True (default) conn.config.display_limit # 12 (default) -conn.config.safemode = False # Modify for this connection only +conn.config.safemode = False # OK: modify for this connection +conn.config.host = "other" # Error: read-only after connection ``` ## Behavior @@ -69,9 +74,8 @@ conn.config.safemode = False # Modify for this connection only ## Read-Only Settings -Only `thread_safe` is read-only after initialization. It can only be set via: -- Environment variable `DJ_THREAD_SAFE` -- Config file `datajoint.json` +- `thread_safe`: Read-only after global config initialization (set via env var or config file only) +- `host`, `port`, `user`, `password`, `use_tls`, `backend`: Read-only on `conn.config` after connection is established ## Implementation @@ -82,8 +86,8 @@ Only `thread_safe` is read-only after initialization. It can only be set via: 5. Add guard to `Schema.__init__` when `connection=None` 6. Add `Connection.from_config()` class method that: - Accepts all connection params and settings as kwargs - - Uses hardcoded defaults (never accesses global config) - - Creates `conn.config` object to store connection-scoped settings + - Creates a new `Config` instance for `conn.config` + - Marks connection settings as read-only after connection 7. 
Add `ThreadSafetyError` exception ## Exceptions From 467367e637a59e1d81a34e57a0b99f56ce78c1ac Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:55:02 -0600 Subject: [PATCH 06/26] docs: Global config read-only (not blocked) in thread-safe mode - thread_safe=True: global dj.config becomes read-only - conn.config copies from global config, always mutable - Simpler: global config still readable for defaults Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 47 ++++++++++++++------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index d5bc0ecf9..f670e6cf2 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,7 +6,7 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Add `thread_safe` mode that blocks global state access and requires explicit connection configuration. +Add `thread_safe` mode that makes global config read-only and requires explicit connections with mutable connection-scoped settings. ## API @@ -25,7 +25,7 @@ export DJ_THREAD_SAFE=true ### Connection.from_config() -Creates a connection with explicit configuration. Works in both `thread_safe=True` and `thread_safe=False` modes. +Creates a connection with explicit configuration. Works in both modes. ```python conn = dj.Connection.from_config( @@ -43,61 +43,54 @@ schema = dj.Schema("my_schema", connection=conn) - `user` (required): Database username - `password` (required): Database password - `port`: Database port (default: 3306) -- Any other setting from `dj.config` (e.g., `safemode`, `display_limit`, `stores`) +- Any other setting (e.g., `safemode`, `display_limit`, `stores`) -**Config creation:** Uses the same `Config` class as global `dj.config`. Each connection gets its own `Config` instance via `conn.config`. 
- -**Read-only after connection:** Database connection settings become read-only after connection is established: -- `host`, `port`, `user`, `password`, `use_tls`, `backend` - -**Mutable settings:** All other settings remain mutable per-connection: -- `safemode`, `display_limit`, `stores`, etc. +**Config creation:** Copies global `dj.config`, then applies kwargs. Creates `conn.config` which is always mutable. ```python conn = dj.Connection.from_config(host="localhost", user="u", password="p") -conn.config.safemode # True (default) -conn.config.display_limit # 12 (default) - -conn.config.safemode = False # OK: modify for this connection -conn.config.host = "other" # Error: read-only after connection +conn.config.safemode = False # Always OK: conn.config is mutable +conn.config.display_limit = 25 # Always OK ``` ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config.X` | Works | Raises `ThreadSafetyError` | +| `dj.config` read | Works | Works (read-only) | +| `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | | `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | | `Connection.from_config()` | Works | Works | +| `conn.config` read/write | Works | Works | | `Schema(..., connection=conn)` | Works | Works | ## Read-Only Settings -- `thread_safe`: Read-only after global config initialization (set via env var or config file only) -- `host`, `port`, `user`, `password`, `use_tls`, `backend`: Read-only on `conn.config` after connection is established +- `thread_safe`: Always read-only after initialization (set via env var or config file only) +- All of `dj.config`: Read-only when `thread_safe=True` ## Implementation 1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias -2. Make `thread_safe` read-only after `Config` initialization -3. 
Add guards to `Config.__getattr__`, `Config.__setattr__`, `Config.__getitem__`, `Config.__setitem__` +2. Make `thread_safe` always read-only after initialization +3. When `thread_safe=True`, make all `dj.config` writes raise `ThreadSafetyError` 4. Add guard to `dj.conn()` 5. Add guard to `Schema.__init__` when `connection=None` 6. Add `Connection.from_config()` class method that: - - Accepts all connection params and settings as kwargs - - Creates a new `Config` instance for `conn.config` - - Marks connection settings as read-only after connection + - Copies global `dj.config` + - Applies kwargs overrides + - Creates mutable `conn.config` 7. Add `ThreadSafetyError` exception ## Exceptions ```python class ThreadSafetyError(DataJointError): - """Raised when accessing global state in thread-safe mode.""" + """Raised when modifying global state in thread-safe mode.""" ``` Error messages: -- Config access: `"Global config is inaccessible in thread-safe mode. Use Connection.from_config() with explicit configuration."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection.from_config() with explicit configuration."` -- Schema without connection: `"Schema requires explicit connection in thread-safe mode. Use Schema(..., connection=conn)."` +- Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. 
Use Connection.from_config()."` +- Schema without connection: `"Schema requires explicit connection in thread-safe mode."` From 7c57b26cc2181942ef1b545ae203ed9f9fee0377 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:58:03 -0600 Subject: [PATCH 07/26] docs: Remove from_config(), just expose conn.config Simpler API: - Use existing Connection() constructor - conn.config copies from global dj.config - conn.config is always mutable for per-connection settings Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 48 +++++++++++++++------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index f670e6cf2..0ee4b6b88 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,7 +6,7 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Add `thread_safe` mode that makes global config read-only and requires explicit connections with mutable connection-scoped settings. +Add `thread_safe` mode that makes global config read-only and provides connection-scoped mutable settings via `conn.config`. ## API @@ -23,64 +23,60 @@ export DJ_THREAD_SAFE=true {"thread_safe": true} ``` -### Connection.from_config() - -Creates a connection with explicit configuration. Works in both modes. 
+### Create Connections ```python -conn = dj.Connection.from_config( +conn = dj.Connection( host="localhost", user="user", password="password", - safemode=False, - display_limit=25, ) + +# Modify settings per-connection +conn.config.safemode = False +conn.config.display_limit = 25 + schema = dj.Schema("my_schema", connection=conn) ``` -**Parameters:** -- `host` (required): Database hostname -- `user` (required): Database username -- `password` (required): Database password -- `port`: Database port (default: 3306) -- Any other setting (e.g., `safemode`, `display_limit`, `stores`) +### conn.config -**Config creation:** Copies global `dj.config`, then applies kwargs. Creates `conn.config` which is always mutable. +Every connection has a `config` attribute that: +- Copies from global `dj.config` at connection time +- Is always mutable (even in thread-safe mode) +- Provides connection-scoped settings ```python -conn = dj.Connection.from_config(host="localhost", user="u", password="p") -conn.config.safemode = False # Always OK: conn.config is mutable -conn.config.display_limit = 25 # Always OK +conn.config.safemode # Read setting +conn.config.safemode = False # Write setting (always allowed) +conn.config.stores = {...} # Configure stores for this connection ``` ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` read | Works | Works (read-only) | +| `dj.config` read | Works | Works | | `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | | `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | -| `Connection.from_config()` | Works | Works | +| `dj.Connection(...)` | Works | Works | | `conn.config` read/write | Works | Works | | `Schema(..., connection=conn)` | Works | Works | ## Read-Only Settings -- `thread_safe`: Always read-only after initialization (set via env var or config file only) +- `thread_safe`: Always read-only 
(set via env var or config file only) - All of `dj.config`: Read-only when `thread_safe=True` ## Implementation 1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias 2. Make `thread_safe` always read-only after initialization -3. When `thread_safe=True`, make all `dj.config` writes raise `ThreadSafetyError` +3. When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` 4. Add guard to `dj.conn()` 5. Add guard to `Schema.__init__` when `connection=None` -6. Add `Connection.from_config()` class method that: - - Copies global `dj.config` - - Applies kwargs overrides - - Creates mutable `conn.config` +6. Add `conn.config` to `Connection` that copies from global `dj.config` 7. Add `ThreadSafetyError` exception ## Exceptions @@ -92,5 +88,5 @@ class ThreadSafetyError(DataJointError): Error messages: - Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection.from_config()."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. 
Use Connection() with explicit parameters."` - Schema without connection: `"Schema requires explicit connection in thread-safe mode."` From 8a51db4b9fa177c0acc7df333786407f11ee6814 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:04:49 -0600 Subject: [PATCH 08/26] docs: Specify internal refactoring to use conn.config All runtime operations must use self.connection.config instead of global config: - table.py: safemode for delete/drop - schemas.py: safemode, create_tables - preview.py: display settings - diagram.py: diagram_direction - jobs.py: all jobs settings - autopopulate.py: jobs settings - declare.py: add_job_metadata - connection.py: reconnect, query_cache - hash_registry.py, codecs: stores, download_path Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 66 ++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 0ee4b6b88..99c343d1b 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -34,7 +34,7 @@ conn = dj.Connection( # Modify settings per-connection conn.config.safemode = False -conn.config.display_limit = 25 +conn.config.display.limit = 25 schema = dj.Schema("my_schema", connection=conn) ``` @@ -71,22 +71,68 @@ conn.config.stores = {...} # Configure stores for this connection ## Implementation -1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias -2. Make `thread_safe` always read-only after initialization -3. When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` -4. Add guard to `dj.conn()` -5. Add guard to `Schema.__init__` when `connection=None` -6. Add `conn.config` to `Connection` that copies from global `dj.config` -7. Add `ThreadSafetyError` exception +### 1. 
Add thread_safe setting +- Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias +- Make `thread_safe` always read-only after initialization +- When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` -## Exceptions +### 2. Add guards for global state +- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` +- `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` + +### 3. Add conn.config +- `Connection.__init__`: Create `self.config` as copy of global `dj.config` +- `conn.config` is always mutable + +### 4. Refactor internal code to use conn.config + +All runtime operations must use `self.connection.config` instead of global `config`: + +**table.py:** +- `Table.delete()`: Use `self.connection.config.safemode` +- `Table.drop()`: Use `self.connection.config.safemode` + +**schemas.py:** +- `Schema.drop()`: Use `self.connection.config.safemode` +- `Schema.__init__`: Use `self.connection.config.database.create_tables` + +**preview.py:** +- Use `connection.config.display.limit` +- Use `connection.config.display.width` +- Use `connection.config.display.show_tuple_count` +- Note: Preview functions need connection passed in or accessed via table + +**diagram.py:** +- Use `schema.connection.config.display.diagram_direction` + +**jobs.py:** +- Use `self.connection.config.jobs.*` for all jobs settings +- `version_method`, `default_priority`, `stale_timeout`, `keep_completed` + +**autopopulate.py:** +- Use `self.connection.config.jobs.allow_new_pk_fields_in_computed_tables` +- Use `self.connection.config.jobs.auto_refresh` + +**declare.py:** +- Use `connection.config.jobs.add_job_metadata` + +**connection.py:** +- Use `self.config.database.reconnect` for reconnect behavior +- Use `self.config.query_cache` for query caching + +**hash_registry.py, staged_insert.py, builtin_codecs/\*:** +- Use `connection.config.get_store_spec()` for store configuration +- Use 
`connection.config.download_path` for downloads + +### 5. Add ThreadSafetyError exception ```python class ThreadSafetyError(DataJointError): """Raised when modifying global state in thread-safe mode.""" ``` -Error messages: +## Error Messages + - Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` - `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection() with explicit parameters."` - Schema without connection: `"Schema requires explicit connection in thread-safe mode."` From 726007da556be5844f22ac4ef4ba9ceb310645e2 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:10:15 -0600 Subject: [PATCH 09/26] docs: Add connection flow from Schema to Tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explains how connections propagate: - Connection → Schema → Table classes → Table instances - Schema falls back to conn() if no connection provided - Tables inherit connection from schema via _connection class attribute - In thread_safe mode, Schema("name") fails, Schema("name", connection=conn) works Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 54 ++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 99c343d1b..7154fd9e4 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -52,6 +52,57 @@ conn.config.safemode = False # Write setting (always allowed) conn.config.stores = {...} # Configure stores for this connection ``` +## Connection Flow: Schema → Tables + +### How connections propagate + +``` +Connection + ↓ +Schema (stores connection) + ↓ +Table classes (inherit connection from schema) + ↓ +Table instances (access connection via class) +``` + +### Schema behavior + +```python +# If connection provided, use it +schema = dj.Schema("name", connection=conn) # 
schema.connection = conn + +# If no connection, fall back to global conn() +schema = dj.Schema("name") # schema.connection = dj.conn() +``` + +### Table behavior + +Tables automatically inherit their connection from their schema: + +```python +@schema +class Mouse(dj.Manual): + definition = "..." + +# Mouse._connection is set by @schema decorator +# Mouse().connection returns Mouse._connection (from schema) +``` + +### In thread_safe=True mode + +```python +# This fails - conn() raises ThreadSafetyError +schema = dj.Schema("name") + +# This works - explicit connection +conn = dj.Connection(host="localhost", user="u", password="p") +schema = dj.Schema("name", connection=conn) + +# Tables work automatically via schema's connection +Mouse().insert(...) # Uses schema.connection.config for settings +``` + ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | @@ -59,10 +110,11 @@ conn.config.stores = {...} # Configure stores for this connection | `dj.config` read | Works | Works | | `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | -| `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | +| `dj.Schema("name")` | Works (uses `conn()`) | Raises `ThreadSafetyError` | | `dj.Connection(...)` | Works | Works | | `conn.config` read/write | Works | Works | | `Schema(..., connection=conn)` | Works | Works | +| Table operations | Use `conn.config` | Use `conn.config` | ## Read-Only Settings From b929627b94786ca9534d33a40e84e4ad9e6d832e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:11:29 -0600 Subject: [PATCH 10/26] docs: Global connection assigns dj.config for uniform structure - dj.conn().config IS dj.config (same object) - dj.Connection(...).config is COPY of dj.config (independent) - All internal code uses self.connection.config uniformly Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 44 ++++++++++++++++++++++++--------- 1 file changed, 32 
insertions(+), 12 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 7154fd9e4..41cd952cb 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -41,15 +41,23 @@ schema = dj.Schema("my_schema", connection=conn) ### conn.config -Every connection has a `config` attribute that: -- Copies from global `dj.config` at connection time -- Is always mutable (even in thread-safe mode) -- Provides connection-scoped settings +Every connection has a `config` attribute for uniform access: + +| Connection source | `conn.config` | +|-------------------|---------------| +| `dj.conn()` | **Is** `dj.config` (same object) | +| `dj.Connection(...)` | **Copy** of `dj.config` (independent) | + +This ensures all internal code can use `self.connection.config` uniformly. ```python -conn.config.safemode # Read setting -conn.config.safemode = False # Write setting (always allowed) -conn.config.stores = {...} # Configure stores for this connection +# Global connection - config is dj.config +conn = dj.conn() +conn.config.safemode = False # Modifies dj.config + +# Explicit connection - config is independent copy +conn = dj.Connection(host="localhost", user="u", password="p") +conn.config.safemode = False # Only affects this connection ``` ## Connection Flow: Schema → Tables @@ -57,7 +65,7 @@ conn.config.stores = {...} # Configure stores for this connection ### How connections propagate ``` -Connection +Connection (has .config) ↓ Schema (stores connection) ↓ @@ -89,6 +97,17 @@ class Mouse(dj.Manual): # Mouse().connection returns Mouse._connection (from schema) ``` +### Uniform config access + +All internal code uses `self.connection.config`: + +```python +# Works the same whether connection is from dj.conn() or dj.Connection() +self.connection.config.safemode +self.connection.config.display.limit +self.connection.config.stores +``` + ### In thread_safe=True mode ```python @@ -110,9 +129,10 @@ Mouse().insert(...) 
# Uses schema.connection.config for settings | `dj.config` read | Works | Works | | `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | +| `dj.conn().config` | Is `dj.config` | N/A | | `dj.Schema("name")` | Works (uses `conn()`) | Raises `ThreadSafetyError` | | `dj.Connection(...)` | Works | Works | -| `conn.config` read/write | Works | Works | +| `conn.config` | Copy of `dj.config` | Copy of `dj.config` | | `Schema(..., connection=conn)` | Works | Works | | Table operations | Use `conn.config` | Use `conn.config` | @@ -132,9 +152,9 @@ Mouse().insert(...) # Uses schema.connection.config for settings - `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` - `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` -### 3. Add conn.config -- `Connection.__init__`: Create `self.config` as copy of global `dj.config` -- `conn.config` is always mutable +### 3. Add conn.config to all connections +- `dj.conn()`: Set `conn.config = dj.config` (same object for backward compatibility) +- `dj.Connection(...)`: Set `self.config = copy(dj.config)` (independent copy) ### 4. Refactor internal code to use conn.config From ba637d51928761fbda2cb0e1a204b653d95e2946 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:16:22 -0600 Subject: [PATCH 11/26] docs: Clarify Connection always copies, dj.conn() overrides - Connection.__init__ always creates self.config = copy(dj.config) - dj.conn() overrides after creation: conn.config = dj.config Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 41cd952cb..cc32d7680 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -153,8 +153,8 @@ Mouse().insert(...) 
# Uses schema.connection.config for settings - `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` ### 3. Add conn.config to all connections -- `dj.conn()`: Set `conn.config = dj.config` (same object for backward compatibility) -- `dj.Connection(...)`: Set `self.config = copy(dj.config)` (independent copy) +- `Connection.__init__`: Always creates `self.config = copy(dj.config)` (independent copy) +- `dj.conn()`: After connection creation, overrides `conn.config = dj.config` (same object for backward compatibility) ### 4. Refactor internal code to use conn.config From 05b70fbe4545f50f7fd0a5c74d4459f17e3cbeac Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:24:36 -0600 Subject: [PATCH 12/26] docs: Address mixed connections and override() behavior - Mixed scenarios: dj.config affects global connection schemas only - Explicit connection schemas have independent config - dj.config.override() affects only schemas using dj.conn() - conn.config.override() affects only that connection's schemas - In thread_safe=True: dj.config.override() raises ThreadSafetyError Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 66 +++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index cc32d7680..56151d820 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -108,18 +108,78 @@ self.connection.config.display.limit self.connection.config.stores ``` +### In thread_safe=False mode (default) + +Schemas without explicit connection use global connection, controlled by `dj.config`: + +```python +schema = dj.Schema("name") # Uses dj.conn() +# schema.connection.config IS dj.config (same object) +# All tables controlled by dj.config uniformly + +dj.config.safemode = False # Affects all tables in schema +Mouse().delete() # Uses dj.config.safemode +``` + +### Mixed connections 
(thread_safe=False) + +When some schemas use global connection and others use explicit connections: + +```python +# Schema using global connection +schema1 = dj.Schema("lab") # schema1.connection.config IS dj.config + +# Schema using explicit connection +conn = dj.Connection(host="localhost", user="u", password="p") +schema2 = dj.Schema("analysis", connection=conn) # schema2.connection.config is independent + +# dj.config affects only schema1 +dj.config.safemode = False # Affects schema1 tables +Mouse().delete() # safemode=False (from dj.config) + +# conn.config affects only schema2 +conn.config.safemode = True # Affects schema2 tables +Analysis().delete() # safemode=True (from conn.config) + +# They are independent +dj.config.safemode # False +conn.config.safemode # True +``` + +### override() behavior + +```python +# Global config override - affects schemas using dj.conn() +with dj.config.override(safemode=False): + Mouse().delete() # safemode=False (schema1, global connection) + Analysis().delete() # safemode=True (schema2, unchanged - has own config) + +# Connection-scoped override - affects only that connection +with conn.config.override(safemode=False): + Mouse().delete() # safemode=True (schema1, unchanged - uses dj.config) + Analysis().delete() # safemode=False (schema2, overridden) +``` + ### In thread_safe=True mode ```python # This fails - conn() raises ThreadSafetyError schema = dj.Schema("name") -# This works - explicit connection +# This works - explicit connection with independent config conn = dj.Connection(host="localhost", user="u", password="p") schema = dj.Schema("name", connection=conn) -# Tables work automatically via schema's connection -Mouse().insert(...) 
# Uses schema.connection.config for settings +# Tables use connection-scoped config +conn.config.safemode = False # Only affects this connection +Mouse().delete() # Uses conn.config.safemode + +# dj.config.override() raises ThreadSafetyError (modifies global state) +with dj.config.override(safemode=False): # ThreadSafetyError + +# conn.config.override() works (connection-scoped) +with conn.config.override(safemode=False): # OK + Mouse().delete() ``` ## Behavior From bb7adfdd5e29bdac69171e3411f6ec0497206c00 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:35:06 -0600 Subject: [PATCH 13/26] docs: Rewrite spec with context-based approach New approach using dj.new() for isolated contexts: - Each context has one config and one connection - ctx.Schema() auto-uses context's connection - ctx.Manual, ctx.Lookup, etc. for table base classes - dj module acts as singleton context (legacy API) - thread_safe=True blocks singleton, allows dj.new() Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 350 ++++++++++++++------------------ 1 file changed, 150 insertions(+), 200 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 56151d820..2429369d0 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,265 +6,215 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Add `thread_safe` mode that makes global config read-only and provides connection-scoped mutable settings via `conn.config`. +Introduce **context** objects that encapsulate config and connection. The `dj` module itself is the singleton (legacy) context. New isolated contexts are created with `dj.new()`. 
## API -### Enable Thread-Safe Mode +### Legacy API (singleton context) -Set via environment variable or config file (read-only after initialization): - -```bash -export DJ_THREAD_SAFE=true -``` - -```json -// datajoint.json -{"thread_safe": true} -``` - -### Create Connections +The `dj` module acts as the default singleton context: ```python -conn = dj.Connection( - host="localhost", - user="user", - password="password", -) - -# Modify settings per-connection -conn.config.safemode = False -conn.config.display.limit = 25 - -schema = dj.Schema("my_schema", connection=conn) -``` - -### conn.config +import datajoint as dj -Every connection has a `config` attribute for uniform access: - -| Connection source | `conn.config` | -|-------------------|---------------| -| `dj.conn()` | **Is** `dj.config` (same object) | -| `dj.Connection(...)` | **Copy** of `dj.config` (independent) | - -This ensures all internal code can use `self.connection.config` uniformly. - -```python -# Global connection - config is dj.config -conn = dj.conn() -conn.config.safemode = False # Modifies dj.config +dj.config.safemode = False +dj.conn(host="localhost", user="u", password="p") +schema = dj.Schema("my_schema") # Uses dj's connection -# Explicit connection - config is independent copy -conn = dj.Connection(host="localhost", user="u", password="p") -conn.config.safemode = False # Only affects this connection -``` - -## Connection Flow: Schema → Tables - -### How connections propagate - -``` -Connection (has .config) - ↓ -Schema (stores connection) - ↓ -Table classes (inherit connection from schema) - ↓ -Table instances (access connection via class) -``` - -### Schema behavior - -```python -# If connection provided, use it -schema = dj.Schema("name", connection=conn) # schema.connection = conn - -# If no connection, fall back to global conn() -schema = dj.Schema("name") # schema.connection = dj.conn() -``` - -### Table behavior - -Tables automatically inherit their connection from their schema: 
- -```python @schema class Mouse(dj.Manual): definition = "..." - -# Mouse._connection is set by @schema decorator -# Mouse().connection returns Mouse._connection (from schema) ``` -### Uniform config access +### New API (isolated context) -All internal code uses `self.connection.config`: +Create isolated contexts with `dj.new()`: ```python -# Works the same whether connection is from dj.conn() or dj.Connection() -self.connection.config.safemode -self.connection.config.display.limit -self.connection.config.stores -``` +import datajoint as dj -### In thread_safe=False mode (default) +ctx = dj.new() # New context with its own config copy +ctx.config.safemode = False +ctx.connect(host="localhost", user="u", password="p") +schema = ctx.Schema("my_schema") # Uses ctx's connection -Schemas without explicit connection use global connection, controlled by `dj.config`: - -```python -schema = dj.Schema("name") # Uses dj.conn() -# schema.connection.config IS dj.config (same object) -# All tables controlled by dj.config uniformly - -dj.config.safemode = False # Affects all tables in schema -Mouse().delete() # Uses dj.config.safemode +@schema +class Mouse(ctx.Manual): + definition = "..." 
``` -### Mixed connections (thread_safe=False) +### Context structure -When some schemas use global connection and others use explicit connections: +Each context has: +- **One config** - copy of settings at creation time +- **One connection** - established via `ctx.connect()` +- **Schema factory** - `ctx.Schema()` auto-uses context's connection +- **Table base classes** - `ctx.Manual`, `ctx.Lookup`, `ctx.Imported`, `ctx.Computed`, `ctx.Part` ```python -# Schema using global connection -schema1 = dj.Schema("lab") # schema1.connection.config IS dj.config - -# Schema using explicit connection -conn = dj.Connection(host="localhost", user="u", password="p") -schema2 = dj.Schema("analysis", connection=conn) # schema2.connection.config is independent - -# dj.config affects only schema1 -dj.config.safemode = False # Affects schema1 tables -Mouse().delete() # safemode=False (from dj.config) - -# conn.config affects only schema2 -conn.config.safemode = True # Affects schema2 tables -Analysis().delete() # safemode=True (from conn.config) - -# They are independent -dj.config.safemode # False -conn.config.safemode # True +ctx = dj.new() +ctx.config # Config instance (copy of dj.config at creation) +ctx.connect(...) # Establish connection +ctx.Schema(...) 
# Create schema using ctx's connection +ctx.Manual # Base class for manual tables +ctx.Lookup # Base class for lookup tables +ctx.Imported # Base class for imported tables +ctx.Computed # Base class for computed tables +ctx.Part # Base class for part tables ``` -### override() behavior +### Thread-safe mode -```python -# Global config override - affects schemas using dj.conn() -with dj.config.override(safemode=False): - Mouse().delete() # safemode=False (schema1, global connection) - Analysis().delete() # safemode=True (schema2, unchanged - has own config) - -# Connection-scoped override - affects only that connection -with conn.config.override(safemode=False): - Mouse().delete() # safemode=True (schema1, unchanged - uses dj.config) - Analysis().delete() # safemode=False (schema2, overridden) +```bash +export DJ_THREAD_SAFE=true ``` -### In thread_safe=True mode +When `thread_safe=True`: +- `dj.conn()` raises `ThreadSafetyError` +- `dj.Schema()` raises `ThreadSafetyError` +- `dj.config` is read-only +- `dj.new()` works - isolated contexts are always allowed ```python -# This fails - conn() raises ThreadSafetyError -schema = dj.Schema("name") - -# This works - explicit connection with independent config -conn = dj.Connection(host="localhost", user="u", password="p") -schema = dj.Schema("name", connection=conn) +# thread_safe=True -# Tables use connection-scoped config -conn.config.safemode = False # Only affects this connection -Mouse().delete() # Uses conn.config.safemode +dj.Schema("name") # ThreadSafetyError +dj.conn() # ThreadSafetyError +dj.config.safemode = False # ThreadSafetyError -# dj.config.override() raises ThreadSafetyError (modifies global state) -with dj.config.override(safemode=False): # ThreadSafetyError - -# conn.config.override() works (connection-scoped) -with conn.config.override(safemode=False): # OK - Mouse().delete() +ctx = dj.new() # OK - isolated context +ctx.config.safemode = False # OK - context's own config +ctx.connect(...) 
# OK +ctx.Schema("name") # OK ``` -## Behavior +## Behavior Summary | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| | `dj.config` read | Works | Works | -| `dj.config` write | Works | Raises `ThreadSafetyError` | -| `dj.conn()` | Works | Raises `ThreadSafetyError` | -| `dj.conn().config` | Is `dj.config` | N/A | -| `dj.Schema("name")` | Works (uses `conn()`) | Raises `ThreadSafetyError` | -| `dj.Connection(...)` | Works | Works | -| `conn.config` | Copy of `dj.config` | Copy of `dj.config` | -| `Schema(..., connection=conn)` | Works | Works | -| Table operations | Use `conn.config` | Use `conn.config` | +| `dj.config` write | Works | `ThreadSafetyError` | +| `dj.conn()` | Works | `ThreadSafetyError` | +| `dj.Schema()` | Works | `ThreadSafetyError` | +| `dj.new()` | Works | Works | +| `ctx.config` read/write | Works | Works | +| `ctx.connect()` | Works | Works | +| `ctx.Schema()` | Works | Works | -## Read-Only Settings +## Context Lifecycle -- `thread_safe`: Always read-only (set via env var or config file only) -- All of `dj.config`: Read-only when `thread_safe=True` +```python +# Create context +ctx = dj.new() -## Implementation +# Configure +ctx.config.database.host = "localhost" +ctx.config.safemode = False +ctx.config.stores = {...} -### 1. Add thread_safe setting -- Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias -- Make `thread_safe` always read-only after initialization -- When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` +# Connect +ctx.connect( + host="localhost", # Or use ctx.config.database.host + user="user", + password="password", +) -### 2. Add guards for global state -- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` -- `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` +# Use +schema = ctx.Schema("my_schema") -### 3. 
Add conn.config to all connections -- `Connection.__init__`: Always creates `self.config = copy(dj.config)` (independent copy) -- `dj.conn()`: After connection creation, overrides `conn.config = dj.config` (same object for backward compatibility) +@schema +class Mouse(ctx.Manual): + definition = """ + mouse_id: int + """ -### 4. Refactor internal code to use conn.config +Mouse().insert1({"mouse_id": 1}) -All runtime operations must use `self.connection.config` instead of global `config`: +# Cleanup (optional - closes connection) +ctx.close() +``` -**table.py:** -- `Table.delete()`: Use `self.connection.config.safemode` -- `Table.drop()`: Use `self.connection.config.safemode` +## Legacy Compatibility -**schemas.py:** -- `Schema.drop()`: Use `self.connection.config.safemode` -- `Schema.__init__`: Use `self.connection.config.database.create_tables` +The singleton `dj` context works exactly as before: -**preview.py:** -- Use `connection.config.display.limit` -- Use `connection.config.display.width` -- Use `connection.config.display.show_tuple_count` -- Note: Preview functions need connection passed in or accessed via table +```python +# These are equivalent: +dj.conn() # Singleton connection +dj.config # Singleton config +dj.Schema("name") # Uses singleton connection -**diagram.py:** -- Use `schema.connection.config.display.diagram_direction` +# Internally, dj module delegates to singleton context +``` -**jobs.py:** -- Use `self.connection.config.jobs.*` for all jobs settings -- `version_method`, `default_priority`, `stale_timeout`, `keep_completed` +## Implementation -**autopopulate.py:** -- Use `self.connection.config.jobs.allow_new_pk_fields_in_computed_tables` -- Use `self.connection.config.jobs.auto_refresh` +### 1. 
Create Context class -**declare.py:** -- Use `connection.config.jobs.add_job_metadata` +```python +class Context: + def __init__(self, config: Config): + self.config = config + self._connection = None + + def connect(self, host, user, password, ...): + self._connection = Connection(...) + self._connection.config = self.config + + def conn(self): + return self._connection + + def Schema(self, name, ...): + return Schema(name, connection=self._connection, ...) + + # Table base classes that reference this context + @property + def Manual(self): ... + @property + def Lookup(self): ... + # etc. +``` -**connection.py:** -- Use `self.config.database.reconnect` for reconnect behavior -- Use `self.config.query_cache` for query caching +### 2. Add dj.new() -**hash_registry.py, staged_insert.py, builtin_codecs/\*:** -- Use `connection.config.get_store_spec()` for store configuration -- Use `connection.config.download_path` for downloads +```python +def new() -> Context: + """Create a new isolated context with its own config and connection.""" + config_copy = copy(config) # Copy current global config + return Context(config_copy) +``` -### 5. Add ThreadSafetyError exception +### 3. Make dj module act as singleton context ```python -class ThreadSafetyError(DataJointError): - """Raised when modifying global state in thread-safe mode.""" +# In datajoint/__init__.py +_singleton_context = Context(config) + +def conn(...): + if config.thread_safe: + raise ThreadSafetyError(...) + return _singleton_context.conn(...) + +def Schema(...): + if config.thread_safe: + raise ThreadSafetyError(...) + return _singleton_context.Schema(...) ``` +### 4. Add thread_safe guards + +- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` +- `dj.Schema()`: Raise `ThreadSafetyError` when `thread_safe=True` +- `dj.config` writes: Raise `ThreadSafetyError` when `thread_safe=True` + +### 5. 
Refactor internal code + +All internal code uses `self.connection.config` instead of global `config`: +- Tables access config via `self.connection.config` +- Connection has reference to its context's config + ## Error Messages -- Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection() with explicit parameters."` -- Schema without connection: `"Schema requires explicit connection in thread-safe mode."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` +- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` +- `dj.config` write: `"Global config is read-only in thread-safe mode. Use ctx = dj.new() for isolated config."` From f92af1c8b75264d63f29f4ef96cf9f01581bb03a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:40:28 -0600 Subject: [PATCH 14/26] docs: Simplify context - only config, connection, Schema - ctx exposes only: config, connection, Schema() - Connection created at context construction via dj.new() - Tables still use dj.Manual, dj.Lookup as base classes - thread_safe=True: dj.config only allows thread_safe access Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 189 +++++++++++++------------------- 1 file changed, 76 insertions(+), 113 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 2429369d0..35cbac0ef 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,20 +6,18 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **context** objects that encapsulate config and connection. The `dj` module itself is the singleton (legacy) context. New isolated contexts are created with `dj.new()`. 
+Introduce **context** objects that encapsulate config and connection. The `dj` module provides the singleton (legacy) context. New isolated contexts are created with `dj.new()`. ## API ### Legacy API (singleton context) -The `dj` module acts as the default singleton context: - ```python import datajoint as dj dj.config.safemode = False dj.conn(host="localhost", user="u", password="p") -schema = dj.Schema("my_schema") # Uses dj's connection +schema = dj.Schema("my_schema") @schema class Mouse(dj.Manual): @@ -28,39 +26,34 @@ class Mouse(dj.Manual): ### New API (isolated context) -Create isolated contexts with `dj.new()`: - ```python import datajoint as dj -ctx = dj.new() # New context with its own config copy +ctx = dj.new( + host="localhost", + user="user", + password="password", +) ctx.config.safemode = False -ctx.connect(host="localhost", user="u", password="p") -schema = ctx.Schema("my_schema") # Uses ctx's connection +schema = ctx.Schema("my_schema") @schema -class Mouse(ctx.Manual): +class Mouse(dj.Manual): definition = "..." ``` ### Context structure -Each context has: -- **One config** - copy of settings at creation time -- **One connection** - established via `ctx.connect()` -- **Schema factory** - `ctx.Schema()` auto-uses context's connection -- **Table base classes** - `ctx.Manual`, `ctx.Lookup`, `ctx.Imported`, `ctx.Computed`, `ctx.Part` +Each context exposes only: +- `ctx.config` - Config instance (copy of `dj.config` at creation) +- `ctx.connection` - Connection (created at context construction) +- `ctx.Schema()` - Schema factory using context's connection ```python -ctx = dj.new() -ctx.config # Config instance (copy of dj.config at creation) -ctx.connect(...) # Establish connection -ctx.Schema(...) 
# Create schema using ctx's connection -ctx.Manual # Base class for manual tables -ctx.Lookup # Base class for lookup tables -ctx.Imported # Base class for imported tables -ctx.Computed # Base class for computed tables -ctx.Part # Base class for part tables +ctx = dj.new(host="localhost", user="u", password="p") +ctx.config # Config instance +ctx.connection # Connection instance +ctx.Schema("name") # Creates schema using ctx.connection ``` ### Thread-safe mode @@ -72,79 +65,71 @@ export DJ_THREAD_SAFE=true When `thread_safe=True`: - `dj.conn()` raises `ThreadSafetyError` - `dj.Schema()` raises `ThreadSafetyError` -- `dj.config` is read-only +- `dj.config` only allows access to `thread_safe` (all other access raises `ThreadSafetyError`) - `dj.new()` works - isolated contexts are always allowed ```python # thread_safe=True -dj.Schema("name") # ThreadSafetyError -dj.conn() # ThreadSafetyError +dj.config.thread_safe # OK - allowed +dj.config.safemode # ThreadSafetyError dj.config.safemode = False # ThreadSafetyError +dj.conn() # ThreadSafetyError +dj.Schema("name") # ThreadSafetyError -ctx = dj.new() # OK - isolated context -ctx.config.safemode = False # OK - context's own config -ctx.connect(...) 
# OK -ctx.Schema("name") # OK +ctx = dj.new(host="h", user="u", password="p") # OK +ctx.config.safemode = False # OK +ctx.Schema("name") # OK ``` ## Behavior Summary | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` read | Works | Works | -| `dj.config` write | Works | `ThreadSafetyError` | +| `dj.config.thread_safe` | Works | Works | +| `dj.config.*` (other) | Works | `ThreadSafetyError` | | `dj.conn()` | Works | `ThreadSafetyError` | | `dj.Schema()` | Works | `ThreadSafetyError` | | `dj.new()` | Works | Works | -| `ctx.config` read/write | Works | Works | -| `ctx.connect()` | Works | Works | +| `ctx.config.*` | Works | Works | +| `ctx.connection` | Works | Works | | `ctx.Schema()` | Works | Works | -## Context Lifecycle +## Usage Example ```python -# Create context -ctx = dj.new() - -# Configure -ctx.config.database.host = "localhost" -ctx.config.safemode = False -ctx.config.stores = {...} +import datajoint as dj -# Connect -ctx.connect( - host="localhost", # Or use ctx.config.database.host +# Create isolated context +ctx = dj.new( + host="localhost", user="user", password="password", ) -# Use +# Configure +ctx.config.safemode = False +ctx.config.stores = {"raw": {"protocol": "file", "location": "/data"}} + +# Create schema schema = ctx.Schema("my_schema") @schema -class Mouse(ctx.Manual): +class Mouse(dj.Manual): definition = """ mouse_id: int """ -Mouse().insert1({"mouse_id": 1}) - -# Cleanup (optional - closes connection) -ctx.close() -``` - -## Legacy Compatibility - -The singleton `dj` context works exactly as before: - -```python -# These are equivalent: -dj.conn() # Singleton connection -dj.config # Singleton config -dj.Schema("name") # Uses singleton connection +@schema +class Session(dj.Manual): + definition = """ + -> Mouse + session_date: date + """ -# Internally, dj module delegates to singleton context +# Use tables +Mouse().insert1({"mouse_id": 1}) +Mouse().delete() # Uses 
ctx.config.safemode ``` ## Implementation @@ -153,68 +138,46 @@ dj.Schema("name") # Uses singleton connection ```python class Context: - def __init__(self, config: Config): - self.config = config - self._connection = None - - def connect(self, host, user, password, ...): - self._connection = Connection(...) - self._connection.config = self.config - - def conn(self): - return self._connection - - def Schema(self, name, ...): - return Schema(name, connection=self._connection, ...) - - # Table base classes that reference this context - @property - def Manual(self): ... - @property - def Lookup(self): ... - # etc. + def __init__(self, host, user, password, port=3306, ...): + self.config = copy(dj.config) # Independent config copy + self.connection = Connection(host, user, password, port, ...) + self.connection._config = self.config # Link config to connection + + def Schema(self, name, **kwargs): + return Schema(name, connection=self.connection, **kwargs) ``` ### 2. Add dj.new() ```python -def new() -> Context: +def new(host, user, password, **kwargs) -> Context: """Create a new isolated context with its own config and connection.""" - config_copy = copy(config) # Copy current global config - return Context(config_copy) + return Context(host, user, password, **kwargs) ``` -### 3. Make dj module act as singleton context +### 3. Add thread_safe guards + +In `dj.config`: +- Allow read/write of `thread_safe` always +- When `thread_safe=True`, block all other attribute access ```python -# In datajoint/__init__.py -_singleton_context = Context(config) - -def conn(...): - if config.thread_safe: - raise ThreadSafetyError(...) - return _singleton_context.conn(...) - -def Schema(...): - if config.thread_safe: - raise ThreadSafetyError(...) - return _singleton_context.Schema(...) +def __getattr__(self, name): + if name == "thread_safe": + return self._thread_safe + if self._thread_safe: + raise ThreadSafetyError("Global config is inaccessible in thread-safe mode.") + # ... 
normal access ``` -### 4. Add thread_safe guards - -- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` -- `dj.Schema()`: Raise `ThreadSafetyError` when `thread_safe=True` -- `dj.config` writes: Raise `ThreadSafetyError` when `thread_safe=True` - -### 5. Refactor internal code +### 4. Refactor internal code -All internal code uses `self.connection.config` instead of global `config`: -- Tables access config via `self.connection.config` -- Connection has reference to its context's config +All internal code uses `self.connection._config` instead of global `config`: +- Tables access config via `self.connection._config` +- This works uniformly for both singleton and isolated contexts ## Error Messages -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` -- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` -- `dj.config` write: `"Global config is read-only in thread-safe mode. Use ctx = dj.new() for isolated config."` +- `dj.config.*`: `"Global config is inaccessible in thread-safe mode. Use ctx = dj.new(...) for isolated config."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new(...) to create an isolated context."` +- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new(...) 
to create an isolated context."` From f83248606fc5a1cf585f39865957f52ace229551 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:52:36 -0600 Subject: [PATCH 15/26] docs: Singleton as lazily-loaded instance, fresh config per instance - dj.config, dj.conn(), dj.Schema() delegate to singleton instance - Singleton lazily loaded on first access - thread_safe checked at module import, blocks singleton access - inst.config created fresh (not copied from dj.config) - dj.instance() always works, creates isolated instance Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 166 ++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 71 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 35cbac0ef..ed89f5e54 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,11 +6,11 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **context** objects that encapsulate config and connection. The `dj` module provides the singleton (legacy) context. New isolated contexts are created with `dj.new()`. +Introduce **instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.instance()`. ## API -### Legacy API (singleton context) +### Legacy API (singleton instance) ```python import datajoint as dj @@ -24,36 +24,38 @@ class Mouse(dj.Manual): definition = "..." ``` -### New API (isolated context) +Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` delegate to a lazily-loaded singleton instance. 
+ +### New API (isolated instance) ```python import datajoint as dj -ctx = dj.new( +inst = dj.instance( host="localhost", user="user", password="password", ) -ctx.config.safemode = False -schema = ctx.Schema("my_schema") +inst.config.safemode = False +schema = inst.Schema("my_schema") @schema class Mouse(dj.Manual): definition = "..." ``` -### Context structure +### Instance structure -Each context exposes only: -- `ctx.config` - Config instance (copy of `dj.config` at creation) -- `ctx.connection` - Connection (created at context construction) -- `ctx.Schema()` - Schema factory using context's connection +Each instance has: +- `inst.config` - Config (created fresh at instance creation) +- `inst.connection` - Connection (created at instance creation) +- `inst.Schema()` - Schema factory using instance's connection ```python -ctx = dj.new(host="localhost", user="u", password="p") -ctx.config # Config instance -ctx.connection # Connection instance -ctx.Schema("name") # Creates schema using ctx.connection +inst = dj.instance(host="localhost", user="u", password="p") +inst.config # Config instance +inst.connection # Connection instance +inst.Schema("name") # Creates schema using inst.connection ``` ### Thread-safe mode @@ -62,57 +64,67 @@ ctx.Schema("name") # Creates schema using ctx.connection export DJ_THREAD_SAFE=true ``` -When `thread_safe=True`: +`thread_safe` is read from environment/config file at module import time. 
+ +When `thread_safe=True`, accessing the singleton raises `ThreadSafetyError`: +- `dj.config` raises `ThreadSafetyError` - `dj.conn()` raises `ThreadSafetyError` - `dj.Schema()` raises `ThreadSafetyError` -- `dj.config` only allows access to `thread_safe` (all other access raises `ThreadSafetyError`) -- `dj.new()` works - isolated contexts are always allowed +- `dj.instance()` works - isolated instances are always allowed ```python # thread_safe=True -dj.config.thread_safe # OK - allowed -dj.config.safemode # ThreadSafetyError -dj.config.safemode = False # ThreadSafetyError -dj.conn() # ThreadSafetyError -dj.Schema("name") # ThreadSafetyError +dj.config # ThreadSafetyError +dj.conn() # ThreadSafetyError +dj.Schema("name") # ThreadSafetyError -ctx = dj.new(host="h", user="u", password="p") # OK -ctx.config.safemode = False # OK -ctx.Schema("name") # OK +inst = dj.instance(host="h", user="u", password="p") # OK +inst.config.safemode = False # OK +inst.Schema("name") # OK ``` ## Behavior Summary | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config.thread_safe` | Works | Works | -| `dj.config.*` (other) | Works | `ThreadSafetyError` | -| `dj.conn()` | Works | `ThreadSafetyError` | -| `dj.Schema()` | Works | `ThreadSafetyError` | -| `dj.new()` | Works | Works | -| `ctx.config.*` | Works | Works | -| `ctx.connection` | Works | Works | -| `ctx.Schema()` | Works | Works | +| `dj.config` | Singleton config | `ThreadSafetyError` | +| `dj.conn()` | Singleton connection | `ThreadSafetyError` | +| `dj.Schema()` | Uses singleton | `ThreadSafetyError` | +| `dj.instance()` | Works | Works | +| `inst.config` | Works | Works | +| `inst.connection` | Works | Works | +| `inst.Schema()` | Works | Works | + +## Singleton Lazy Loading + +The singleton instance is created lazily on first access to `dj.config`, `dj.conn()`, or `dj.Schema()`: + +```python +# First access triggers singleton creation 
+dj.config.safemode # Creates singleton, returns singleton.config.safemode +dj.conn() # Returns singleton.connection (connects if needed) +dj.Schema("name") # Returns singleton.Schema("name") +``` ## Usage Example ```python import datajoint as dj -# Create isolated context -ctx = dj.new( +# Create isolated instance +inst = dj.instance( host="localhost", user="user", password="password", ) # Configure -ctx.config.safemode = False -ctx.config.stores = {"raw": {"protocol": "file", "location": "/data"}} +inst.config.safemode = False +inst.config.stores = {"raw": {"protocol": "file", "location": "/data"}} # Create schema -schema = ctx.Schema("my_schema") +schema = inst.Schema("my_schema") @schema class Mouse(dj.Manual): @@ -120,64 +132,76 @@ class Mouse(dj.Manual): mouse_id: int """ -@schema -class Session(dj.Manual): - definition = """ - -> Mouse - session_date: date - """ - # Use tables Mouse().insert1({"mouse_id": 1}) -Mouse().delete() # Uses ctx.config.safemode +Mouse().delete() # Uses inst.config.safemode ``` ## Implementation -### 1. Create Context class +### 1. Create Instance class ```python -class Context: - def __init__(self, host, user, password, port=3306, ...): - self.config = copy(dj.config) # Independent config copy +class Instance: + def __init__(self, host, user, password, port=3306, **kwargs): + self.config = Config() # Fresh config with defaults + # Apply any config overrides from kwargs self.connection = Connection(host, user, password, port, ...) - self.connection._config = self.config # Link config to connection + self.connection._config = self.config def Schema(self, name, **kwargs): return Schema(name, connection=self.connection, **kwargs) ``` -### 2. Add dj.new() +### 2. 
Add dj.instance() ```python -def new(host, user, password, **kwargs) -> Context: - """Create a new isolated context with its own config and connection.""" - return Context(host, user, password, **kwargs) +def instance(host, user, password, **kwargs) -> Instance: + """Create a new isolated instance with its own config and connection.""" + return Instance(host, user, password, **kwargs) ``` -### 3. Add thread_safe guards - -In `dj.config`: -- Allow read/write of `thread_safe` always -- When `thread_safe=True`, block all other attribute access +### 3. Singleton with lazy loading ```python -def __getattr__(self, name): - if name == "thread_safe": - return self._thread_safe - if self._thread_safe: - raise ThreadSafetyError("Global config is inaccessible in thread-safe mode.") - # ... normal access +# Module level +_thread_safe = _load_thread_safe_from_env_or_config() +_singleton = None + +def _get_singleton(): + if _thread_safe: + raise ThreadSafetyError( + "Global DataJoint state is disabled in thread-safe mode. " + "Use dj.instance() to create an isolated instance." + ) + global _singleton + if _singleton is None: + _singleton = Instance( + host=_load_from_config("database.host"), + user=_load_from_config("database.user"), + password=_load_from_config("database.password"), + ... + ) + return _singleton + +# Public API +@property +def config(): + return _get_singleton().config + +def conn(): + return _get_singleton().connection + +def Schema(name, **kwargs): + return _get_singleton().Schema(name, **kwargs) ``` ### 4. Refactor internal code All internal code uses `self.connection._config` instead of global `config`: - Tables access config via `self.connection._config` -- This works uniformly for both singleton and isolated contexts +- This works uniformly for both singleton and isolated instances ## Error Messages -- `dj.config.*`: `"Global config is inaccessible in thread-safe mode. Use ctx = dj.new(...) 
for isolated config."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new(...) to create an isolated context."` -- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new(...) to create an isolated context."` +- Singleton access: `"Global DataJoint state is disabled in thread-safe mode. Use dj.instance() to create an isolated instance."` From 6fe7497fa20751c99c37b7e31e5465d64b4116cb Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:58:20 -0600 Subject: [PATCH 16/26] docs: Add inst.FreeTable(), clarify base classes vs instance methods - dj.Manual, dj.Lookup etc. used with @schema decorator (schema links connection) - inst.Schema(), inst.FreeTable() need connection directly - FreeTable added to Instance class Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index ed89f5e54..b7922a837 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -50,12 +50,29 @@ Each instance has: - `inst.config` - Config (created fresh at instance creation) - `inst.connection` - Connection (created at instance creation) - `inst.Schema()` - Schema factory using instance's connection +- `inst.FreeTable()` - FreeTable factory using instance's connection ```python inst = dj.instance(host="localhost", user="u", password="p") inst.config # Config instance inst.connection # Connection instance inst.Schema("name") # Creates schema using inst.connection +inst.FreeTable("db.tbl") # Access table using inst.connection +``` + +### Table base classes vs instance methods + +**Base classes** (`dj.Manual`, `dj.Lookup`, etc.) - Used with `@schema` decorator: +```python +@schema +class Mouse(dj.Manual): # dj.Manual - schema links to connection + definition = "..." 
+``` + +**Instance methods** (`inst.Schema()`, `inst.FreeTable()`) - Need connection directly: +```python +schema = inst.Schema("my_schema") # Uses inst.connection +table = inst.FreeTable("db.table") # Uses inst.connection ``` ### Thread-safe mode @@ -151,6 +168,9 @@ class Instance: def Schema(self, name, **kwargs): return Schema(name, connection=self.connection, **kwargs) + + def FreeTable(self, full_table_name): + return FreeTable(self.connection, full_table_name) ``` ### 2. Add dj.instance() From 32b52353a1ccbc23f12690aefec77d590414b715 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 18:06:42 -0600 Subject: [PATCH 17/26] docs: Consolidate to single singleton instance, dj.Instance() - dj.Instance() (uppercase) for consistency with dj.Schema() - Single _singleton_instance created lazily - dj.config -> _singleton.config (via proxy) - dj.conn() -> _singleton.connection - dj.Schema() -> _singleton.Schema() - dj.FreeTable() -> _singleton.FreeTable() - All trigger same singleton creation Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 92 +++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 40 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index b7922a837..dac95981a 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,7 +6,7 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.instance()`. +Introduce **Instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.Instance()`. ## API @@ -16,7 +16,7 @@ Introduce **instance** objects that encapsulate config and connection. 
The `dj` import datajoint as dj dj.config.safemode = False -dj.conn(host="localhost", user="u", password="p") +dj.conn() # Triggers singleton creation, returns connection schema = dj.Schema("my_schema") @schema @@ -24,14 +24,19 @@ class Mouse(dj.Manual): definition = "..." ``` -Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` delegate to a lazily-loaded singleton instance. +Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` are aliases to the singleton instance: +- `dj.config` → `dj._singleton_instance.config` +- `dj.conn()` → `dj._singleton_instance.connection` +- `dj.Schema()` → `dj._singleton_instance.Schema()` + +The singleton is created lazily on first access to any of these. ### New API (isolated instance) ```python import datajoint as dj -inst = dj.instance( +inst = dj.Instance( host="localhost", user="user", password="password", @@ -53,7 +58,7 @@ Each instance has: - `inst.FreeTable()` - FreeTable factory using instance's connection ```python -inst = dj.instance(host="localhost", user="u", password="p") +inst = dj.Instance(host="localhost", user="u", password="p") inst.config # Config instance inst.connection # Connection instance inst.Schema("name") # Creates schema using inst.connection @@ -87,7 +92,7 @@ When `thread_safe=True`, accessing the singleton raises `ThreadSafetyError`: - `dj.config` raises `ThreadSafetyError` - `dj.conn()` raises `ThreadSafetyError` - `dj.Schema()` raises `ThreadSafetyError` -- `dj.instance()` works - isolated instances are always allowed +- `dj.Instance()` works - isolated instances are always allowed ```python # thread_safe=True @@ -96,7 +101,7 @@ dj.config # ThreadSafetyError dj.conn() # ThreadSafetyError dj.Schema("name") # ThreadSafetyError -inst = dj.instance(host="h", user="u", password="p") # OK +inst = dj.Instance(host="h", user="u", password="p") # OK inst.config.safemode = False # OK inst.Schema("name") # OK ``` @@ -105,32 +110,33 @@ inst.Schema("name") # OK | Operation | `thread_safe=False` | 
`thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` | Singleton config | `ThreadSafetyError` | -| `dj.conn()` | Singleton connection | `ThreadSafetyError` | -| `dj.Schema()` | Uses singleton | `ThreadSafetyError` | -| `dj.instance()` | Works | Works | +| `dj.config` | `_singleton.config` | `ThreadSafetyError` | +| `dj.conn()` | `_singleton.connection` | `ThreadSafetyError` | +| `dj.Schema()` | `_singleton.Schema()` | `ThreadSafetyError` | +| `dj.Instance()` | Works | Works | | `inst.config` | Works | Works | | `inst.connection` | Works | Works | | `inst.Schema()` | Works | Works | ## Singleton Lazy Loading -The singleton instance is created lazily on first access to `dj.config`, `dj.conn()`, or `dj.Schema()`: +The singleton instance is created lazily on first access: ```python -# First access triggers singleton creation -dj.config.safemode # Creates singleton, returns singleton.config.safemode -dj.conn() # Returns singleton.connection (connects if needed) -dj.Schema("name") # Returns singleton.Schema("name") +dj.config # Creates singleton, returns _singleton.config +dj.conn() # Creates singleton, returns _singleton.connection +dj.Schema("name") # Creates singleton, returns _singleton.Schema("name") ``` +All three trigger creation of the same singleton instance. + ## Usage Example ```python import datajoint as dj # Create isolated instance -inst = dj.instance( +inst = dj.Instance( host="localhost", user="user", password="password", @@ -173,47 +179,53 @@ class Instance: return FreeTable(self.connection, full_table_name) ``` -### 2. Add dj.instance() - -```python -def instance(host, user, password, **kwargs) -> Instance: - """Create a new isolated instance with its own config and connection.""" - return Instance(host, user, password, **kwargs) -``` - -### 3. Singleton with lazy loading +### 2. 
Singleton with lazy loading ```python # Module level _thread_safe = _load_thread_safe_from_env_or_config() -_singleton = None +_singleton_instance = None def _get_singleton(): if _thread_safe: raise ThreadSafetyError( "Global DataJoint state is disabled in thread-safe mode. " - "Use dj.instance() to create an isolated instance." + "Use dj.Instance() to create an isolated instance." ) - global _singleton - if _singleton is None: - _singleton = Instance( - host=_load_from_config("database.host"), - user=_load_from_config("database.user"), - password=_load_from_config("database.password"), + global _singleton_instance + if _singleton_instance is None: + _singleton_instance = Instance( + host=_load_from_env_or_config("database.host"), + user=_load_from_env_or_config("database.user"), + password=_load_from_env_or_config("database.password"), ... ) - return _singleton + return _singleton_instance +``` + +### 3. Legacy API as aliases -# Public API -@property -def config(): - return _get_singleton().config +```python +# dj.config -> singleton.config +class _ConfigProxy: + def __getattr__(self, name): + return getattr(_get_singleton().config, name) + def __setattr__(self, name, value): + setattr(_get_singleton().config, name, value) +config = _ConfigProxy() + +# dj.conn() -> singleton.connection def conn(): return _get_singleton().connection +# dj.Schema() -> singleton.Schema() def Schema(name, **kwargs): return _get_singleton().Schema(name, **kwargs) + +# dj.FreeTable() -> singleton.FreeTable() +def FreeTable(full_table_name): + return _get_singleton().FreeTable(full_table_name) ``` ### 4. Refactor internal code @@ -224,4 +236,4 @@ All internal code uses `self.connection._config` instead of global `config`: ## Error Messages -- Singleton access: `"Global DataJoint state is disabled in thread-safe mode. Use dj.instance() to create an isolated instance."` +- Singleton access: `"Global DataJoint state is disabled in thread-safe mode. 
Use dj.Instance() to create an isolated instance."` From b251e862e0f1a9f944c4d7da6046fc6bfb13fe8f Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 19:04:39 -0600 Subject: [PATCH 18/26] feat: Implement thread-safe mode with Instance class - Add Instance class that encapsulates config + connection - Add ThreadSafetyError exception for global state access - Add _ConfigProxy to delegate dj.config to global config - Add _get_singleton_connection for lazy connection creation - Update dj.conn(), dj.Schema(), dj.FreeTable() to use singleton - Connection now stores _config reference for instance isolation - Add DJ_THREAD_SAFE environment variable support - Add comprehensive tests for thread-safe mode When DJ_THREAD_SAFE=true: - dj.config raises ThreadSafetyError - dj.conn() raises ThreadSafetyError - dj.Schema() raises ThreadSafetyError (without explicit connection) - dj.FreeTable() raises ThreadSafetyError (without explicit connection) - dj.Instance() always works for isolated contexts Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 119 +++++++------ src/datajoint/__init__.py | 153 +++++++++++++++- src/datajoint/connection.py | 19 +- src/datajoint/errors.py | 4 + src/datajoint/instance.py | 301 ++++++++++++++++++++++++++++++++ tests/unit/test_thread_safe.py | 173 ++++++++++++++++++ 6 files changed, 707 insertions(+), 62 deletions(-) create mode 100644 src/datajoint/instance.py create mode 100644 tests/unit/test_thread_safe.py diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index dac95981a..ac6d94e5e 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,17 +6,22 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **Instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.Instance()`. 
+Introduce **Instance** objects that encapsulate config and connection. The `dj` module provides a global config that can be modified before connecting, and a lazily-loaded singleton connection. New isolated instances are created with `dj.Instance()`. ## API -### Legacy API (singleton instance) +### Legacy API (global config + singleton connection) ```python import datajoint as dj +# Configure credentials (no connection yet) +dj.config.database.user = "user" +dj.config.database.password = "password" dj.config.safemode = False -dj.conn() # Triggers singleton creation, returns connection + +# First call to conn() or Schema() creates the singleton connection +dj.conn() # Creates connection using dj.config credentials schema = dj.Schema("my_schema") @schema @@ -24,12 +29,11 @@ class Mouse(dj.Manual): definition = "..." ``` -Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` are aliases to the singleton instance: -- `dj.config` → `dj._singleton_instance.config` -- `dj.conn()` → `dj._singleton_instance.connection` -- `dj.Schema()` → `dj._singleton_instance.Schema()` - -The singleton is created lazily on first access to any of these. +Internally: +- `dj.config` → delegates to `_global_config` (with thread-safety check) +- `dj.conn()` → returns `_singleton_connection` (created lazily) +- `dj.Schema()` → uses `_singleton_connection` +- `dj.FreeTable()` → uses `_singleton_connection` ### New API (isolated instance) @@ -86,12 +90,13 @@ table = inst.FreeTable("db.table") # Uses inst.connection export DJ_THREAD_SAFE=true ``` -`thread_safe` is read from environment/config file at module import time. +`thread_safe` is checked dynamically on each access to global state. 
-When `thread_safe=True`, accessing the singleton raises `ThreadSafetyError`: +When `thread_safe=True`, accessing global state raises `ThreadSafetyError`: - `dj.config` raises `ThreadSafetyError` - `dj.conn()` raises `ThreadSafetyError` -- `dj.Schema()` raises `ThreadSafetyError` +- `dj.Schema()` raises `ThreadSafetyError` (without explicit connection) +- `dj.FreeTable()` raises `ThreadSafetyError` (without explicit connection) - `dj.Instance()` works - isolated instances are always allowed ```python @@ -110,26 +115,26 @@ inst.Schema("name") # OK | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` | `_singleton.config` | `ThreadSafetyError` | -| `dj.conn()` | `_singleton.connection` | `ThreadSafetyError` | -| `dj.Schema()` | `_singleton.Schema()` | `ThreadSafetyError` | +| `dj.config` | `_global_config` | `ThreadSafetyError` | +| `dj.conn()` | `_singleton_connection` | `ThreadSafetyError` | +| `dj.Schema()` | Uses singleton | `ThreadSafetyError` | +| `dj.FreeTable()` | Uses singleton | `ThreadSafetyError` | | `dj.Instance()` | Works | Works | | `inst.config` | Works | Works | | `inst.connection` | Works | Works | | `inst.Schema()` | Works | Works | -## Singleton Lazy Loading +## Lazy Loading -The singleton instance is created lazily on first access: +The global config is created at module import time. The singleton connection is created lazily on first access: ```python -dj.config # Creates singleton, returns _singleton.config -dj.conn() # Creates singleton, returns _singleton.connection -dj.Schema("name") # Creates singleton, returns _singleton.Schema("name") +dj.config.database.user = "user" # Modifies global config (no connection yet) +dj.config.database.password = "pw" +dj.conn() # Creates singleton connection using global config +dj.Schema("name") # Uses existing singleton connection ``` -All three trigger creation of the same singleton instance. 
- ## Usage Example ```python @@ -167,7 +172,7 @@ Mouse().delete() # Uses inst.config.safemode ```python class Instance: def __init__(self, host, user, password, port=3306, **kwargs): - self.config = Config() # Fresh config with defaults + self.config = _create_config() # Fresh config with defaults # Apply any config overrides from kwargs self.connection = Connection(host, user, password, port, ...) self.connection._config = self.config @@ -179,58 +184,74 @@ class Instance: return FreeTable(self.connection, full_table_name) ``` -### 2. Singleton with lazy loading +### 2. Global config and singleton connection ```python # Module level -_thread_safe = _load_thread_safe_from_env_or_config() -_singleton_instance = None +_global_config = _create_config() # Created at import time +_singleton_connection = None # Created lazily -def _get_singleton(): - if _thread_safe: +def _check_thread_safe(): + if _load_thread_safe(): raise ThreadSafetyError( "Global DataJoint state is disabled in thread-safe mode. " "Use dj.Instance() to create an isolated instance." ) - global _singleton_instance - if _singleton_instance is None: - _singleton_instance = Instance( - host=_load_from_env_or_config("database.host"), - user=_load_from_env_or_config("database.user"), - password=_load_from_env_or_config("database.password"), + +def _get_singleton_connection(): + _check_thread_safe() + global _singleton_connection + if _singleton_connection is None: + _singleton_connection = Connection( + host=_global_config.database.host, + user=_global_config.database.user, + password=_global_config.database.password, ... ) - return _singleton_instance + _singleton_connection._config = _global_config + return _singleton_connection ``` -### 3. Legacy API as aliases +### 3. 
Legacy API with thread-safety checks ```python -# dj.config -> singleton.config +# dj.config -> global config with thread-safety check class _ConfigProxy: def __getattr__(self, name): - return getattr(_get_singleton().config, name) + _check_thread_safe() + return getattr(_global_config, name) def __setattr__(self, name, value): - setattr(_get_singleton().config, name, value) + _check_thread_safe() + setattr(_global_config, name, value) config = _ConfigProxy() -# dj.conn() -> singleton.connection +# dj.conn() -> singleton connection def conn(): - return _get_singleton().connection - -# dj.Schema() -> singleton.Schema() -def Schema(name, **kwargs): - return _get_singleton().Schema(name, **kwargs) - -# dj.FreeTable() -> singleton.FreeTable() -def FreeTable(full_table_name): - return _get_singleton().FreeTable(full_table_name) + return _get_singleton_connection() + +# dj.Schema() -> uses singleton connection +def Schema(name, connection=None, **kwargs): + if connection is None: + _check_thread_safe() + connection = _get_singleton_connection() + return _Schema(name, connection=connection, **kwargs) + +# dj.FreeTable() -> uses singleton connection +def FreeTable(conn_or_name, full_table_name=None): + if full_table_name is None: + # Called as FreeTable("db.table") + _check_thread_safe() + return _FreeTable(_get_singleton_connection(), conn_or_name) + else: + # Called as FreeTable(conn, "db.table") + return _FreeTable(conn_or_name, full_table_name) ``` ### 4. 
Refactor internal code All internal code uses `self.connection._config` instead of global `config`: +- Connection stores reference to its config as `self._config` - Tables access config via `self.connection._config` - This works uniformly for both singleton and isolated instances diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 7f809487d..04a2deb5f 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -23,6 +23,7 @@ "config", "conn", "Connection", + "Instance", "Schema", "VirtualModule", "virtual_schema", @@ -52,6 +53,7 @@ "errors", "migrate", "DataJointError", + "ThreadSafetyError", "logger", "cli", "ValidationResult", @@ -72,17 +74,158 @@ NpyRef, ) from .blob import MatCell, MatStruct -from .connection import Connection, conn -from .errors import DataJointError +from .connection import Connection +from .errors import DataJointError, ThreadSafetyError from .expression import AndList, Not, Top, U +from .instance import Instance, _ConfigProxy, _get_singleton_connection, _global_config, _check_thread_safe from .logging import logger from .objectref import ObjectRef -from .schemas import Schema, VirtualModule, list_schemas, virtual_schema -from .settings import config -from .table import FreeTable, Table, ValidationResult +from .schemas import Schema as _Schema, VirtualModule, list_schemas, virtual_schema +from .table import FreeTable as _FreeTable, Table, ValidationResult from .user_tables import Computed, Imported, Lookup, Manual, Part from .version import __version__ +# ============================================================================= +# Singleton-aware API +# ============================================================================= +# config is a proxy that delegates to the module-level global config (_global_config) +config = _ConfigProxy() + + +def conn( + host: str | None = None, + user: str | None = None, + password: str | None = None, + *, + reset: bool = False, + use_tls: bool | dict | None = None, +) ->
Connection: + """ + Return a persistent connection object. + + When called without arguments, returns the singleton connection. + When connection parameters are provided, creates a new Connection. + + Parameters + ---------- + host : str, optional + Database hostname. + user : str, optional + Database username. + password : str, optional + Database password. + reset : bool, optional + If True, reset existing connection. Default False. + use_tls : bool or dict, optional + TLS encryption option. + + Returns + ------- + Connection + Database connection. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled and using singleton. + """ + # If any connection params provided, use legacy behavior + if host is not None or user is not None or password is not None or reset: + from .connection import conn as _legacy_conn + + return _legacy_conn(host, user, password, reset=reset, use_tls=use_tls) + + # Otherwise use singleton connection + return _get_singleton_connection() + + +def Schema( + schema_name: str | None = None, + context: dict | None = None, + *, + connection: Connection | None = None, + create_schema: bool = True, + create_tables: bool | None = None, + add_objects: dict | None = None, +) -> _Schema: + """ + Create a Schema for binding table classes to a database schema. + + When connection is not provided, uses the singleton connection. + + Parameters + ---------- + schema_name : str, optional + Database schema name. + context : dict, optional + Namespace for foreign key lookup. + connection : Connection, optional + Database connection. Defaults to singleton connection. + create_schema : bool, optional + If False, raise error if schema doesn't exist. Default True. + create_tables : bool, optional + If False, raise error when accessing missing tables. + add_objects : dict, optional + Additional objects for declaration context. + + Returns + ------- + Schema + A Schema bound to the specified connection. 
+ + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled and using singleton. + """ + if connection is None: + # Use singleton connection - will raise ThreadSafetyError if thread_safe=True + _check_thread_safe() + connection = _get_singleton_connection() + + return _Schema( + schema_name, + context=context, + connection=connection, + create_schema=create_schema, + create_tables=create_tables, + add_objects=add_objects, + ) + + +def FreeTable(conn_or_name, full_table_name: str | None = None) -> _FreeTable: + """ + Create a FreeTable for accessing a table without a dedicated class. + + Can be called in two ways: + - ``FreeTable("schema.table")`` - uses singleton connection + - ``FreeTable(connection, "schema.table")`` - uses provided connection + + Parameters + ---------- + conn_or_name : Connection or str + Either a Connection object, or the full table name if using singleton. + full_table_name : str, optional + Full table name when first argument is a connection. + + Returns + ------- + FreeTable + A FreeTable instance for the specified table. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled and using singleton. 
+ """ + if full_table_name is None: + # Called as FreeTable("db.table") - use singleton connection + _check_thread_safe() + return _FreeTable(_get_singleton_connection(), conn_or_name) + else: + # Called as FreeTable(conn, "db.table") - use provided connection + return _FreeTable(conn_or_name, full_table_name) + # ============================================================================= # Lazy imports — heavy dependencies loaded on first access # ============================================================================= diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 488a26e7d..934a6694a 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -187,8 +187,11 @@ def __init__( self._query_cache = None self._is_closed = True # Mark as closed until connect() succeeds + # Config reference - defaults to global config, but Instance sets its own + self._config = config + # Select adapter based on configured backend - backend = config["database.backend"] + backend = self._config["database.backend"] self.adapter = get_adapter(backend) self.connect() @@ -219,7 +222,7 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - charset=config["connection.charset"], + charset=self._config["connection.charset"], use_tls=self.conn_info.get("ssl"), ) except Exception as ssl_error: @@ -235,7 +238,7 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - charset=config["connection.charset"], + charset=self._config["connection.charset"], use_tls=False, # Explicitly disable SSL for fallback ) else: @@ -261,8 +264,8 @@ def set_query_cache(self, query_cache: str | None = None) -> None: def purge_query_cache(self) -> None: """Delete all cached query results.""" - if isinstance(config.get(cache_key), str) and pathlib.Path(config[cache_key]).is_dir(): - for path in 
pathlib.Path(config[cache_key]).iterdir(): + if isinstance(self._config.get(cache_key), str) and pathlib.Path(self._config[cache_key]).is_dir(): + for path in pathlib.Path(self._config[cache_key]).iterdir(): if not path.is_dir(): path.unlink() @@ -403,11 +406,11 @@ def query( if use_query_cache and not re.match(r"\s*(SELECT|SHOW)", query): raise errors.DataJointError("Only SELECT queries are allowed when query caching is on.") if use_query_cache: - if not config[cache_key]: + if not self._config[cache_key]: raise errors.DataJointError(f"Provide filepath dj.config['{cache_key}'] when using query caching.") # Cache key is backend-specific (no identifier normalization needed) hash_ = hashlib.md5((str(self._query_cache)).encode() + pack(args) + query.encode()).hexdigest() - cache_path = pathlib.Path(config[cache_key]) / str(hash_) + cache_path = pathlib.Path(self._config[cache_key]) / str(hash_) try: buffer = cache_path.read_bytes() except FileNotFoundError: @@ -416,7 +419,7 @@ def query( return EmulatedCursor(unpack(buffer)) if reconnect is None: - reconnect = config["database.reconnect"] + reconnect = self._config["database.reconnect"] logger.debug("Executing SQL:" + query[:query_log_max_length]) cursor = self.adapter.get_cursor(self._conn, as_dict=as_dict) try: diff --git a/src/datajoint/errors.py b/src/datajoint/errors.py index 7e10f021d..bba032b23 100644 --- a/src/datajoint/errors.py +++ b/src/datajoint/errors.py @@ -72,3 +72,7 @@ class MissingExternalFile(DataJointError): class BucketInaccessible(DataJointError): """S3 bucket is inaccessible.""" + + +class ThreadSafetyError(DataJointError): + """Global DataJoint state is disabled in thread-safe mode.""" diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py new file mode 100644 index 000000000..309fef668 --- /dev/null +++ b/src/datajoint/instance.py @@ -0,0 +1,301 @@ +""" +DataJoint Instance for thread-safe operation. 
+ +An Instance encapsulates a config and connection pair, providing isolated +database contexts for multi-tenant applications. +""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, Any + +from .connection import Connection +from .errors import ThreadSafetyError +from .settings import Config, _create_config + +if TYPE_CHECKING: + from .schemas import Schema as SchemaClass + from .table import FreeTable as FreeTableClass + + +def _load_thread_safe() -> bool: + """ + Load thread_safe setting from environment or config file. + + Returns + ------- + bool + True if thread-safe mode is enabled. + """ + # Check environment variable first + env_val = os.environ.get("DJ_THREAD_SAFE", "").lower() + if env_val in ("true", "1", "yes"): + return True + if env_val in ("false", "0", "no"): + return False + + # Default: thread-safe mode is off + return False + + +class Instance: + """ + Encapsulates a DataJoint configuration and connection. + + Each Instance has its own Config and Connection, providing isolation + for multi-tenant applications. Use ``dj.Instance()`` to create isolated + instances, or access the singleton via ``dj.config``, ``dj.conn()``, etc. + + Parameters + ---------- + host : str + Database hostname. + user : str + Database username. + password : str + Database password. + port : int, optional + Database port. Default from config or 3306. + use_tls : bool or dict, optional + TLS configuration. + **kwargs : Any + Additional config overrides applied to this instance's config. + + Attributes + ---------- + config : Config + Configuration for this instance. + connection : Connection + Database connection for this instance. 
+ + Examples + -------- + >>> inst = dj.Instance(host="localhost", user="root", password="secret") + >>> inst.config.safemode = False + >>> schema = inst.Schema("my_schema") + """ + + def __init__( + self, + host: str, + user: str, + password: str, + port: int | None = None, + use_tls: bool | dict | None = None, + **kwargs: Any, + ) -> None: + # Create fresh config with defaults loaded from env/file + self.config = _create_config() + + # Apply any config overrides from kwargs + for key, value in kwargs.items(): + if hasattr(self.config, key): + setattr(self.config, key, value) + elif "__" in key: + # Handle nested keys like database__reconnect + parts = key.split("__") + obj = self.config + for part in parts[:-1]: + obj = getattr(obj, part) + setattr(obj, parts[-1], value) + + # Determine port + if port is None: + port = self.config.database.port + + # Create connection + self.connection = Connection(host, user, password, port, use_tls) + + # Attach config to connection so tables can access it + self.connection._config = self.config + + def Schema( + self, + schema_name: str, + *, + context: dict[str, Any] | None = None, + create_schema: bool = True, + create_tables: bool | None = None, + add_objects: dict[str, Any] | None = None, + ) -> "SchemaClass": + """ + Create a Schema bound to this instance's connection. + + Parameters + ---------- + schema_name : str + Database schema name. + context : dict, optional + Namespace for foreign key lookup. + create_schema : bool, optional + If False, raise error if schema doesn't exist. Default True. + create_tables : bool, optional + If False, raise error when accessing missing tables. + add_objects : dict, optional + Additional objects for declaration context. + + Returns + ------- + Schema + A Schema using this instance's connection. 
+ """ + from .schemas import Schema + + return Schema( + schema_name, + context=context, + connection=self.connection, + create_schema=create_schema, + create_tables=create_tables, + add_objects=add_objects, + ) + + def FreeTable(self, full_table_name: str) -> "FreeTableClass": + """ + Create a FreeTable bound to this instance's connection. + + Parameters + ---------- + full_table_name : str + Full table name as ``'schema.table'`` or ```schema`.`table```. + + Returns + ------- + FreeTable + A FreeTable using this instance's connection. + """ + from .table import FreeTable + + return FreeTable(self.connection, full_table_name) + + def __repr__(self) -> str: + return f"Instance({self.connection!r})" + + +# ============================================================================= +# Singleton management +# ============================================================================= +# The global config is created at module load time and can be modified +# The singleton connection is created lazily when conn() or Schema() is called + +_global_config: Config = _create_config() +_singleton_connection: Connection | None = None + + +def _check_thread_safe() -> None: + """ + Check if thread-safe mode is enabled and raise if so. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled. + """ + if _load_thread_safe(): + raise ThreadSafetyError( + "Global DataJoint state is disabled in thread-safe mode. " + "Use dj.Instance() to create an isolated instance." + ) + + +def _get_singleton_connection() -> Connection: + """ + Get or create the singleton Connection. + + Uses credentials from the global config. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled. + DataJointError + If credentials are not configured. 
+ """ + global _singleton_connection + + _check_thread_safe() + + if _singleton_connection is None: + from .errors import DataJointError + + host = _global_config.database.host + user = _global_config.database.user + password = _global_config.database.password + if password is not None: + password = password.get_secret_value() + port = _global_config.database.port + use_tls = _global_config.database.use_tls + + if user is None: + raise DataJointError( + "Database user not configured. Set dj.config['database.user'] or DJ_USER environment variable." + ) + if password is None: + raise DataJointError( + "Database password not configured. Set dj.config['database.password'] or DJ_PASS environment variable." + ) + + _singleton_connection = Connection(host, user, password, port, use_tls) + # Attach global config to connection + _singleton_connection._config = _global_config + + return _singleton_connection + + +class _ConfigProxy: + """ + Proxy that delegates to the global config, with thread-safety checks. + + In thread-safe mode, all access raises ThreadSafetyError. 
+ """ + + def __getattr__(self, name: str) -> Any: + _check_thread_safe() + return getattr(_global_config, name) + + def __setattr__(self, name: str, value: Any) -> None: + _check_thread_safe() + setattr(_global_config, name, value) + + def __getitem__(self, key: str) -> Any: + _check_thread_safe() + return _global_config[key] + + def __setitem__(self, key: str, value: Any) -> None: + _check_thread_safe() + _global_config[key] = value + + def __delitem__(self, key: str) -> None: + _check_thread_safe() + del _global_config[key] + + def get(self, key: str, default: Any = None) -> Any: + _check_thread_safe() + return _global_config.get(key, default) + + def override(self, **kwargs: Any): + _check_thread_safe() + return _global_config.override(**kwargs) + + def load(self, filename: str) -> None: + _check_thread_safe() + return _global_config.load(filename) + + def get_store_spec(self, store: str | None = None, *, use_filepath_default: bool = False) -> dict[str, Any]: + _check_thread_safe() + return _global_config.get_store_spec(store, use_filepath_default=use_filepath_default) + + @staticmethod + def save_template( + path: str = "datajoint.json", + minimal: bool = True, + create_secrets_dir: bool = True, + ): + # save_template is a static method, no thread-safety check needed + return Config.save_template(path, minimal, create_secrets_dir) + + def __repr__(self) -> str: + if _load_thread_safe(): + return "ConfigProxy (thread-safe mode - use dj.Instance())" + return repr(_global_config) diff --git a/tests/unit/test_thread_safe.py b/tests/unit/test_thread_safe.py new file mode 100644 index 000000000..427c9a3ec --- /dev/null +++ b/tests/unit/test_thread_safe.py @@ -0,0 +1,173 @@ +"""Tests for thread-safe mode functionality.""" + +import os + +import pytest + + +class TestThreadSafeMode: + """Test thread-safe mode behavior.""" + + def test_thread_safe_env_var_true(self, monkeypatch): + """DJ_THREAD_SAFE=true enables thread-safe mode.""" + 
monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + # Re-import to pick up the new env var + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is True + + def test_thread_safe_env_var_false(self, monkeypatch): + """DJ_THREAD_SAFE=false disables thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "false") + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is False + + def test_thread_safe_env_var_1(self, monkeypatch): + """DJ_THREAD_SAFE=1 enables thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "1") + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is True + + def test_thread_safe_env_var_yes(self, monkeypatch): + """DJ_THREAD_SAFE=yes enables thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "yes") + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is True + + def test_thread_safe_default_false(self, monkeypatch): + """Thread-safe mode defaults to False.""" + monkeypatch.delenv("DJ_THREAD_SAFE", raising=False) + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is False + + +class TestConfigProxyThreadSafe: + """Test ConfigProxy behavior in thread-safe mode.""" + + def test_config_access_raises_in_thread_safe_mode(self, monkeypatch): + """Accessing config raises ThreadSafetyError in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + _ = dj.config.database + + def test_config_access_works_in_normal_mode(self, monkeypatch): + """Accessing config works in normal mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "false") + + import datajoint as dj + + # Should not raise + host = dj.config.database.host + assert isinstance(host, str) + + def test_config_set_raises_in_thread_safe_mode(self, monkeypatch): + """Setting config raises ThreadSafetyError in 
thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.config.safemode = False + + def test_save_template_works_in_thread_safe_mode(self, monkeypatch, tmp_path): + """save_template is a static method and works in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + + # Should not raise - save_template is static + config_file = tmp_path / "datajoint.json" + dj.config.save_template(str(config_file), create_secrets_dir=False) + assert config_file.exists() + + +class TestConnThreadSafe: + """Test conn() behavior in thread-safe mode.""" + + def test_conn_raises_in_thread_safe_mode(self, monkeypatch): + """conn() raises ThreadSafetyError in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.conn() + + +class TestSchemaThreadSafe: + """Test Schema behavior in thread-safe mode.""" + + def test_schema_raises_in_thread_safe_mode(self, monkeypatch): + """Schema() raises ThreadSafetyError in thread-safe mode without connection.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.Schema("test_schema") + + +class TestFreeTableThreadSafe: + """Test FreeTable behavior in thread-safe mode.""" + + def test_freetable_raises_in_thread_safe_mode(self, monkeypatch): + """FreeTable() raises ThreadSafetyError in thread-safe mode without connection.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.FreeTable("test.table") + + +class TestInstance: + """Test Instance class.""" + + def test_instance_import(self): + """Instance class is 
importable.""" + from datajoint import Instance + + assert Instance is not None + + def test_instance_always_allowed_in_thread_safe_mode(self, monkeypatch): + """Instance() is allowed even in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + from datajoint import Instance + + # Instance class should be accessible + # (actual creation requires valid credentials) + assert callable(Instance) + + +class TestThreadSafetyError: + """Test ThreadSafetyError exception.""" + + def test_error_is_datajoint_error(self): + """ThreadSafetyError is a subclass of DataJointError.""" + from datajoint.errors import DataJointError, ThreadSafetyError + + assert issubclass(ThreadSafetyError, DataJointError) + + def test_error_in_exports(self): + """ThreadSafetyError is exported from datajoint.""" + import datajoint as dj + + assert hasattr(dj, "ThreadSafetyError") From 042dbf20c28daf1ebd2d1e61e0ac67b2b71612ef Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 19:20:43 -0600 Subject: [PATCH 19/26] fix: Make conn() with credentials update singleton connection - conn(host, user, password) now updates the singleton connection instead of creating a separate connection - Remove irrelevant safemode=False from spec examples - thread_safe is set via DJ_THREAD_SAFE env var or config file Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 14 +++++----- src/datajoint/__init__.py | 46 +++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index ac6d94e5e..794df6194 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -18,7 +18,6 @@ import datajoint as dj # Configure credentials (no connection yet) dj.config.database.user = "user" dj.config.database.password = "password" -dj.config.safemode = False # First call to conn() or Schema() creates the singleton connection dj.conn() # Creates connection using 
dj.config credentials @@ -29,6 +28,11 @@ class Mouse(dj.Manual): definition = "..." ``` +Alternatively, pass credentials directly to `conn()`: +```python +dj.conn(host="localhost", user="user", password="password") +``` + Internally: - `dj.config` → delegates to `_global_config` (with thread-safety check) - `dj.conn()` → returns `_singleton_connection` (created lazily) @@ -45,7 +49,6 @@ inst = dj.Instance( user="user", password="password", ) -inst.config.safemode = False schema = inst.Schema("my_schema") @schema @@ -107,7 +110,6 @@ dj.conn() # ThreadSafetyError dj.Schema("name") # ThreadSafetyError inst = dj.Instance(host="h", user="u", password="p") # OK -inst.config.safemode = False # OK inst.Schema("name") # OK ``` @@ -147,10 +149,6 @@ inst = dj.Instance( password="password", ) -# Configure -inst.config.safemode = False -inst.config.stores = {"raw": {"protocol": "file", "location": "/data"}} - # Create schema schema = inst.Schema("my_schema") @@ -162,7 +160,7 @@ class Mouse(dj.Manual): # Use tables Mouse().insert1({"mouse_id": 1}) -Mouse().delete() # Uses inst.config.safemode +Mouse().fetch() ``` ## Implementation diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 04a2deb5f..9359c0eb7 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -103,17 +103,18 @@ def conn( """ Return a persistent connection object. - When called without arguments, returns the singleton connection. - When connection parameters are provided, creates a new Connection. + When called without arguments, returns the singleton connection using + credentials from dj.config. When connection parameters are provided, + updates the singleton connection with the new credentials. Parameters ---------- host : str, optional - Database hostname. + Database hostname. If provided, updates singleton. user : str, optional - Database username. + Database username. If provided, updates singleton. password : str, optional - Database password. + Database password. 
If provided, updates singleton. reset : bool, optional If True, reset existing connection. Default False. use_tls : bool or dict, optional @@ -127,15 +128,38 @@ def conn( Raises ------ ThreadSafetyError - If thread_safe mode is enabled and using singleton. + If thread_safe mode is enabled. """ - # If any connection params provided, use legacy behavior - if host is not None or user is not None or password is not None or reset: - from .connection import conn as _legacy_conn + from .instance import _singleton_connection, _check_thread_safe, _global_config + import datajoint.instance as instance_module - return _legacy_conn(host, user, password, reset=reset, use_tls=use_tls) + _check_thread_safe() + + # If credentials provided or reset requested, (re)create the singleton + if host is not None or user is not None or password is not None or reset: + # Use provided values or fall back to config + host = host if host is not None else _global_config.database.host + user = user if user is not None else _global_config.database.user + password = password if password is not None else _global_config.database.password + if password is not None and hasattr(password, 'get_secret_value'): + password = password.get_secret_value() + port = _global_config.database.port + use_tls = use_tls if use_tls is not None else _global_config.database.use_tls + + if user is None: + from .errors import DataJointError + raise DataJointError( + "Database user not configured. Set dj.config['database.user'] or pass user= argument." + ) + if password is None: + from .errors import DataJointError + raise DataJointError( + "Database password not configured. Set dj.config['database.password'] or pass password= argument." 
+ ) + + instance_module._singleton_connection = Connection(host, user, password, port, use_tls) + instance_module._singleton_connection._config = _global_config - # Otherwise use singleton connection return _get_singleton_connection() From 5758adfeb17a5639630ac6ec2183ead9a66cf4be Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 12:55:01 -0600 Subject: [PATCH 20/26] fix: Remove unused import, fix mock_cache fixture for 2.0 settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused `from typing import Callable` in connection.py (lint failure) - Update mock_cache fixture: `cache` → `download_path` (KeyError in test_attach) Co-Authored-By: Claude Opus 4.6 --- src/datajoint/connection.py | 1 - tests/conftest.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 934a6694a..827a7a9bd 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -11,7 +11,6 @@ import re import warnings from contextlib import contextmanager -from typing import Callable from . 
import errors from .adapters import get_adapter diff --git a/tests/conftest.py b/tests/conftest.py index 4d6adf09c..8efaab745 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -536,13 +536,13 @@ def mock_stores(stores_config): @pytest.fixture def mock_cache(tmpdir_factory): - og_cache = dj.config.get("cache") - dj.config["cache"] = tmpdir_factory.mktemp("cache") + og_cache = dj.config.get("download_path") + dj.config["download_path"] = str(tmpdir_factory.mktemp("cache")) yield if og_cache is None: - del dj.config["cache"] + del dj.config["download_path"] else: - dj.config["cache"] = og_cache + dj.config["download_path"] = og_cache @pytest.fixture(scope="session") From 9d9d6757cc880010ebae89f4be3a99d9c4b0c664 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 13:01:48 -0600 Subject: [PATCH 21/26] fix: Resolve lint and test failures in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused `_singleton_connection` import in __init__.py (F401) - Remove unused `os` import in test_thread_safe.py (F401) - Remove unused `Callable` import in connection.py (F401) - Fix mock_cache fixture: `cache` → `download_path` for 2.0 settings Co-Authored-By: Claude Opus 4.6 --- src/datajoint/__init__.py | 14 +++++++------- src/datajoint/instance.py | 8 +++----- tests/unit/test_thread_safe.py | 2 -- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 9359c0eb7..d7db3e32d 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -130,8 +130,8 @@ def conn( ThreadSafetyError If thread_safe mode is enabled. 
""" - from .instance import _singleton_connection, _check_thread_safe, _global_config import datajoint.instance as instance_module + from pydantic import SecretStr _check_thread_safe() @@ -140,19 +140,18 @@ def conn( # Use provided values or fall back to config host = host if host is not None else _global_config.database.host user = user if user is not None else _global_config.database.user - password = password if password is not None else _global_config.database.password - if password is not None and hasattr(password, 'get_secret_value'): - password = password.get_secret_value() + raw_password = password if password is not None else _global_config.database.password + password = raw_password.get_secret_value() if isinstance(raw_password, SecretStr) else raw_password port = _global_config.database.port use_tls = use_tls if use_tls is not None else _global_config.database.use_tls if user is None: from .errors import DataJointError - raise DataJointError( - "Database user not configured. Set dj.config['database.user'] or pass user= argument." - ) + + raise DataJointError("Database user not configured. Set dj.config['database.user'] or pass user= argument.") if password is None: from .errors import DataJointError + raise DataJointError( "Database password not configured. Set dj.config['database.password'] or pass password= argument." 
) @@ -250,6 +249,7 @@ def FreeTable(conn_or_name, full_table_name: str | None = None) -> _FreeTable: # Called as FreeTable(conn, "db.table") - use provided connection return _FreeTable(conn_or_name, full_table_name) + # ============================================================================= # Lazy imports — heavy dependencies loaded on first access # ============================================================================= diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py index 309fef668..bd057aa57 100644 --- a/src/datajoint/instance.py +++ b/src/datajoint/instance.py @@ -194,8 +194,7 @@ def _check_thread_safe() -> None: """ if _load_thread_safe(): raise ThreadSafetyError( - "Global DataJoint state is disabled in thread-safe mode. " - "Use dj.Instance() to create an isolated instance." + "Global DataJoint state is disabled in thread-safe mode. " "Use dj.Instance() to create an isolated instance." ) @@ -221,9 +220,8 @@ def _get_singleton_connection() -> Connection: host = _global_config.database.host user = _global_config.database.user - password = _global_config.database.password - if password is not None: - password = password.get_secret_value() + raw_password = _global_config.database.password + password = raw_password.get_secret_value() if raw_password is not None else None port = _global_config.database.port use_tls = _global_config.database.use_tls diff --git a/tests/unit/test_thread_safe.py b/tests/unit/test_thread_safe.py index 427c9a3ec..bec45e434 100644 --- a/tests/unit/test_thread_safe.py +++ b/tests/unit/test_thread_safe.py @@ -1,7 +1,5 @@ """Tests for thread-safe mode functionality.""" -import os - import pytest From 0011cd65e8dffbf860553f9213d6b97bf7f047a1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 13:25:50 -0600 Subject: [PATCH 22/26] docs: Document thread-safety rationale for codec registry The global codec registry is effectively immutable after import: registration runs under Python's import 
lock, and the only runtime mutation (_load_entry_points) is idempotent under the GIL. Per-instance isolation is unnecessary since codecs are part of the type system, not connection-scoped state. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/codecs.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/datajoint/codecs.py b/src/datajoint/codecs.py index 5c192d46e..f4741a5e4 100644 --- a/src/datajoint/codecs.py +++ b/src/datajoint/codecs.py @@ -43,7 +43,15 @@ class MyTable(dj.Manual): logger = logging.getLogger(__name__.split(".")[0]) -# Global codec registry - maps name to Codec instance +# Global codec registry - maps name to Codec instance. +# +# Thread safety: This registry is effectively immutable after import. +# Registration happens in __init_subclass__ during class definition, which is +# serialized by Python's import lock. The only runtime mutation is +# _load_entry_points(), which is idempotent and guarded by a bool flag; +# under CPython's GIL, concurrent calls may do redundant work but cannot +# corrupt the dict. Codecs are part of the type system (tied to code, not to +# any particular connection or tenant), so per-instance isolation is unnecessary. _codec_registry: dict[str, Codec] = {} _entry_points_loaded: bool = False From 845efc05c81ab072bee0155e98f0f389393e859f Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 13:28:28 -0600 Subject: [PATCH 23/26] docs: Add global state audit to thread-safe mode spec Catalog all 8 module-level mutable globals with thread-safety classification: guarded (config, connection), safe by design (codec registry), or low risk (logging, blob flags, import caches). 
Co-Authored-By: Claude Opus 4.6 --- docs/design/thread-safe-mode.md | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 794df6194..0068f8d5e 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -253,6 +253,40 @@ All internal code uses `self.connection._config` instead of global `config`: - Tables access config via `self.connection._config` - This works uniformly for both singleton and isolated instances +## Global State Audit + +All module-level mutable state was reviewed for thread-safety implications. + +### Guarded (blocked in thread-safe mode) + +| State | Location | Mechanism | +|-------|----------|-----------| +| `config` singleton | `settings.py:979` | `_ConfigProxy` raises `ThreadSafetyError`; use `inst.config` instead | +| `conn()` singleton | `connection.py:108` | `_check_thread_safe()` guard; use `inst.connection` instead | + +These are the two globals that carry connection-scoped state (credentials, database settings) and are the primary source of cross-tenant interference. + +### Safe by design (no guard needed) + +| State | Location | Rationale | +|-------|----------|-----------| +| `_codec_registry` | `codecs.py:47` | Effectively immutable after import. Registration runs in `__init_subclass__` under Python's import lock. Runtime mutation (`_load_entry_points`) is idempotent under the GIL. Codecs are part of the type system, not connection-scoped. | +| `_entry_points_loaded` | `codecs.py:48` | Bool flag for idempotent lazy loading; worst case under concurrent access is redundant work, not corruption. | + +### Low risk (no guard needed) + +| State | Location | Rationale | +|-------|----------|-----------| +| Logging side effects | `logging.py:8,17,40-45,56` | Standard Python logging configuration. Monkey-patches `Logger` and replaces `sys.excepthook` at import time. Not DataJoint-specific mutable state. 
| +| `use_32bit_dims` | `blob.py:65` | Runtime flag affecting deserialization. Rarely changed; not connection-scoped. | +| `compression` dict | `blob.py:61` | Decompressor function registry. Populated at import time, effectively read-only thereafter. | +| `_lazy_modules` | `__init__.py:92` | Import caching via `globals()` mutation. Protected by Python's import lock. | +| `ADAPTERS` dict | `adapters/__init__.py:16` | Backend registry. Populated at import time, read-only in practice. | + +### Design principle + +Only state that is **connection-scoped** (credentials, database settings, connection objects) needs thread-safe guards. State that is **code-scoped** (type registries, import caches, logging configuration) is shared across all threads by design and does not vary between tenants. + ## Error Messages - Singleton access: `"Global DataJoint state is disabled in thread-safe mode. Use dj.Instance() to create an isolated instance."` From 04a406d6d0eaea670b41d7312ca59cbbd3698e1e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 14:04:35 -0600 Subject: [PATCH 24/26] refactor: replace global config reads with connection-scoped config All internal code now reads configuration from self.connection._config instead of the global config singleton. This ensures thread-safe mode works correctly: each Instance's connection carries its own config, and tables/schemas/jobs access it through the connection. 
Changes across 9 files: - schemas.py: safemode, create_tables default - table.py: safemode in delete/drop, config passed to declare() - expression.py: loglevel in __repr__ - preview.py: display.* settings via query_expression.connection._config - autopopulate.py: jobs.allow_new_pk_fields, jobs.auto_refresh - jobs.py: jobs.default_priority, stale_timeout, keep_completed - declare.py: jobs.add_job_metadata (config param threaded through) - diagram.py: display.diagram_direction (connection stored on instance) - staged_insert.py: get_store_spec() Removed unused `from .settings import config` imports from 7 modules. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/autopopulate.py | 10 +++------- src/datajoint/declare.py | 9 +++++++-- src/datajoint/diagram.py | 7 ++++--- src/datajoint/expression.py | 3 +-- src/datajoint/jobs.py | 28 ++++++++++++++-------------- src/datajoint/preview.py | 4 ++-- src/datajoint/schemas.py | 10 ++++++---- src/datajoint/staged_insert.py | 5 ++--- src/datajoint/table.py | 7 +++---- 9 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 7660e43ec..ae8be3b82 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -146,10 +146,8 @@ def _declare_check(self, primary_key: list[str], fk_attribute_map: dict[str, tup If native (non-FK) PK attributes are found, unless bypassed via ``dj.config.jobs.allow_new_pk_fields_in_computed_tables = True``. 
""" - from .settings import config - # Check if validation is bypassed - if config.jobs.allow_new_pk_fields_in_computed_tables: + if self.connection._config.jobs.allow_new_pk_fields_in_computed_tables: return # Check for native (non-FK) primary key attributes @@ -477,8 +475,6 @@ def _populate_distributed( """ from tqdm import tqdm - from .settings import config - # Define a signal handler for SIGTERM def handler(signum, frame): logger.info("Populate terminated by SIGTERM") @@ -489,7 +485,7 @@ def handler(signum, frame): try: # Refresh job queue if configured if refresh is None: - refresh = config.jobs.auto_refresh + refresh = self.connection._config.jobs.auto_refresh if refresh: # Use delay=-1 to ensure jobs are immediately schedulable # (avoids race condition with scheduled_time <= CURRENT_TIMESTAMP(3) check) @@ -659,7 +655,7 @@ def _populate1( key, start_time=datetime.datetime.fromtimestamp(start_time), duration=duration, - version=_get_job_version(), + version=_get_job_version(self.connection._config), ) if jobs is not None: diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index 375daa07e..fe50e8a66 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -15,7 +15,6 @@ from .codecs import lookup_codec from .condition import translate_attribute from .errors import DataJointError -from .settings import config # Core DataJoint types - scientist-friendly names that are fully supported # These are recorded in field comments using :type: syntax for reconstruction @@ -401,7 +400,7 @@ def prepare_declare( def declare( - full_table_name: str, definition: str, context: dict, adapter + full_table_name: str, definition: str, context: dict, adapter, *, config=None ) -> tuple[str, list[str], list[str], dict[str, tuple[str, str]], list[str], list[str]]: r""" Parse a definition and generate SQL CREATE TABLE statement. @@ -416,6 +415,8 @@ def declare( Namespace for resolving foreign key references. 
adapter : DatabaseAdapter Database adapter for backend-specific SQL generation. + config : Config, optional + Configuration object. If None, falls back to global config. Returns ------- @@ -464,6 +465,10 @@ def declare( ) = prepare_declare(definition, context, adapter) # Add hidden job metadata for Computed/Imported tables (not parts) + if config is None: + from .settings import config as _config + + config = _config if config.jobs.add_job_metadata: # Check if this is a Computed (__) or Imported (_) table, but not a Part (contains __ in middle) is_computed = table_name.startswith("__") and "__" not in table_name[2:] diff --git a/src/datajoint/diagram.py b/src/datajoint/diagram.py index 7034d122b..75e00c21c 100644 --- a/src/datajoint/diagram.py +++ b/src/datajoint/diagram.py @@ -16,7 +16,6 @@ from .dependencies import topo_sort from .errors import DataJointError -from .settings import config from .table import Table, lookup_class_name from .user_tables import Computed, Imported, Lookup, Manual, Part, _AliasNode, _get_tier @@ -105,6 +104,7 @@ def __init__(self, source, context=None) -> None: self.nodes_to_show = set(source.nodes_to_show) self._expanded_nodes = set(source._expanded_nodes) self.context = source.context + self._connection = source._connection super().__init__(source) return @@ -126,6 +126,7 @@ def __init__(self, source, context=None) -> None: raise DataJointError("Could not find database connection in %s" % repr(source[0])) # initialize graph from dependencies + self._connection = connection connection.dependencies.load() super().__init__(connection.dependencies) @@ -584,7 +585,7 @@ def make_dot(self): Tables are grouped by schema, with the Python module name shown as the group label when available. 
""" - direction = config.display.diagram_direction + direction = self._connection._config.display.diagram_direction graph = self._make_graph() # Apply collapse logic if needed @@ -857,7 +858,7 @@ def make_mermaid(self) -> str: Session --> Neuron """ graph = self._make_graph() - direction = config.display.diagram_direction + direction = self._connection._config.display.diagram_direction # Apply collapse logic if needed graph, collapsed_counts = self._apply_collapse(graph) diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py index 883853cd3..9b36cf6d0 100644 --- a/src/datajoint/expression.py +++ b/src/datajoint/expression.py @@ -20,7 +20,6 @@ from .errors import DataJointError from .codecs import decode_attribute from .preview import preview, repr_html -from .settings import config logger = logging.getLogger(__name__.split(".")[0]) @@ -1247,7 +1246,7 @@ def __repr__(self): str String representation of the QueryExpression. """ - return super().__repr__() if config["loglevel"].lower() == "debug" else self.preview() + return super().__repr__() if self.connection._config["loglevel"].lower() == "debug" else self.preview() def preview(self, limit=None, width=None): """ diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index e5499eb8e..cf0981836 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -24,16 +24,22 @@ logger = logging.getLogger(__name__.split(".")[0]) -def _get_job_version() -> str: +def _get_job_version(config=None) -> str: """ Get version string based on config settings. + Parameters + ---------- + config : Config, optional + Configuration object. If None, falls back to global config. + Returns ------- str Version string, or empty string if version tracking disabled. """ - from .settings import config + if config is None: + from .settings import config method = config.jobs.version_method if method is None or method == "none": @@ -349,17 +355,15 @@ def refresh( 3. 
Remove stale jobs: jobs older than stale_timeout whose keys not in key_source 4. Remove orphaned jobs: reserved jobs older than orphan_timeout (if specified) """ - from .settings import config - # Ensure jobs table exists if not self.is_declared: self.declare() # Get defaults from config if priority is None: - priority = config.jobs.default_priority + priority = self.connection._config.jobs.default_priority if stale_timeout is None: - stale_timeout = config.jobs.stale_timeout + stale_timeout = self.connection._config.jobs.stale_timeout result = {"added": 0, "removed": 0, "orphaned": 0, "re_pended": 0} @@ -392,7 +396,7 @@ def refresh( pass # Job already exists # 2. Re-pend success jobs if keep_completed=True - if config.jobs.keep_completed: + if self.connection._config.jobs.keep_completed: # Success jobs whose keys are in key_source but not in target # Disable semantic_check for Job table operations (job table PK has different lineage than target) success_to_repend = self.completed.restrict(key_source, semantic_check=False).restrict( @@ -463,7 +467,7 @@ def reserve(self, key: dict) -> bool: "pid": os.getpid(), "connection_id": self.connection.connection_id, "user": self.connection.get_user(), - "version": _get_job_version(), + "version": _get_job_version(self.connection._config), } try: @@ -490,9 +494,7 @@ def complete(self, key: dict, duration: float | None = None) -> None: - If True: updates status to ``'success'`` with completion time and duration - If False: deletes the job entry """ - from .settings import config - - if config.jobs.keep_completed: + if self.connection._config.jobs.keep_completed: # Use server time for completed_time server_now = self.connection.query("SELECT CURRENT_TIMESTAMP").fetchone()[0] pk = self._get_pk(key) @@ -550,13 +552,11 @@ def ignore(self, key: dict) -> None: key : dict Primary key dict of the job. 
""" - from .settings import config - pk = self._get_pk(key) if pk in self: self.update1({**pk, "status": "ignore"}) else: - priority = config.jobs.default_priority + priority = self.connection._config.jobs.default_priority self.insert1({**pk, "status": "ignore", "priority": priority}) def progress(self) -> dict: diff --git a/src/datajoint/preview.py b/src/datajoint/preview.py index 92d09d874..0b80ad15f 100644 --- a/src/datajoint/preview.py +++ b/src/datajoint/preview.py @@ -2,8 +2,6 @@ import json -from .settings import config - def _format_object_display(json_data): """Format object metadata for display in query results.""" @@ -44,6 +42,7 @@ def _get_blob_placeholder(heading, field_name, html_escape=False): def preview(query_expression, limit, width): heading = query_expression.heading rel = query_expression.proj(*heading.non_blobs) + config = query_expression.connection._config # Object fields use codecs - not specially handled in simplified model object_fields = [] if limit is None: @@ -105,6 +104,7 @@ def get_display_value(tup, f, idx): def repr_html(query_expression): heading = query_expression.heading rel = query_expression.proj(*heading.non_blobs) + config = query_expression.connection._config # Object fields use codecs - not specially handled in simplified model object_fields = [] tuples = rel.to_arrays(limit=config["display.limit"] + 1) diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 2955fd67d..04ff057c3 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -23,7 +23,6 @@ from .connection import Connection from .heading import Heading from .jobs import Job -from .settings import config from .table import FreeTable, lookup_class_name from .user_tables import Computed, Imported, Lookup, Manual, Part, _get_tier from .utils import to_camel_case, user_choice @@ -120,7 +119,7 @@ def __init__( self.database = None self.context = context self.create_schema = create_schema - self.create_tables = create_tables if 
create_tables is not None else config.database.create_tables + self.create_tables = create_tables # None means "use connection config default" self.add_objects = add_objects self.declare_list = [] if schema_name: @@ -293,7 +292,10 @@ def _decorate_table(self, table_class: type, context: dict[str, Any], assert_dec # instantiate the class, declare the table if not already instance = table_class() is_declared = instance.is_declared - if not is_declared and not assert_declared and self.create_tables: + create_tables = ( + self.create_tables if self.create_tables is not None else self.connection._config.database.create_tables + ) + if not is_declared and not assert_declared and create_tables: instance.declare(context) self.connection.dependencies.clear() is_declared = is_declared or instance.is_declared @@ -409,7 +411,7 @@ def drop(self, prompt: bool | None = None) -> None: AccessError If insufficient permissions to drop the schema. """ - prompt = config["safemode"] if prompt is None else prompt + prompt = self.connection._config["safemode"] if prompt is None else prompt if not self.exists: logger.info("Schema named `{database}` does not exist. 
Doing nothing.".format(database=self.database)) diff --git a/src/datajoint/staged_insert.py b/src/datajoint/staged_insert.py index 6ac3819e4..1f6ee7afb 100644 --- a/src/datajoint/staged_insert.py +++ b/src/datajoint/staged_insert.py @@ -14,7 +14,6 @@ import fsspec from .errors import DataJointError -from .settings import config from .storage import StorageBackend, build_object_path @@ -69,7 +68,7 @@ def _ensure_backend(self): """Ensure storage backend is initialized.""" if self._backend is None: try: - spec = config.get_store_spec() # Uses stores.default + spec = self._table.connection._config.get_store_spec() # Uses stores.default self._backend = StorageBackend(spec) except DataJointError: raise DataJointError( @@ -110,7 +109,7 @@ def _get_storage_path(self, field: str, ext: str = "") -> str: ) # Get storage spec (uses stores.default) - spec = config.get_store_spec() + spec = self._table.connection._config.get_store_spec() partition_pattern = spec.get("partition_pattern") token_length = spec.get("token_length", 8) diff --git a/src/datajoint/table.py b/src/datajoint/table.py index 59279489e..a6bc7d2c9 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -23,7 +23,6 @@ ) from .expression import QueryExpression from .heading import Heading -from .settings import config from .staged_insert import staged_insert1 as _staged_insert1 from .utils import get_master, is_camel_case, user_choice @@ -153,7 +152,7 @@ def declare(self, context=None): "Class names must be in CamelCase, starting with a capital letter." 
) sql, _external_stores, primary_key, fk_attribute_map, pre_ddl, post_ddl = declare( - self.full_table_name, self.definition, context, self.connection.adapter + self.full_table_name, self.definition, context, self.connection.adapter, config=self.connection._config ) # Call declaration hook for validation (subclasses like AutoPopulate can override) @@ -1119,7 +1118,7 @@ def strip_quotes(s): raise DataJointError("Exceeded maximum number of delete attempts.") return delete_count - prompt = config["safemode"] if prompt is None else prompt + prompt = self.connection._config["safemode"] if prompt is None else prompt # Start transaction if transaction: @@ -1227,7 +1226,7 @@ def drop(self, prompt: bool | None = None): raise DataJointError( "A table with an applied restriction cannot be dropped. Call drop() on the unrestricted Table." ) - prompt = config["safemode"] if prompt is None else prompt + prompt = self.connection._config["safemode"] if prompt is None else prompt self.connection.dependencies.load() do_drop = True From 092d79fa200576be5be35e5d6e86b847d439d8bf Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 14:35:44 -0600 Subject: [PATCH 25/26] fix: unify global config singleton and fix conn() persistence - instance._global_config now reuses settings.config instead of creating a duplicate Config object. This ensures dj.config["safemode"] = False actually affects self.connection._config["safemode"] reads. - schemas.py now uses _get_singleton_connection() from instance.py instead of the old conn() from connection.py, eliminating the duplicate singleton connection holder. - dj.conn() now only creates a new connection when the singleton doesn't exist or reset=True (not on every call with credentials). - test_uppercase_schema: use prompt=False for drop() calls. 
Co-Authored-By: Claude Opus 4.6 --- src/datajoint/__init__.py | 8 ++++++-- src/datajoint/instance.py | 5 +++-- src/datajoint/schemas.py | 6 +++--- tests/integration/test_schema.py | 4 ++-- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index d7db3e32d..68eac160f 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -135,8 +135,12 @@ def conn( _check_thread_safe() - # If credentials provided or reset requested, (re)create the singleton - if host is not None or user is not None or password is not None or reset: + # If reset requested, always recreate + # If credentials provided and no singleton exists, create one + # If credentials provided and singleton exists, return existing singleton + if reset or ( + instance_module._singleton_connection is None and (host is not None or user is not None or password is not None) + ): # Use provided values or fall back to config host = host if host is not None else _global_config.database.host user = user if user is not None else _global_config.database.user diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py index bd057aa57..c60e267e1 100644 --- a/src/datajoint/instance.py +++ b/src/datajoint/instance.py @@ -12,7 +12,7 @@ from .connection import Connection from .errors import ThreadSafetyError -from .settings import Config, _create_config +from .settings import Config, _create_config, config as _settings_config if TYPE_CHECKING: from .schemas import Schema as SchemaClass @@ -179,7 +179,8 @@ def __repr__(self) -> str: # The global config is created at module load time and can be modified # The singleton connection is created lazily when conn() or Schema() is called -_global_config: Config = _create_config() +# Reuse the config created in settings.py — there must be exactly one global config +_global_config: Config = _settings_config _singleton_connection: Connection | None = None diff --git a/src/datajoint/schemas.py 
b/src/datajoint/schemas.py index 04ff057c3..694250c7d 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -16,8 +16,8 @@ import warnings from typing import TYPE_CHECKING, Any -from .connection import conn from .errors import AccessError, DataJointError +from .instance import _get_singleton_connection if TYPE_CHECKING: from .connection import Connection @@ -173,7 +173,7 @@ def activate( if connection is not None: self.connection = connection if self.connection is None: - self.connection = conn() + self.connection = _get_singleton_connection() self.database = schema_name if create_schema is not None: self.create_schema = create_schema @@ -860,7 +860,7 @@ def list_schemas(connection: Connection | None = None) -> list[str]: """ return [ r[0] - for r in (connection or conn()).query( + for r in (connection or _get_singleton_connection()).query( 'SELECT schema_name FROM information_schema.schemata WHERE schema_name <> "information_schema"' ) ] diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index ef621765d..cf053df62 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -265,5 +265,5 @@ class Recording(dj.Manual): id: smallint """ - schema2.drop() - schema1.drop() + schema2.drop(prompt=False) + schema1.drop(prompt=False) From 2429a8ab0502ec67771021e9c2d0b5b26c2cbcfb Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 14:38:03 -0600 Subject: [PATCH 26/26] docs: document connection-scoped config architecture in thread-safe mode spec Adds Architecture section covering the object graph, config flow for both singleton and Instance paths, and a table of all connection-scoped config reads across 9 modules. 
Co-Authored-By: Claude Opus 4.6 --- docs/design/thread-safe-mode.md | 107 ++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 13 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 0068f8d5e..297cb619b 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -163,6 +163,89 @@ Mouse().insert1({"mouse_id": 1}) Mouse().fetch() ``` +## Architecture + +### Object graph + +There is exactly **one** global `Config` object created at import time in `settings.py`. Both the legacy API and the `Instance` API hang off `Connection` objects, each of which carries a `_config` reference. + +``` +settings.py + config = _create_config() ← THE single global Config + +instance.py + _global_config = settings.config ← same object (not a copy) + _singleton_connection = None ← lazily created Connection + +__init__.py + dj.config = _ConfigProxy() ← proxy → _global_config (with thread-safety check) + dj.conn() ← returns _singleton_connection + dj.Schema() ← uses _singleton_connection + dj.FreeTable() ← uses _singleton_connection + +Connection (singleton) + _config → _global_config ← same Config that dj.config writes to + +Connection (Instance) + _config → fresh Config ← isolated per-instance +``` + +### Config flow: singleton path + +``` +dj.config["safemode"] = False + ↓ _ConfigProxy.__setitem__ +_global_config["safemode"] = False (same object as settings.config) + ↓ +Connection._config["safemode"] (points to _global_config) + ↓ +schema.drop() reads self.connection._config["safemode"] → False ✓ +``` + +### Config flow: Instance path + +``` +inst = dj.Instance(host=..., user=..., password=...) 
+ ↓ +inst.config = _create_config() (fresh Config, independent) +inst.connection._config = inst.config + ↓ +inst.config["safemode"] = False + ↓ +schema.drop() reads self.connection._config["safemode"] → False ✓ +``` + +### Key invariant + +**All runtime config reads go through `self.connection._config`**, never through the global `config` directly. This ensures both the singleton and Instance paths read the correct config. + +### Connection-scoped config reads + +Every module that previously imported `from .settings import config` now reads config from the connection: + +| Module | What was read | How it's read now | +|--------|--------------|-------------------| +| `schemas.py` | `config["safemode"]`, `config.database.create_tables` | `self.connection._config[...]` | +| `table.py` | `config["safemode"]` in `delete()`, `drop()` | `self.connection._config["safemode"]` | +| `expression.py` | `config["loglevel"]` in `__repr__()` | `self.connection._config["loglevel"]` | +| `preview.py` | `config["display.*"]` (8 reads) | `query_expression.connection._config[...]` | +| `autopopulate.py` | `config.jobs.allow_new_pk_fields_in_computed_tables`, `auto_refresh` | `self.connection._config.jobs.*` | +| `jobs.py` | `config.jobs.default_priority`, `stale_timeout`, `keep_completed` | `self.connection._config.jobs.*` | +| `declare.py` | `config.jobs.add_job_metadata` | `config` param (threaded from `table.py`) | +| `diagram.py` | `config.display.diagram_direction` | `self._connection._config.display.*` | +| `staged_insert.py` | `config.get_store_spec()` | `self._table.connection._config.get_store_spec()` | + +### Functions that receive config as a parameter + +Some module-level functions cannot access `self.connection`. 
Config is threaded through: + +| Function | Caller | How config arrives | +|----------|--------|--------------------| +| `declare()` in `declare.py` | `Table.declare()` in `table.py` | `config=self.connection._config` kwarg | +| `_get_job_version()` in `jobs.py` | `AutoPopulate._populate1()`, `Job.reserve()` | `self.connection._config` positional arg | + +Both functions accept `config=None` and fall back to the global `settings.config` for backward compatibility. + ## Implementation ### 1. Create Instance class @@ -185,8 +268,11 @@ class Instance: ### 2. Global config and singleton connection ```python -# Module level -_global_config = _create_config() # Created at import time +# settings.py - THE single global config +config = _create_config() # Created at import time + +# instance.py - reuses the same config object +_global_config = settings.config # Same reference, not a copy _singleton_connection = None # Created lazily def _check_thread_safe(): @@ -224,8 +310,12 @@ class _ConfigProxy: config = _ConfigProxy() -# dj.conn() -> singleton connection -def conn(): +# dj.conn() -> singleton connection (persistent across calls) +def conn(host=None, user=None, password=None, *, reset=False): + _check_thread_safe() + if reset or (_singleton_connection is None and credentials_provided): + _singleton_connection = Connection(...) + _singleton_connection._config = _global_config return _get_singleton_connection() # dj.Schema() -> uses singleton connection @@ -238,21 +328,12 @@ def Schema(name, connection=None, **kwargs): # dj.FreeTable() -> uses singleton connection def FreeTable(conn_or_name, full_table_name=None): if full_table_name is None: - # Called as FreeTable("db.table") _check_thread_safe() return _FreeTable(_get_singleton_connection(), conn_or_name) else: - # Called as FreeTable(conn, "db.table") return _FreeTable(conn_or_name, full_table_name) ``` -### 4. 
Refactor internal code - -All internal code uses `self.connection._config` instead of global `config`: -- Connection stores reference to its config as `self._config` -- Tables access config via `self.connection._config` -- This works uniformly for both singleton and isolated instances - ## Global State Audit All module-level mutable state was reviewed for thread-safety implications.