diff --git a/CHANGELOG.md b/CHANGELOG.md index 276a85af44..602951bac3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ This changelog summarizes major changes between GraalVM versions of the Python language runtime. The main focus is on user-observable behavior of the engine. ## Version 25.1.0 +* Treat foreign buffer objects as Python buffer-compatible binary objects, so APIs like `memoryview`, `bytes`, `bytearray`, `binascii.hexlify`, and `io.BytesIO` work naturally on them when embedding GraalPy in Java. This allows passing binary data between Python and Java's `ByteBuffer` and `ByteSequence` types with minimal (sometimes zero) copies. * Add support for [Truffle source options](https://www.graalvm.org/truffle/javadoc/com/oracle/truffle/api/source/Source.SourceBuilder.html#option(java.lang.String,java.lang.String)): * The `python.Optimize` option can be used to specify the optimization level, like the `-O` (level 1) and `-OO` (level 2) commandline options. * The `python.NewGlobals` option can be used to run a source with a fresh globals dictionary instead of the main module globals, which is useful for embeddings that want isolated top-level execution. diff --git a/docs/user/Interoperability.md b/docs/user/Interoperability.md index 507cbe826f..1f402bf540 100644 --- a/docs/user/Interoperability.md +++ b/docs/user/Interoperability.md @@ -118,6 +118,60 @@ assert l == [6] See the [Interop Types to Python](#interop-types-to-python) section for more interop traits and how they map to Python types. +## Passing Binary Data Between Java and Python + +Passing binary data between Java and Python deserves attention: + +- Java code typically uses `byte[]` or `java.nio.ByteBuffer` +- Python code typically uses `bytes`, `bytearray`, `memoryview`, or file-like APIs such as `io.BytesIO` + +### Java to Python + +Raw Java `byte[]` are accessible as `list`-like objects in Python. 
+Only integral values that fit into a signed `byte` can be read from or written to such objects. +Python, on the other hand, usually exposes binary data as unsigned byte values. +To achieve the equivalent of a "re-interpreting cast", Java byte arrays should be passed to Python using `ByteBuffer.wrap(byte[])`: + +```java +import java.nio.ByteBuffer; +byte[] data = ...; +ByteBuffer buffer = ByteBuffer.wrap(data); // does not copy +context.getBindings("python").putMember("java_buffer", buffer); +``` + +Python can then use the object through buffer-oriented binary data APIs: + +```python +memoryview(java_buffer) # does not copy +bytes(java_buffer) # copies into an immutable Python-owned buffer +bytearray(java_buffer) # copies into a mutable Python-owned buffer +io.BytesIO(java_buffer) # copies into BytesIO's internal storage +``` + +### Python to Java + +Python `bytes` and other bytes-like objects can be interpreted like any `java.util.List`. +Because Python bytes are usually unsigned, however, they cannot simply be converted via `Value#as(byte[].class)` if any values are larger than 127. +The GraalVM Polyglot SDK provides `org.graalvm.polyglot.io.ByteSequence` as a target type to deal with this issue explicitly. + +```java +import org.graalvm.polyglot.Value; +import org.graalvm.polyglot.io.ByteSequence; +Value result = context.eval("python", "b'hello'"); +ByteSequence seq = result.as(ByteSequence.class); // does not copy +``` + +`ByteSequence` keeps the data as a Python-owned byte sequence without immediately copying. +It provides a `toByteArray()` method that deals with re-interpreting unsigned Python bytes as signed Java bytes. 
+ +```java +import java.nio.charset.StandardCharsets; +import org.graalvm.polyglot.io.ByteSequence; +ByteSequence seq = result.as(ByteSequence.class); +byte[] bytes = seq.toByteArray(); // copies into Java byte[] +String s = new String(bytes, StandardCharsets.UTF_8); +``` + ## Call Other Languages from Python The _polyglot_ API allows non-JVM specific interactions with other languages from Python scripts. diff --git a/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/interop/HostInteropTest.java b/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/interop/HostInteropTest.java index ab99e374da..4d54def051 100644 --- a/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/interop/HostInteropTest.java +++ b/graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/interop/HostInteropTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -46,6 +46,7 @@ import static org.junit.Assert.assertTrue; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.time.LocalDate; import java.time.LocalTime; @@ -53,6 +54,7 @@ import org.graalvm.polyglot.Context; import org.graalvm.polyglot.Value; +import org.graalvm.polyglot.io.ByteSequence; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -713,4 +715,79 @@ public void testByteBuffer() { t.writeBufferDouble(ByteOrder.LITTLE_ENDIAN, 0, 12345.6789123); assertEquals(12345.6789123, t.readBufferDouble(ByteOrder.LITTLE_ENDIAN, 0), 0.0); } + + @Test + public void testHostByteBufferAsPythonBuffer() { + byte[] writable = new byte[]{1, 2, 3, 4}; + context.getBindings("python").putMember("writable_bb", ByteBuffer.wrap(writable)); + context.getBindings("python").putMember("readonly_bb", ByteBuffer.wrap(new byte[]{-1, 5, 6, 7, 8}).asReadOnlyBuffer()); + + context.eval("python", """ + import binascii + import io + + mv = memoryview(writable_bb) + assert not mv.readonly + assert mv.tobytes() == b"\\x01\\x02\\x03\\x04" + assert bytes(writable_bb) == b"\\x01\\x02\\x03\\x04" + assert bytearray(writable_bb) == bytearray(b"\\x01\\x02\\x03\\x04") + assert binascii.hexlify(writable_bb) == b"01020304" + bio = io.BytesIO() + assert bio.write(writable_bb) == 4 + assert bio.getvalue() == b"\\x01\\x02\\x03\\x04" + mv[1] = 9 + assert io.BytesIO(b"abcd").readinto(writable_bb) == 4 + assert bytes(writable_bb) == b"abcd" + + ro = memoryview(readonly_bb) + assert ro.readonly + assert ro.tobytes() == b"\\xff\\x05\\x06\\x07\\x08" + assert bytes(readonly_bb) == b"\\xff\\x05\\x06\\x07\\x08" + assert bytearray(readonly_bb) == bytearray(b"\\xff\\x05\\x06\\x07\\x08") + assert io.BytesIO().write(readonly_bb) == 5 + try: + ro[0] = 1 + raise AssertionError("expected memoryview write to fail") + except TypeError: + pass + try: + io.BytesIO(b"wxyz").readinto(readonly_bb) + raise 
AssertionError("expected readinto to fail") + except TypeError: + pass + """); + + assertArrayEquals(new byte[]{'a', 'b', 'c', 'd'}, writable); + } + + @Test + public void testHostByteSequenceAsPythonBuffer() { + byte[] bytes = new byte[]{10, 20, 30, 40}; + context.getBindings("python").putMember("seq", ByteSequence.create(bytes)); + + context.eval("python", """ + import binascii + import io + + mv = memoryview(seq) + assert mv.readonly + assert mv.tobytes() == b"\\x0a\\x14\\x1e\\x28" + assert bytes(seq) == b"\\x0a\\x14\\x1e\\x28" + assert bytearray(seq) == bytearray(b"\\x0a\\x14\\x1e\\x28") + assert binascii.hexlify(seq) == b"0a141e28" + bio = io.BytesIO() + assert bio.write(seq) == 4 + assert bio.getvalue() == b"\\x0a\\x14\\x1e\\x28" + try: + mv[0] = 1 + raise AssertionError("expected memoryview write to fail") + except TypeError: + pass + try: + io.BytesIO(b"abcd").readinto(seq) + raise AssertionError("expected readinto to fail") + except TypeError: + pass + """); + } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/buffer/ForeignBufferAcquireExports.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/buffer/ForeignBufferAcquireExports.java new file mode 100644 index 0000000000..0c31775649 --- /dev/null +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/buffer/ForeignBufferAcquireExports.java @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * The Universal Permissive License (UPL), Version 1.0 + * + * Subject to the condition set forth below, permission is hereby granted to any + * person obtaining a copy of this software, associated documentation and/or + * data (collectively the "Software"), free of charge and under any and all + * copyright rights in the Software, and any and all patent rights owned or + * freely licensable by each licensor hereunder covering either (i) the + * unmodified Software as contributed to or provided by such licensor, or (ii) + * the Larger Works (as defined below), to deal in both + * + * (a) the Software, and + * + * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if + * one is included with the Software each a "Larger Work" to which the Software + * is contributed by such licensors), + * + * without restriction, including without limitation the rights to copy, create + * derivative works of, display, perform, and distribute the Software and make, + * use, sell, offer for sale, import, export, have made, and have sold the + * Software and the Larger Work(s), and to sublicense the foregoing rights on + * either these or other terms. + * + * This license is subject to the following condition: + * + * The above copyright notice and either this complete permission notice or at a + * minimum a reference to the UPL must be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +package com.oracle.graal.python.builtins.objects.buffer; + +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.BufferError; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.IndexError; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.TypeError; + +import java.nio.ByteOrder; + +import com.oracle.graal.python.nodes.ErrorMessages; +import com.oracle.graal.python.nodes.PRaiseNode; +import com.oracle.graal.python.nodes.util.CastToJavaIntExactNode; +import com.oracle.graal.python.util.BufferFormat; +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.dsl.Bind; +import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.interop.InteropLibrary; +import com.oracle.truffle.api.interop.InvalidBufferOffsetException; +import com.oracle.truffle.api.interop.UnsupportedMessageException; +import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.library.ExportLibrary; +import com.oracle.truffle.api.library.ExportMessage; +import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.strings.TruffleString; + +@ExportLibrary(value = PythonBufferAcquireLibrary.class, receiverType = Object.class) +final class ForeignBufferAcquireExports { + + @ExportMessage + static boolean hasBuffer(Object receiver, + @CachedLibrary("receiver") InteropLibrary interop) { + return interop.hasBufferElements(receiver); + } + + @ExportMessage + static Object acquire(Object receiver, int flags, + @Bind Node inliningTarget, + @Cached CastToJavaIntExactNode castInt, + @CachedLibrary("receiver") InteropLibrary interop) { + if (!interop.hasBufferElements(receiver)) { + throw PRaiseNode.raiseStatic(inliningTarget, TypeError, ErrorMessages.BYTESLIKE_OBJ_REQUIRED, receiver); + } + + long bufferSize; + try { + bufferSize = interop.getBufferSize(receiver); + } catch (UnsupportedMessageException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + + boolean 
readonly; + try { + readonly = !interop.isBufferWritable(receiver); + } catch (UnsupportedMessageException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + if (BufferFlags.requestsWritable(flags) && readonly) { + throw PRaiseNode.raiseStatic(inliningTarget, BufferError, ErrorMessages.OBJ_IS_NOT_WRITABLE); + } + return new ForeignBufferAdapter(receiver, castInt.execute(inliningTarget, bufferSize), readonly); + } + + @ExportLibrary(PythonBufferAccessLibrary.class) + static final class ForeignBufferAdapter { + final Object foreignBuffer; + final int len; + final boolean readonly; + + ForeignBufferAdapter(Object foreignBuffer, int len, boolean readonly) { + this.foreignBuffer = foreignBuffer; + this.len = len; + this.readonly = readonly; + } + + @ExportMessage + @SuppressWarnings("static-method") + boolean isBuffer() { + return true; + } + + @ExportMessage + boolean isReadonly() { + return readonly; + } + + @ExportMessage + int getBufferLength() { + return len; + } + + @ExportMessage + Object getOwner() { + return foreignBuffer; + } + + @ExportMessage + byte readByte(int byteOffset, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + try { + return interop.readBufferByte(foreignBuffer, byteOffset); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void writeByte(int byteOffset, byte value, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + assert !readonly; + try { + interop.writeBufferByte(foreignBuffer, byteOffset, value); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void readIntoByteArray(int srcOffset, byte[] dest, int destOffset, int length, + @Bind Node inliningTarget, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + try { + interop.readBuffer(foreignBuffer, srcOffset, dest, destOffset, 
length); + } catch (InvalidBufferOffsetException e) { + throw PRaiseNode.raiseStatic(inliningTarget, IndexError, ErrorMessages.STRUCT_OFFSET_OUT_OF_RANGE, e.getByteOffset(), len); + } catch (UnsupportedMessageException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void readIntoBuffer(int srcOffset, Object dest, int destOffset, int length, PythonBufferAccessLibrary otherLib, + @Bind Node inliningTarget, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + if (otherLib.hasInternalByteArray(dest)) { + readIntoByteArray(srcOffset, otherLib.getInternalByteArray(dest), destOffset, length, inliningTarget, interop); + } else { + for (int i = 0; i < length; i++) { + otherLib.writeByte(dest, destOffset + i, readByte(srcOffset + i, interop)); + } + } + } + + @ExportMessage + short readShortByteOrder(int byteOffset, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + try { + return interop.readBufferShort(foreignBuffer, byteOrder, byteOffset); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + int readIntByteOrder(int byteOffset, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + try { + return interop.readBufferInt(foreignBuffer, byteOrder, byteOffset); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + long readLongByteOrder(int byteOffset, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + try { + return interop.readBufferLong(foreignBuffer, byteOrder, byteOffset); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + float readFloatByteOrder(int byteOffset, ByteOrder byteOrder, + 
@CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + try { + return interop.readBufferFloat(foreignBuffer, byteOrder, byteOffset); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + double readDoubleByteOrder(int byteOffset, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + try { + return interop.readBufferDouble(foreignBuffer, byteOrder, byteOffset); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void writeShortByteOrder(int byteOffset, short value, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + assert !readonly; + try { + interop.writeBufferShort(foreignBuffer, byteOrder, byteOffset, value); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void writeIntByteOrder(int byteOffset, int value, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + assert !readonly; + try { + interop.writeBufferInt(foreignBuffer, byteOrder, byteOffset, value); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void writeLongByteOrder(int byteOffset, long value, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + assert !readonly; + try { + interop.writeBufferLong(foreignBuffer, byteOrder, byteOffset, value); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void writeFloatByteOrder(int byteOffset, float value, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + assert !readonly; + 
try { + interop.writeBufferFloat(foreignBuffer, byteOrder, byteOffset, value); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + void writeDoubleByteOrder(int byteOffset, double value, ByteOrder byteOrder, + @CachedLibrary("this.foreignBuffer") InteropLibrary interop) { + assert !readonly; + try { + interop.writeBufferDouble(foreignBuffer, byteOrder, byteOffset, value); + } catch (UnsupportedMessageException | InvalidBufferOffsetException e) { + throw CompilerDirectives.shouldNotReachHere(e); + } + } + + @ExportMessage + @SuppressWarnings("static-method") + int getItemSize() { + return 1; + } + + @ExportMessage + @SuppressWarnings("static-method") + TruffleString getFormatString() { + return BufferFormat.T_UINT_8_TYPE_CODE; + } + } +} diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/buffer/PythonBufferAcquireLibrary.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/buffer/PythonBufferAcquireLibrary.java index f8fa54e715..973485799e 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/buffer/PythonBufferAcquireLibrary.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/buffer/PythonBufferAcquireLibrary.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -54,6 +54,7 @@ import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.library.GenerateLibrary; +import com.oracle.truffle.api.library.GenerateLibrary.DefaultExport; import com.oracle.truffle.api.library.GenerateLibrary.Abstract; import com.oracle.truffle.api.library.Library; import com.oracle.truffle.api.library.LibraryFactory; @@ -75,6 +76,7 @@ * to be released using {@link PythonBufferAccessLibrary#release(Object)} method when done. */ @GenerateLibrary(assertions = PythonBufferAcquireLibrary.Assertions.class) +@DefaultExport(ForeignBufferAcquireExports.class) public abstract class PythonBufferAcquireLibrary extends Library { /** * Return whether it is possible to acquire a read-only buffer for this object. The actual