Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions graalpython/com.oracle.graal.python.test/src/tests/test_reparse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# Copyright (c) 2026, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# The Universal Permissive License (UPL), Version 1.0
#
# Subject to the condition set forth below, permission is hereby granted to any
# person obtaining a copy of this software, associated documentation and/or
# data (collectively the "Software"), free of charge and under any and all
# copyright rights in the Software, and any and all patent rights owned or
# freely licensable by each licensor hereunder covering either (i) the
# unmodified Software as contributed to or provided by such licensor, or (ii)
# the Larger Works (as defined below), to deal in both
#
# (a) the Software, and
#
# (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
# one is included with the Software each a "Larger Work" to which the Software
# is contributed by such licensors),
#
# without restriction, including without limitation the rights to copy, create
# derivative works of, display, perform, and distribute the Software and make,
# use, sell, offer for sale, import, export, have made, and have sold the
# Software and the Larger Work(s), and to sublicense the foregoing rights on
# either these or other terms.
#
# This license is subject to the following condition:
#
# The above copyright notice and either this complete permission notice or at a
# minimum a reference to the UPL must be included in all copies or substantial
# portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import compileall
import contextlib
import os
import re
import socket
import subprocess
import sys
import tempfile
import time
import unittest
from pathlib import Path

# Source prologue that pyc_reparse() writes at the top of every generated
# example module. When the module runs, it connects to the TCP port given as
# the first command-line argument and blocks on a one-byte read, so the parent
# test process decides when the rest of the module gets to execute.
SYNC_PREAMBLE = '''
import sys
import socket

with socket.create_connection(('localhost', int(sys.argv[1]))) as sock:
sock.recv(1)
'''


@contextlib.contextmanager
def pyc_reparse(test_content, expect_success=True, python_options=()):
    """Run a generated module in a child interpreter, pausing it for tampering.

    The module consists of ``SYNC_PREAMBLE`` followed by *test_content*. It is
    written to a temporary directory, compiled to a ``.pyc`` and started with
    ``-m example <port>`` in a subprocess. The preamble makes the child connect
    back to this process and wait; while it waits, the body of the ``with``
    statement runs and may modify or delete the files. Leaving the body
    releases the child, whose exit status and output are then checked.

    Args:
        test_content: Python source appended after ``SYNC_PREAMBLE``.
        expect_success: When true, the child must exit with status 0.
            Otherwise it must exit with status 1 and its output must contain a
            ``SystemError`` mentioning ``--python.KeepBytecodeInMemory``.
        python_options: Extra interpreter options placed before ``-m``.

    Yields:
        A ``(source_path, pyc_path)`` tuple of ``pathlib.Path`` objects.

    Raises:
        unittest.SkipTest: When not running on GraalPy with the bytecode DSL
            interpreter.
        AssertionError: When the child never connects, dies early, or ends
            with an unexpected status or output.
    """
    if sys.implementation.name != "graalpy" or not __graalpython__.is_bytecode_dsl_interpreter:
        raise unittest.SkipTest("Reparsing tests are only meaningful on bytecode DSL interpreter")
    with tempfile.TemporaryDirectory() as tempdir:
        tempdir_path = Path(tempdir)
        example_module_path = tempdir_path / "example.py"
        with open(example_module_path, "w") as f:
            f.write(SYNC_PREAMBLE)
            f.write(test_content)
        # Change mtime of the example module source to the past a bit to avoid mtime resolution issues
        past = time.time() - 1000
        os.utime(example_module_path, (past, past))
        compileall.compile_file(example_module_path, force=True, quiet=True)
        pyc_files = list((tempdir_path / '__pycache__').glob('*.pyc'))
        assert len(pyc_files) == 1, "Didn't find a .pyc file"
        # The child only ever connects to 'localhost', so listen on the
        # loopback interface instead of exposing the port on every interface.
        with socket.create_server(('127.0.0.1', 0)) as server:
            port = server.getsockname()[1]
            env = os.environ.copy()
            env['PYTHONPATH'] = str(tempdir_path)
            proc = subprocess.Popen(
                [sys.executable, *python_options, "-m", "example", str(port)],
                env=env,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            try:
                server.settimeout(3.0)
                # Only accept() is guarded by the timeout handler; the yield
                # happens outside of it so that a socket.timeout raised by the
                # test body is not mistaken for "child has not connected yet".
                for _ in range(20):
                    try:
                        sock = server.accept()[0]
                        break
                    except socket.timeout:
                        assert proc.poll() is None, proc.communicate()[0]
                else:
                    assert False, "Timed out waiting for connection"
                with sock:
                    yield example_module_path, pyc_files[0]
                    # Release the child so it runs the rest of the module
                    sock.sendall(b"x")
                out = proc.communicate()[0]
                if expect_success:
                    assert proc.wait() == 0, out
                else:
                    assert proc.wait() == 1 and re.search(r"SystemError:.*--python\.KeepBytecodeInMemory", out), out
            finally:
                # Never leave the child or its pipe behind, whatever failed
                if proc.poll() is None:
                    proc.kill()
                if proc.stdout is not None and not proc.stdout.closed:
                    proc.stdout.close()
                proc.wait()


# Module body used by most tests below. It installs a sys.settrace() hook only
# after foo() has been defined, calls foo() and asserts that "line" events were
# reported for exactly the two lines of foo's body.
# NOTE(review): presumably enabling tracing this late is what makes the
# interpreter go back to the .pyc file for the bytecode - confirm against the
# interpreter implementation.
TRACING_TEST = '''
import sys

def foo():
a = 42
return a

lines = []

def tracefunc(frame, event, arg):
if event == "line" and frame.f_code is foo.__code__:
lines.append(frame.f_lineno)
return tracefunc

sys.settrace(tracefunc)
assert foo() == 42
firstlineno = foo.__code__.co_firstlineno
assert lines == [firstlineno + 1, firstlineno + 2], "Code didn't trace when expected"
'''


def test_reparse():
    """Run the tracing module with an untouched .pyc; the child must exit cleanly."""
    with pyc_reparse(TRACING_TEST, expect_success=True):
        # Nothing is tampered with while the child is paused.
        pass


def test_reparse_deleted():
    """Delete the .pyc while the child is paused; the child is expected to fail."""
    paused_child = pyc_reparse(TRACING_TEST, expect_success=False)
    with paused_child as (_source, bytecode):
        bytecode.unlink()


def test_reparse_truncated():
    """Empty the .pyc while the child is paused; the child is expected to fail."""
    paused_child = pyc_reparse(TRACING_TEST, expect_success=False)
    with paused_child as (_source, bytecode):
        with open(bytecode, 'r+') as pyc:
            # The position of a freshly opened file is 0, so this drops all content
            pyc.truncate()


def test_reparse_truncated_part():
    """Cut the .pyc down to 30 bytes while the child is paused; the child is expected to fail."""
    kept_bytes = 30
    paused_child = pyc_reparse(TRACING_TEST, expect_success=False)
    with paused_child as (_source, bytecode):
        with open(bytecode, 'r+') as pyc:
            pyc.truncate(kept_bytes)


def test_reparse_modified():
    """Replace the .pyc with one compiled from changed source; the child is expected to fail."""
    paused_child = pyc_reparse(TRACING_TEST, expect_success=False)
    with paused_child as (source, bytecode):
        bytecode.unlink()
        # Same module, but foo() now assigns a different constant
        altered_body = TRACING_TEST.replace('a = 42', 'a = 32')
        with open(source, 'w') as src:
            src.write(SYNC_PREAMBLE)
            src.write(altered_body)
        compileall.compile_file(source, force=True, quiet=True)
        assert bytecode.exists()


def test_reparse_disabled():
    """With --python.KeepBytecodeInMemory, deleting the .pyc must not make the child fail."""
    keep_in_memory = ["--python.KeepBytecodeInMemory"]
    paused_child = pyc_reparse(TRACING_TEST, python_options=keep_in_memory, expect_success=True)
    with paused_child as (_source, bytecode):
        bytecode.unlink()


# Module body that calls foo(), then rebuilds foo.__code__ from its own co_code
# via code.replace() and calls foo() again, expecting the same result.
# NOTE(review): presumably reading co_code is what needs the serialized
# bytecode from the .pyc file - confirm against the interpreter implementation.
CO_CODE_TEST = '''
def foo():
a = 42
return a

assert foo() == 42
foo.__code__ = foo.__code__.replace(co_code=foo.__code__.co_code)
assert foo() == 42
'''


def test_reparse_co_code():
    """Run the co_code module with an untouched .pyc; the child must exit cleanly."""
    with pyc_reparse(CO_CODE_TEST, expect_success=True):
        # Nothing is tampered with while the child is paused.
        pass


def test_reparse_co_code_deleted():
    """Delete the .pyc before the co_code module runs; the child is expected to fail."""
    paused_child = pyc_reparse(CO_CODE_TEST, expect_success=False)
    with paused_child as (_source, bytecode):
        bytecode.unlink()
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,16 @@ public final class PythonLanguage extends TruffleLanguage<PythonContext> {
public static final int GRAALVM_MICRO;
public static final String DEV_TAG;

/*
 * Magic number used to mark pyc files. It is computed from Compiler.BYTECODE_VERSION, so a
 * change of the bytecode version yields a different magic number.
 */
public static final int MAGIC_NUMBER = 21000 + Compiler.BYTECODE_VERSION * 10;
/* The four magic bytes in their serialized form; the content is filled in by the static initializer below. */
public static final byte[] MAGIC_NUMBER_BYTES = new byte[4];

static {
// Store the magic number in little-endian order first; bytes 2 and 3 are then overwritten
// with "\r\n", so only the low 16 bits of MAGIC_NUMBER end up in the serialized form.
PythonUtils.ARRAY_ACCESSOR_LE.putInt(PythonLanguage.MAGIC_NUMBER_BYTES, 0, PythonLanguage.MAGIC_NUMBER);
PythonLanguage.MAGIC_NUMBER_BYTES[2] = '\r';
PythonLanguage.MAGIC_NUMBER_BYTES[3] = '\n';
}

/**
* The version generated at build time is stored in an ASCII-compatible way. At build time, we
* added the ordinal value of some base character (in this case {@code '!'}) to ensure that we
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,17 @@
import static com.oracle.graal.python.PythonLanguage.GRAALVM_MICRO;
import static com.oracle.graal.python.PythonLanguage.GRAALVM_MINOR;
import static com.oracle.graal.python.PythonLanguage.J_GRAALPYTHON_ID;
import static com.oracle.graal.python.PythonLanguage.MAGIC_NUMBER;
import static com.oracle.graal.python.PythonLanguage.MAGIC_NUMBER_BYTES;
import static com.oracle.graal.python.PythonLanguage.RELEASE_LEVEL;
import static com.oracle.graal.python.PythonLanguage.RELEASE_LEVEL_FINAL;
import static com.oracle.graal.python.nodes.BuiltinNames.J_EXTEND;
import static com.oracle.graal.python.nodes.BuiltinNames.J___GRAALPYTHON__;
import static com.oracle.graal.python.nodes.BuiltinNames.T_FORMAT;
import static com.oracle.graal.python.nodes.BuiltinNames.T_MTIME;
import static com.oracle.graal.python.nodes.BuiltinNames.T_SHA3;
import static com.oracle.graal.python.nodes.BuiltinNames.T_SIZE;
import static com.oracle.graal.python.nodes.BuiltinNames.T__IMP;
import static com.oracle.graal.python.nodes.BuiltinNames.T___GRAALPYTHON__;
import static com.oracle.graal.python.nodes.BuiltinNames.T___MAIN__;
import static com.oracle.graal.python.nodes.SpecialAttributeNames.T___NAME__;
Expand All @@ -63,6 +69,7 @@
import static com.oracle.graal.python.runtime.exception.PythonErrorType.ImportError;
import static com.oracle.graal.python.runtime.exception.PythonErrorType.SystemError;
import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError;
import static com.oracle.graal.python.util.PythonUtils.ARRAY_ACCESSOR_LE;
import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING;
import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached;
import static com.oracle.graal.python.util.PythonUtils.tsLiteral;
Expand Down Expand Up @@ -128,8 +135,10 @@
import com.oracle.graal.python.builtins.objects.str.StringUtils;
import com.oracle.graal.python.builtins.objects.tuple.PTuple;
import com.oracle.graal.python.lib.OsEnvironGetNode;
import com.oracle.graal.python.lib.PyNumberLongNode;
import com.oracle.graal.python.lib.PyObjectCallMethodObjArgs;
import com.oracle.graal.python.lib.PyObjectGetItem;
import com.oracle.graal.python.lib.PyObjectStrAsTruffleStringNode;
import com.oracle.graal.python.nodes.ErrorMessages;
import com.oracle.graal.python.nodes.PConstructAndRaiseNode;
import com.oracle.graal.python.nodes.PRaiseNode;
Expand All @@ -150,6 +159,9 @@
import com.oracle.graal.python.nodes.object.GetClassNode;
import com.oracle.graal.python.nodes.object.GetOrCreateDictNode;
import com.oracle.graal.python.nodes.statement.AbstractImportNode;
import com.oracle.graal.python.nodes.util.CannotCastException;
import com.oracle.graal.python.nodes.util.CastToJavaLongLossyNode;
import com.oracle.graal.python.nodes.util.CastToJavaStringNode;
import com.oracle.graal.python.nodes.util.CastToTruffleStringNode;
import com.oracle.graal.python.nodes.util.ToNativePrimitiveStorageNode;
import com.oracle.graal.python.runtime.ExecutionContext;
Expand Down Expand Up @@ -457,6 +469,102 @@ private static Object[] convertToObjectArray(TruffleString[] arr) {
return objectArr;
}

/**
 * Implements the {@code load_bytecode_file(bytecode_path, source_path, stat_result)} builtin.
 *
 * Reads a bytecode (.pyc) file, validates its 16-byte header against the source file and, when
 * the header is valid, hands the payload to {@code MarshalModuleBuiltins.fromBytecodeFile}
 * together with a cache key taken from the header. The header layout handled here is:
 * bytes 0-3 magic, bytes 4-7 flags, bytes 8-15 either the 64-bit source hash (hash-based pyc)
 * or the 32-bit source mtime followed by the 32-bit source size (timestamp-based pyc).
 *
 * The builtin returns {@code None} whenever the file cannot be read or does not validate, so
 * that the caller can tell "not usable" apart from a successfully loaded result.
 */
@Builtin(name = "load_bytecode_file", minNumOfPositionalArgs = 3)
@GenerateNodeFactory
abstract static class LoadBytecodeFileNode extends PythonBuiltinNode {

    static final TruffleString T_CHECK_HASH_BASED_PYCS = tsLiteral("check_hash_based_pycs");
    static final TruffleString T__BOOTSTRAP = tsLiteral("_bootstrap");
    public static final TruffleString T__VERBOSE_MESSAGE = tsLiteral("_verbose_message");
    public static final TruffleString MESSAGE = tsLiteral("'{} matches {}'");

    /* Offsets of the individual fields in the pyc header. */
    private static final int PYC_FLAGS_OFFSET = 4;
    /* Start of the 8 validation bytes: source hash, or mtime followed by size. */
    private static final int PYC_VALIDATION_OFFSET = 8;
    private static final int PYC_SIZE_OFFSET = 12;
    /* Total header length; the marshalled payload starts right after it. */
    private static final int PYC_HEADER_SIZE = 16;
    /* The header stores mtime and size as unsigned 32-bit values. */
    private static final long UINT32_MASK = 0xFFFFFFFFL;

    @Specialization
    static Object doit(VirtualFrame frame, Object bytecodePath, Object sourcePath, Object statResult,
                    @Bind Node inliningTarget,
                    @Bind PythonContext context,
                    @Cached("createFor($node)") BoundaryCallData boundaryCallData) {
        Object savedState = BoundaryCallContext.enter(frame, boundaryCallData);
        try {
            return doLoadBytecodeFile(bytecodePath, sourcePath, statResult, inliningTarget, context);
        } finally {
            BoundaryCallContext.exit(frame, boundaryCallData, savedState);
        }
    }

    /**
     * Loads and validates the bytecode file behind a Truffle boundary.
     *
     * @param bytecodePath path of the .pyc file; converted with {@code str()}
     * @param sourcePath path of the source file; only read when a hash-based pyc has to be
     *            checked against the source
     * @param statResult object indexed with {@code T_MTIME} and {@code T_SIZE} to obtain the
     *            source modification time and size; the size entry may be {@code None}
     * @return the result of {@code MarshalModuleBuiltins.fromBytecodeFile}, or {@code None} when
     *         the file is too short, has a foreign magic number, has unknown flag bits, does
     *         not match the source, or cannot be read
     */
    @TruffleBoundary
    private static Object doLoadBytecodeFile(Object bytecodePath, Object sourcePath, Object statResult, Node inliningTarget, PythonContext context) {
        /*
         * This builtin is used to load a bytecode file (.pyc) in a way that we can trust that
         * it really comes from that file. It enables unloading serialized DSL bytecode from
         * memory, so that it can be reparsed later from the same file. It also provides the
         * cache key for CallTarget cache in multicontext mode.
         */
        try {
            // get_data
            TruffleString strBytecodePath = PyObjectStrAsTruffleStringNode.executeUncached(bytecodePath);
            TruffleFile bytecodeFile = context.getEnv().getPublicTruffleFile(strBytecodePath.toJavaStringUncached());
            byte[] bytes = bytecodeFile.readAllBytes();
            // _classify_pyc
            if (bytes.length < PYC_HEADER_SIZE || !Arrays.equals(bytes, 0, 4, MAGIC_NUMBER_BYTES, 0, 4)) {
                return PNone.NONE;
            }
            int flags = ARRAY_ACCESSOR_LE.getInt(bytes, PYC_FLAGS_OFFSET);
            // Only the two lowest flag bits are defined; anything else is an unknown format
            if ((flags & ~0b11) != 0) {
                return PNone.NONE;
            }
            long cacheKey;
            boolean hashBased = (flags & 0b1) != 0;
            // Note that mtime-based validation is the default, hashing is opt-in
            if (hashBased) {
                boolean checkSource = (flags & 0b10) != 0;
                // The source hash occupies header bytes 8-15. Reading it from offset 16 would
                // take the first payload bytes instead and could run past a 16-byte file.
                cacheKey = ARRAY_ACCESSOR_LE.getLong(bytes, PYC_VALIDATION_OFFSET);
                String checkHashBasedPycs = "";
                try {
                    checkHashBasedPycs = CastToJavaStringNode.getUncached().execute(context.lookupBuiltinModule(T__IMP).getAttribute(T_CHECK_HASH_BASED_PYCS));
                } catch (CannotCastException e) {
                    // ignore
                }
                if (!checkHashBasedPycs.equals("never") && (checkSource || checkHashBasedPycs.equals("always"))) {
                    // get_data
                    TruffleString strSourcePath = PyObjectStrAsTruffleStringNode.executeUncached(sourcePath);
                    TruffleFile sourceFile = context.getEnv().getPublicTruffleFile(strSourcePath.toJavaStringUncached());
                    byte[] sourceBytes = sourceFile.readAllBytes();
                    long sourceHash = ARRAY_ACCESSOR_LE.getLong(ImpModuleBuiltins.SourceHashNode.hashSource(MAGIC_NUMBER, sourceBytes, sourceBytes.length), 0);
                    // _validate_hash_pyc
                    if (cacheKey != sourceHash) {
                        return PNone.NONE;
                    }
                }
            } else {
                // _validate_timestamp_pyc
                Object mTimeObj = PyNumberLongNode.executeUncached(PyObjectGetItem.executeUncached(statResult, T_MTIME));
                long mTime = CastToJavaLongLossyNode.executeUncached(mTimeObj);
                // The header only keeps the low 32 bits, so compare against the masked value
                if (Integer.toUnsignedLong(ARRAY_ACCESSOR_LE.getInt(bytes, PYC_VALIDATION_OFFSET)) != (mTime & UINT32_MASK)) {
                    return PNone.NONE;
                }
                Object sizeObj = PyObjectGetItem.executeUncached(statResult, T_SIZE);
                if (sizeObj != PNone.NONE) {
                    long size = CastToJavaLongLossyNode.executeUncached(sizeObj);
                    if (Integer.toUnsignedLong(ARRAY_ACCESSOR_LE.getInt(bytes, PYC_SIZE_OFFSET)) != (size & UINT32_MASK)) {
                        return PNone.NONE;
                    }
                }
                // mtime and size together form the 64-bit cache key
                cacheKey = ARRAY_ACCESSOR_LE.getLong(bytes, PYC_VALIDATION_OFFSET);
            }
            if (context.getOption(PythonOptions.VerboseFlag)) {
                Object message = PyObjectCallMethodObjArgs.executeUncached(MESSAGE, T_FORMAT, bytecodePath, sourcePath);
                CallNode.executeUncached(context.lookupBuiltinModule(T__BOOTSTRAP).getAttribute(T__VERBOSE_MESSAGE), message);
            }
            return MarshalModuleBuiltins.fromBytecodeFile(context, bytecodeFile, bytes, PYC_HEADER_SIZE, bytes.length - PYC_HEADER_SIZE, cacheKey);
        } catch (MarshalModuleBuiltins.Marshal.MarshalError me) {
            throw PRaiseNode.raiseStatic(inliningTarget, me.type, me.message, me.arguments);
        } catch (IOException | SecurityException | UnsupportedOperationException | IllegalArgumentException e) {
            // Best effort: an unreadable file is reported as "not usable", not as an error
            LOGGER.fine(() -> PythonUtils.formatJString("Failed to load bytecode file using load_bytecode_file: %s", e));
            return PNone.NONE;
        }
    }
}

@Builtin(name = "read_file", minNumOfPositionalArgs = 1)
@GenerateNodeFactory
public abstract static class ReadFileNode extends PythonUnaryBuiltinNode {
Expand Down
Loading
Loading