Skip to content

Commit b98bc47

Browse files
committed
[3.14] gh-91048: Fix external inspection multi-threaded performance (GH-136005)
(cherry picked from commit 5334732) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent 42e13b8 commit b98bc47

8 files changed

+226
-104
lines changed

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ struct _Py_global_strings {
631631
STRUCT_FOR_ID(offset_src)
632632
STRUCT_FOR_ID(on_type_read)
633633
STRUCT_FOR_ID(onceregistry)
634+
STRUCT_FOR_ID(only_active_thread)
634635
STRUCT_FOR_ID(only_keys)
635636
STRUCT_FOR_ID(oparg)
636637
STRUCT_FOR_ID(opcode)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/test/test_external_inspection.py

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import threading
88
from asyncio import staggered, taskgroups, base_events, tasks
99
from unittest.mock import ANY
10-
from test.support import os_helper, SHORT_TIMEOUT, busy_retry
10+
from test.support import os_helper, SHORT_TIMEOUT, busy_retry, requires_gil_enabled
1111
from test.support.script_helper import make_script
1212
from test.support.socket_helper import find_unused_port
1313

@@ -876,6 +876,126 @@ def test_self_trace(self):
876876
],
877877
)
878878

879+
@skip_if_not_supported
880+
@unittest.skipIf(
881+
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
882+
"Test only runs on Linux with process_vm_readv support",
883+
)
884+
@requires_gil_enabled("Free threaded builds don't have an 'active thread'")
885+
def test_only_active_thread(self):
886+
# Test that only_active_thread parameter works correctly
887+
port = find_unused_port()
888+
script = textwrap.dedent(
889+
f"""\
890+
import time, sys, socket, threading
891+
892+
# Connect to the test process
893+
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
894+
sock.connect(('localhost', {port}))
895+
896+
def worker_thread(name, barrier, ready_event):
897+
barrier.wait() # Synchronize thread start
898+
ready_event.wait() # Wait for main thread signal
899+
# Sleep to keep thread alive
900+
time.sleep(10_000)
901+
902+
def main_work():
903+
# Do busy work to hold the GIL
904+
sock.sendall(b"working\\n")
905+
count = 0
906+
while count < 100000000:
907+
count += 1
908+
if count % 10000000 == 0:
909+
pass # Keep main thread busy
910+
sock.sendall(b"done\\n")
911+
912+
# Create synchronization primitives
913+
num_threads = 3
914+
barrier = threading.Barrier(num_threads + 1) # +1 for main thread
915+
ready_event = threading.Event()
916+
917+
# Start worker threads
918+
threads = []
919+
for i in range(num_threads):
920+
t = threading.Thread(target=worker_thread, args=(f"Worker-{{i}}", barrier, ready_event))
921+
t.start()
922+
threads.append(t)
923+
924+
# Wait for all threads to be ready
925+
barrier.wait()
926+
927+
# Signal ready to parent process
928+
sock.sendall(b"ready\\n")
929+
930+
# Signal threads to start waiting
931+
ready_event.set()
932+
933+
# Give threads time to start sleeping
934+
time.sleep(0.1)
935+
936+
# Now do busy work to hold the GIL
937+
main_work()
938+
"""
939+
)
940+
941+
with os_helper.temp_dir() as work_dir:
942+
script_dir = os.path.join(work_dir, "script_pkg")
943+
os.mkdir(script_dir)
944+
945+
# Create a socket server to communicate with the target process
946+
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
947+
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
948+
server_socket.bind(("localhost", port))
949+
server_socket.settimeout(SHORT_TIMEOUT)
950+
server_socket.listen(1)
951+
952+
script_name = _make_test_script(script_dir, "script", script)
953+
client_socket = None
954+
try:
955+
p = subprocess.Popen([sys.executable, script_name])
956+
client_socket, _ = server_socket.accept()
957+
server_socket.close()
958+
959+
# Wait for ready signal
960+
response = b""
961+
while b"ready" not in response:
962+
response += client_socket.recv(1024)
963+
964+
# Wait for the main thread to start its busy work
965+
while b"working" not in response:
966+
response += client_socket.recv(1024)
967+
968+
# Get stack trace with all threads
969+
unwinder_all = RemoteUnwinder(p.pid, all_threads=True)
970+
all_traces = unwinder_all.get_stack_trace()
971+
972+
# Get stack trace with only GIL holder
973+
unwinder_gil = RemoteUnwinder(p.pid, only_active_thread=True)
974+
gil_traces = unwinder_gil.get_stack_trace()
975+
976+
except PermissionError:
977+
self.skipTest(
978+
"Insufficient permissions to read the stack trace"
979+
)
980+
finally:
981+
if client_socket is not None:
982+
client_socket.close()
983+
p.kill()
984+
p.terminate()
985+
p.wait(timeout=SHORT_TIMEOUT)
986+
987+
# Verify we got multiple threads in all_traces
988+
self.assertGreater(len(all_traces), 1, "Should have multiple threads")
989+
990+
# Verify we got exactly one thread in gil_traces
991+
self.assertEqual(len(gil_traces), 1, "Should have exactly one GIL holder")
992+
993+
# The GIL holder should be in the all_traces list
994+
gil_thread_id = gil_traces[0][0]
995+
all_thread_ids = [trace[0] for trace in all_traces]
996+
self.assertIn(gil_thread_id, all_thread_ids,
997+
"GIL holder should be among all threads")
998+
879999

8801000
if __name__ == "__main__":
8811001
unittest.main()

Modules/_remote_debugging_module.c

Lines changed: 68 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,14 @@
6464
#endif
6565

6666
#ifdef Py_GIL_DISABLED
67-
#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
68-
offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
69-
offsetof(PyInterpreterState, threads.head) + sizeof(void*))
67+
#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
68+
offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
69+
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
70+
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
7071
#else
71-
#define INTERP_STATE_MIN_SIZE MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
72-
offsetof(PyInterpreterState, threads.head) + sizeof(void*))
72+
#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
73+
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
74+
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*))
7375
#endif
7476
#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256)
7577

@@ -206,6 +208,7 @@ typedef struct {
206208
uint64_t code_object_generation;
207209
_Py_hashtable_t *code_object_cache;
208210
int debug;
211+
int only_active_thread;
209212
RemoteDebuggingState *cached_state; // Cached module state
210213
#ifdef Py_GIL_DISABLED
211214
// TLBC cache invalidation tracking
@@ -2496,6 +2499,7 @@ _remote_debugging.RemoteUnwinder.__init__
24962499
pid: int
24972500
*
24982501
all_threads: bool = False
2502+
only_active_thread: bool = False
24992503
debug: bool = False
25002504
25012505
Initialize a new RemoteUnwinder object for debugging a remote Python process.
@@ -2504,6 +2508,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process.
25042508
pid: Process ID of the target Python process to debug
25052509
all_threads: If True, initialize state for all threads in the process.
25062510
If False, only initialize for the main thread.
2511+
only_active_thread: If True, only sample the thread holding the GIL.
2512+
Cannot be used together with all_threads=True.
25072513
debug: If True, chain exceptions to explain the sequence of events that
25082514
lead to the exception.
25092515
@@ -2514,15 +2520,33 @@ process, including examining thread states, stack frames and other runtime data.
25142520
PermissionError: If access to the target process is denied
25152521
OSError: If unable to attach to the target process or access its memory
25162522
RuntimeError: If unable to read debug information from the target process
2523+
ValueError: If both all_threads and only_active_thread are True
25172524
[clinic start generated code]*/
25182525

25192526
static int
25202527
_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
25212528
int pid, int all_threads,
2529+
int only_active_thread,
25222530
int debug)
2523-
/*[clinic end generated code: output=3982f2a7eba49334 input=48a762566b828e91]*/
2531+
/*[clinic end generated code: output=13ba77598ecdcbe1 input=8f8f12504e17da04]*/
25242532
{
2533+
// Validate that all_threads and only_active_thread are not both True
2534+
if (all_threads && only_active_thread) {
2535+
PyErr_SetString(PyExc_ValueError,
2536+
"all_threads and only_active_thread cannot both be True");
2537+
return -1;
2538+
}
2539+
2540+
#ifdef Py_GIL_DISABLED
2541+
if (only_active_thread) {
2542+
PyErr_SetString(PyExc_ValueError,
2543+
"only_active_thread is not supported when Py_GIL_DISABLED is not defined");
2544+
return -1;
2545+
}
2546+
#endif
2547+
25252548
self->debug = debug;
2549+
self->only_active_thread = only_active_thread;
25262550
self->cached_state = NULL;
25272551
if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) {
25282552
set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle");
@@ -2602,13 +2626,18 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
26022626
@critical_section
26032627
_remote_debugging.RemoteUnwinder.get_stack_trace
26042628
2605-
Returns a list of stack traces for all threads in the target process.
2629+
Returns a list of stack traces for threads in the target process.
26062630
26072631
Each element in the returned list is a tuple of (thread_id, frame_list), where:
26082632
- thread_id is the OS thread identifier
26092633
- frame_list is a list of tuples (function_name, filename, line_number) representing
26102634
the Python stack frames for that thread, ordered from most recent to oldest
26112635
2636+
The threads returned depend on the initialization parameters:
2637+
- If only_active_thread was True: returns only the thread holding the GIL
2638+
- If all_threads was True: returns all threads
2639+
- Otherwise: returns only the main thread
2640+
26122641
Example:
26132642
[
26142643
(1234, [
@@ -2632,7 +2661,7 @@ Each element in the returned list is a tuple of (thread_id, frame_list), where:
26322661

26332662
static PyObject *
26342663
_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self)
2635-
/*[clinic end generated code: output=666192b90c69d567 input=331dbe370578badf]*/
2664+
/*[clinic end generated code: output=666192b90c69d567 input=f756f341206f9116]*/
26362665
{
26372666
PyObject* result = NULL;
26382667
// Read interpreter state into opaque buffer
@@ -2655,6 +2684,28 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
26552684
_Py_hashtable_clear(self->code_object_cache);
26562685
}
26572686

2687+
// If only_active_thread is true, we need to determine which thread holds the GIL
2688+
PyThreadState* gil_holder = NULL;
2689+
if (self->only_active_thread) {
2690+
// The GIL state is already in interp_state_buffer, just read from there
2691+
// Check if GIL is locked
2692+
int gil_locked = GET_MEMBER(int, interp_state_buffer,
2693+
self->debug_offsets.interpreter_state.gil_runtime_state_locked);
2694+
2695+
if (gil_locked) {
2696+
// Get the last holder (current holder when GIL is locked)
2697+
gil_holder = GET_MEMBER(PyThreadState*, interp_state_buffer,
2698+
self->debug_offsets.interpreter_state.gil_runtime_state_holder);
2699+
} else {
2700+
// GIL is not locked, return empty list
2701+
result = PyList_New(0);
2702+
if (!result) {
2703+
set_exception_cause(self, PyExc_MemoryError, "Failed to create empty result list");
2704+
}
2705+
goto exit;
2706+
}
2707+
}
2708+
26582709
#ifdef Py_GIL_DISABLED
26592710
// Check TLBC generation and invalidate cache if needed
26602711
uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer,
@@ -2666,7 +2717,10 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
26662717
#endif
26672718

26682719
uintptr_t current_tstate;
2669-
if (self->tstate_addr == 0) {
2720+
if (self->only_active_thread && gil_holder != NULL) {
2721+
// We have the GIL holder, process only that thread
2722+
current_tstate = (uintptr_t)gil_holder;
2723+
} else if (self->tstate_addr == 0) {
26702724
// Get threads head from buffer
26712725
current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer,
26722726
self->debug_offsets.interpreter_state.threads_head);
@@ -2700,10 +2754,14 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
27002754
if (self->tstate_addr) {
27012755
break;
27022756
}
2757+
2758+
// If we're only processing the GIL holder, we're done after one iteration
2759+
if (self->only_active_thread && gil_holder != NULL) {
2760+
break;
2761+
}
27032762
}
27042763

27052764
exit:
2706-
_Py_RemoteDebug_ClearCache(&self->handle);
27072765
return result;
27082766
}
27092767

@@ -2827,11 +2885,9 @@ _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *s
28272885
goto result_err;
28282886
}
28292887

2830-
_Py_RemoteDebug_ClearCache(&self->handle);
28312888
return result;
28322889

28332890
result_err:
2834-
_Py_RemoteDebug_ClearCache(&self->handle);
28352891
Py_XDECREF(result);
28362892
return NULL;
28372893
}
@@ -2898,11 +2954,9 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject
28982954
goto cleanup;
28992955
}
29002956

2901-
_Py_RemoteDebug_ClearCache(&self->handle);
29022957
return result;
29032958

29042959
cleanup:
2905-
_Py_RemoteDebug_ClearCache(&self->handle);
29062960
Py_XDECREF(result);
29072961
return NULL;
29082962
}
@@ -2928,7 +2982,6 @@ RemoteUnwinder_dealloc(PyObject *op)
29282982
}
29292983
#endif
29302984
if (self->handle.pid != 0) {
2931-
_Py_RemoteDebug_ClearCache(&self->handle);
29322985
_Py_RemoteDebug_CleanupProcHandle(&self->handle);
29332986
}
29342987
PyObject_Del(self);

0 commit comments

Comments
 (0)