Skip to content

gh-135953: Implement sampling tool under profile.sample #135998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Lib/profile/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Re-export the profiler entry points from the .profile submodule so they are
# importable directly from the package.
from .profile import run
from .profile import runctx
from .profile import Profile
# NOTE(review): _Utils is imported here but not listed in __all__; presumably
# it is kept reachable for code that used the private name — confirm.
from .profile import _Utils

# Names exported by a star-import of this package.
__all__ = ['run', 'runctx', 'Profile']
81 changes: 81 additions & 0 deletions Lib/profile/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import io
import importlib.machinery
import os
import sys
from optparse import OptionParser

from .profile import runctx


def main():
usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
parser = OptionParser(usage=usage)
parser.allow_interspersed_args = False
parser.add_option(
"-o",
"--outfile",
dest="outfile",
help="Save stats to <outfile>",
default=None,
)
parser.add_option(
"-m",
dest="module",
action="store_true",
help="Profile a library module.",
default=False,
)
parser.add_option(
"-s",
"--sort",
dest="sort",
help="Sort order when printing to stdout, based on pstats.Stats class",
default=-1,
)

if not sys.argv[1:]:
parser.print_usage()
sys.exit(2)

(options, args) = parser.parse_args()
sys.argv[:] = args

# The script that we're profiling may chdir, so capture the absolute path
# to the output file at startup.
if options.outfile is not None:
options.outfile = os.path.abspath(options.outfile)

if len(args) > 0 or options.pid:
if options.module:
import runpy

code = "run_module(modname, run_name='__main__')"
globs = {"run_module": runpy.run_module, "modname": args[0]}
else:
progname = args[0]
sys.path.insert(0, os.path.dirname(progname))
with io.open_code(progname) as fp:
code = compile(fp.read(), progname, "exec")
spec = importlib.machinery.ModuleSpec(
name="__main__", loader=None, origin=progname
)
globs = {
"__spec__": spec,
"__file__": spec.origin,
"__name__": spec.name,
"__package__": None,
"__cached__": None,
}
try:
runctx(code, globs, None, options.outfile, options.sort)
except BrokenPipeError as exc:
# Prevent "Exception ignored" during interpreter shutdown.
sys.stdout = None
sys.exit(exc.errno)
else:
parser.print_usage()
return parser


if __name__ == "__main__":
main()
11 changes: 11 additions & 0 deletions Lib/profile/collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from abc import ABC, abstractmethod


class Collector(ABC):
    """Abstract interface for objects that accumulate sampled stack data.

    Concrete subclasses must implement both ``collect`` and ``export``;
    the class cannot be instantiated until they do.
    """

    @abstractmethod
    def collect(self, stack_frames):
        """Record one sample.

        *stack_frames* is the stack data captured for the sample; its exact
        shape is defined by the concrete collector.
        """

    @abstractmethod
    def export(self, filename):
        """Write everything collected so far to the file named *filename*."""
63 changes: 0 additions & 63 deletions Lib/profile.py → Lib/profile/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,66 +550,3 @@ def f(m, f1=f1):
return mean

#****************************************************************************

def main():
import os
from optparse import OptionParser

usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
parser = OptionParser(usage=usage)
parser.allow_interspersed_args = False
parser.add_option('-o', '--outfile', dest="outfile",
help="Save stats to <outfile>", default=None)
parser.add_option('-m', dest="module", action="store_true",
help="Profile a library module.", default=False)
parser.add_option('-s', '--sort', dest="sort",
help="Sort order when printing to stdout, based on pstats.Stats class",
default=-1)

if not sys.argv[1:]:
parser.print_usage()
sys.exit(2)

(options, args) = parser.parse_args()
sys.argv[:] = args

# The script that we're profiling may chdir, so capture the absolute path
# to the output file at startup.
if options.outfile is not None:
options.outfile = os.path.abspath(options.outfile)

if len(args) > 0:
if options.module:
import runpy
code = "run_module(modname, run_name='__main__')"
globs = {
'run_module': runpy.run_module,
'modname': args[0]
}
else:
progname = args[0]
sys.path.insert(0, os.path.dirname(progname))
with io.open_code(progname) as fp:
code = compile(fp.read(), progname, 'exec')
spec = importlib.machinery.ModuleSpec(name='__main__', loader=None,
origin=progname)
globs = {
'__spec__': spec,
'__file__': spec.origin,
'__name__': spec.name,
'__package__': None,
'__cached__': None,
}
try:
runctx(code, globs, None, options.outfile, options.sort)
except BrokenPipeError as exc:
# Prevent "Exception ignored" during interpreter shutdown.
sys.stdout = None
sys.exit(exc.errno)
else:
parser.print_usage()
return parser

# When invoked as main program, invoke the profiler on a script
if __name__ == '__main__':
main()
83 changes: 83 additions & 0 deletions Lib/profile/pstats_collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import collections
import marshal

from .collector import Collector


class PstatsCollector(Collector):
    """Collector that aggregates sampled stacks into pstats-style entries.

    Each function location is keyed by ``(filename, lineno, funcname)``.
    Per-sample counts are converted to seconds by multiplying with the
    sampling interval when ``create_stats`` runs.
    """

    def __init__(self, sample_interval_usec):
        """Create an empty collector.

        *sample_interval_usec* is the time between samples in microseconds.
        """
        # location -> counters; "inline_calls" counts samples where the
        # location was the first frame, "total_calls" counts every sample
        # in which it appeared anywhere in the stack.
        self.result = collections.defaultdict(
            lambda: dict(total_calls=0, total_rec_calls=0, inline_calls=0)
        )
        self.stats = {}
        self.sample_interval_usec = sample_interval_usec
        # callee location -> {caller location -> number of samples}
        self.callers = collections.defaultdict(
            lambda: collections.defaultdict(int)
        )

    @staticmethod
    def _location(frame):
        """Return the ``(filename, lineno, funcname)`` key for *frame*."""
        return (frame.filename, frame.lineno, frame.funcname)

    def collect(self, stack_frames):
        """Fold one sample into the running counters.

        *stack_frames* is an iterable of ``(thread_id, frames)`` pairs.
        ``frames[0]`` is treated as the currently executing frame and each
        following frame as the caller of the one before it. Threads with an
        empty frame list are ignored.
        """
        for _thread_id, frames in stack_frames:
            if not frames:
                continue

            locations = [self._location(frame) for frame in frames]

            self.result[locations[0]]["inline_calls"] += 1
            for location in locations:
                self.result[location]["total_calls"] += 1

            # Adjacent frames form (callee, caller) edges.
            for callee, caller in zip(locations, locations[1:]):
                self.callers[callee][caller] += 1

    def export(self, filename):
        """Build the stats table and write it to *filename*."""
        self.create_stats()
        self._dump_stats(filename)

    def _dump_stats(self, file):
        """Marshal ``self.stats`` plus a ``("__sampled__",)`` marker to *file*."""
        stats_with_marker = dict(self.stats)
        stats_with_marker[("__sampled__",)] = True
        with open(file, "wb") as f:
            marshal.dump(stats_with_marker, f)

    # Needed for compatibility with pstats.Stats
    def create_stats(self):
        """Populate ``self.stats`` from the collected counters.

        Each entry is ``(total_calls, rec_calls_or_total_calls,
        inline_seconds, cumulative_seconds, callers)``.
        """
        sample_interval_sec = self.sample_interval_usec / 1_000_000
        for location, counts in self.result.items():
            total_calls = counts["total_calls"]
            self.stats[location] = (
                total_calls,
                # Fall back to the total when no recursive count was kept.
                counts["total_rec_calls"] or total_calls,
                counts["inline_calls"] * sample_interval_sec,
                total_calls * sample_interval_sec,
                # Plain dict copy; .get() avoids inserting into the defaultdict.
                dict(self.callers.get(location, {})),
            )
Loading
Loading