python · lkollar · Jun 4, 2025 · Jun 7, 2025 · Jun 22, 2025 · Jun 22, 2025
diff --git a/Lib/profile/__init__.py b/Lib/profile/__init__.py
@@ -0,0 +1,6 @@
+from .profile import run
+from .profile import runctx
+from .profile import Profile
+from .profile import _Utils
+
+__all__ = ['run', 'runctx', 'Profile']
diff --git a/Lib/profile/__main__.py b/Lib/profile/__main__.py
@@ -0,0 +1,81 @@
+import io
+import importlib.machinery
+import os
+import sys
+from optparse import OptionParser
+
+from .profile import runctx
+
+
+def main():
+    """Command-line entry point: profile a script or a module.
+
+    Parses ``-o/--outfile``, ``-s/--sort`` and ``-m`` from ``sys.argv``,
+    replaces ``sys.argv`` with the remaining arguments so the profiled
+    program sees its own command line, and runs the target through
+    ``runctx``.
+
+    Returns the ``OptionParser`` that was used.  Exits with status 2 when
+    invoked without any argument, and with the error's ``errno`` when the
+    profiled run raises ``BrokenPipeError``.  When options are given but no
+    script or module follows them, the usage line is printed and the parser
+    is returned.
+    """
+    usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
+    parser = OptionParser(usage=usage)
+    # Stop option parsing at the first positional argument so that options
+    # meant for the profiled program are left in ``args`` untouched.
+    parser.allow_interspersed_args = False
+    parser.add_option(
+        "-o",
+        "--outfile",
+        dest="outfile",
+        help="Save stats to <outfile>",
+        default=None,
+    )
+    parser.add_option(
+        "-m",
+        dest="module",
+        action="store_true",
+        help="Profile a library module.",
+        default=False,
+    )
+    parser.add_option(
+        "-s",
+        "--sort",
+        dest="sort",
+        help="Sort order when printing to stdout, based on pstats.Stats class",
+        default=-1,
+    )
+
+    if not sys.argv[1:]:
+        parser.print_usage()
+        sys.exit(2)
+
+    (options, args) = parser.parse_args()
+    sys.argv[:] = args
+
+    # The script that we're profiling may chdir, so capture the absolute path
+    # to the output file at startup.
+    if options.outfile is not None:
+        options.outfile = os.path.abspath(options.outfile)
+
+    # The target always comes from the positional arguments.  Only the
+    # options registered above exist on ``options``; reading any other
+    # attribute from it would raise AttributeError.
+    if not args:
+        parser.print_usage()
+        return parser
+
+    if options.module:
+        import runpy
+
+        code = "run_module(modname, run_name='__main__')"
+        globs = {"run_module": runpy.run_module, "modname": args[0]}
+    else:
+        progname = args[0]
+        # Let the script import modules that live next to it.
+        sys.path.insert(0, os.path.dirname(progname))
+        with io.open_code(progname) as fp:
+            code = compile(fp.read(), progname, "exec")
+        # Give the script the module-level globals of a ``__main__`` module.
+        spec = importlib.machinery.ModuleSpec(
+            name="__main__", loader=None, origin=progname
+        )
+        globs = {
+            "__spec__": spec,
+            "__file__": spec.origin,
+            "__name__": spec.name,
+            "__package__": None,
+            "__cached__": None,
+        }
+    try:
+        runctx(code, globs, None, options.outfile, options.sort)
+    except BrokenPipeError as exc:
+        # Prevent "Exception ignored" during interpreter shutdown.
+        sys.stdout = None
+        sys.exit(exc.errno)
+    return parser
+
+
+# Run the command-line interface when this file is executed as the main
+# program rather than imported.
+if __name__ == "__main__":
+    main()
diff --git a/Lib/profile/collector.py b/Lib/profile/collector.py
@@ -0,0 +1,11 @@
+from abc import ABC, abstractmethod
+
+
+class Collector(ABC):
+    """Abstract interface for objects that accumulate profiling samples.
+
+    Subclasses must implement both ``collect`` and ``export``; the class
+    cannot be instantiated until they do.
+    """
+
+    @abstractmethod
+    def collect(self, stack_frames):
+        """Collect profiling data from stack frames.
+
+        ``stack_frames`` is the data of one sample.  Its exact shape is
+        defined by the caller and is not fixed by this interface.
+        """
+
+    @abstractmethod
+    def export(self, filename):
+        """Export collected data to a file.
+
+        ``filename`` names the destination of the exported data.
+        """
diff --git a/Lib/profile.py → Lib/profile/profile.py b/Lib/profile.py → Lib/profile/profile.py
@@ -550,66 +550,3 @@ def f(m, f1=f1):
         return mean
 
 #****************************************************************************
-
-def main():
-    import os
-    from optparse import OptionParser
-
-    usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
-    parser = OptionParser(usage=usage)
-    parser.allow_interspersed_args = False
-    parser.add_option('-o', '--outfile', dest="outfile",
-        help="Save stats to <outfile>", default=None)
-    parser.add_option('-m', dest="module", action="store_true",
-        help="Profile a library module.", default=False)
-    parser.add_option('-s', '--sort', dest="sort",
-        help="Sort order when printing to stdout, based on pstats.Stats class",
-        default=-1)
-
-    if not sys.argv[1:]:
-        parser.print_usage()
-        sys.exit(2)
-
-    (options, args) = parser.parse_args()
-    sys.argv[:] = args
-
-    # The script that we're profiling may chdir, so capture the absolute path
-    # to the output file at startup.
-    if options.outfile is not None:
-        options.outfile = os.path.abspath(options.outfile)
-
-    if len(args) > 0:
-        if options.module:
-            import runpy
-            code = "run_module(modname, run_name='__main__')"
-            globs = {
-                'run_module': runpy.run_module,
-                'modname': args[0]
-            }
-        else:
-            progname = args[0]
-            sys.path.insert(0, os.path.dirname(progname))
-            with io.open_code(progname) as fp:
-                code = compile(fp.read(), progname, 'exec')
-            spec = importlib.machinery.ModuleSpec(name='__main__', loader=None,
-                                                  origin=progname)
-            globs = {
-                '__spec__': spec,
-                '__file__': spec.origin,
-                '__name__': spec.name,
-                '__package__': None,
-                '__cached__': None,
-            }
-        try:
-            runctx(code, globs, None, options.outfile, options.sort)
-        except BrokenPipeError as exc:
-            # Prevent "Exception ignored" during interpreter shutdown.
-            sys.stdout = None
-            sys.exit(exc.errno)
-    else:
-        parser.print_usage()
-    return parser
-
-# When invoked as main program, invoke the profiler on a script
-if __name__ == '__main__':
-    main()
diff --git a/Lib/profile/pstats_collector.py b/Lib/profile/pstats_collector.py
@@ -0,0 +1,83 @@
+import collections
+import marshal
+
+from .collector import Collector
+
+
+class PstatsCollector(Collector):
+    """Collector that turns sampled stacks into a marshalled stats table.
+
+    Each sample is taken to represent ``sample_interval_usec`` microseconds
+    of run time, so times are estimated as ``sample count * interval``.
+    Code locations are identified by ``(filename, lineno, funcname)``
+    tuples.
+    """
+
+    def __init__(self, sample_interval_usec):
+        """Create an empty collector.
+
+        sample_interval_usec -- time between two samples in microseconds;
+        create_stats() uses it to convert sample counts into seconds.
+        """
+        # Per-location sample counters.  "total_rec_calls" is never
+        # incremented by this class; create_stats() falls back to
+        # "total_calls" while it is zero.
+        self.result = collections.defaultdict(
+            lambda: dict(total_calls=0, total_rec_calls=0, inline_calls=0)
+        )
+        # Filled by create_stats(): location -> 5-tuple of
+        # (calls, recursive-or-total calls, own time, cumulative time,
+        # callers).
+        self.stats = {}
+        self.sample_interval_usec = sample_interval_usec
+        # callee location -> caller location -> number of samples in which
+        # the caller frame immediately followed the callee frame.
+        self.callers = collections.defaultdict(
+            lambda: collections.defaultdict(int)
+        )
+
+    def collect(self, stack_frames):
+        """Record one sample.
+
+        ``stack_frames`` is an iterable of ``(thread_id, frames)`` pairs.
+        ``frames`` is a sequence of objects with ``filename``, ``lineno``
+        and ``funcname`` attributes, where ``frames[0]`` is the frame that
+        was executing and every later frame is the caller of the one before
+        it.  Threads with no frames are ignored.
+        """
+        for _thread_id, frames in stack_frames:
+            if not frames:
+                continue
+
+            locations = [
+                (frame.filename, frame.lineno, frame.funcname)
+                for frame in frames
+            ]
+
+            # Only the executing frame is charged with "own" time ...
+            self.result[locations[0]]["inline_calls"] += 1
+            # ... while every frame on the stack is charged cumulative time.
+            for location in locations:
+                self.result[location]["total_calls"] += 1
+
+            # Adjacent frames form (callee, caller) edges.
+            for callee, caller in zip(locations, locations[1:]):
+                self.callers[callee][caller] += 1
+
+    def export(self, filename):
+        """Build the stats table and write it to ``filename``."""
+        self.create_stats()
+        self._dump_stats(filename)
+
+    def _dump_stats(self, file):
+        """Marshal ``self.stats`` to the path ``file``.
+
+        A ``("__sampled__",)`` key mapped to ``True`` is added to the
+        written copy (not to ``self.stats``) to mark the data as sampled.
+        """
+        stats_with_marker = dict(self.stats)
+        stats_with_marker[("__sampled__",)] = True
+        with open(file, "wb") as f:
+            marshal.dump(stats_with_marker, f)
+
+    # Needed for compatibility with pstats.Stats
+    def create_stats(self):
+        """Populate ``self.stats`` from the samples collected so far.
+
+        For every location the entry is the tuple ``(total_calls,
+        total_rec_calls or total_calls, own_seconds, cumulative_seconds,
+        callers)``, with times derived from sample counts multiplied by the
+        sampling interval.
+        """
+        sample_interval_sec = self.sample_interval_usec / 1_000_000
+        for location, counts in self.result.items():
+            total_calls = counts["total_calls"]
+            self.stats[location] = (
+                total_calls,
+                counts["total_rec_calls"] or total_calls,
+                counts["inline_calls"] * sample_interval_sec,
+                total_calls * sample_interval_sec,
+                # Plain-dict copy; .get() avoids creating empty entries in
+                # the defaultdict for locations that have no callers.
+                dict(self.callers.get(location, {})),
+            )