From 5c892a75074cd7b25632500668fd60f5330ca841 Mon Sep 17 00:00:00 2001 From: Benoit Chesneau Date: Sun, 3 May 2026 15:14:18 +0200 Subject: [PATCH] Audit docs, drop dead code, add snippet linter Fix two stale identifier references (Py_GIL_OWN, multi_executor fallback), delete priv/_erlang_impl/_ssl.py and three uncalled py_util/0,1,2,3 exports, and repair a broken SharedDict example in docs/shared-dict.md. Add tests for py:cast/4, py:async_gather/2 and py:dup_fd/1, plus test/coverage_audit.md mapping every public API to its suite. New scripts/lint_doc_snippets.escript validates py:Fn/N calls and Python syntax in fenced blocks; wired into CI and a Makefile target. --- .github/workflows/ci.yml | 3 + Makefile | 20 ++ docs/migration.md | 9 +- docs/owngil_internals.md | 61 ++++-- docs/scalability.md | 8 +- docs/security.md | 2 + docs/shared-dict.md | 17 +- priv/_erlang_impl/_ssl.py | 329 ------------------------------ scripts/lint_doc_snippets.escript | 257 +++++++++++++++++++++++ src/py_util.erl | 32 +-- test/coverage_audit.md | 88 ++++++++ test/py_SUITE.erl | 26 +++ test/py_fd_ops_SUITE.erl | 38 +++- 13 files changed, 502 insertions(+), 388 deletions(-) create mode 100644 Makefile delete mode 100644 priv/_erlang_impl/_ssl.py create mode 100755 scripts/lint_doc_snippets.escript create mode 100644 test/coverage_audit.md diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e14e356..c7771a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,6 +92,9 @@ jobs: - name: Run xref run: rebar3 xref + - name: Lint docs + run: escript scripts/lint_doc_snippets.escript + # FreeBSD test using cross-platform action test-freebsd: name: FreeBSD 14 / Python ${{ matrix.python }} diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..925c1ea --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +.PHONY: all compile test lint-docs clean + +all: compile + +compile: + rebar3 compile + +test: + rebar3 ct --readable=compact + +# Validate fenced 
code blocks in README.md and docs/*.md. +# Erlang `py:Fn(...)` calls must reference a real export at the right +# arity; Python blocks must parse (IndentationError tolerated for +# tutorial fragments). Mark a block to skip with `` +# on the line immediately above the opening fence. +lint-docs: compile + escript scripts/lint_doc_snippets.escript + +clean: + rebar3 clean diff --git a/docs/migration.md b/docs/migration.md index 983bcf3..5f6e1e6 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -38,6 +38,7 @@ application:set_env(erlang_python, context_mode, owngil). **`py:num_executors/0`** - Removed. Contexts now use per-context worker threads. + ```erlang %% v2.x - check executor count N = py:num_executors(). @@ -254,6 +255,7 @@ N = py_context_router:num_contexts(). The function for non-blocking Python calls has been renamed to follow gen_server conventions: **Before (v1.8.x):** + ```erlang Ref = py:call_async(math, factorial, [100]), {ok, Result} = py:await(Ref). @@ -355,6 +357,7 @@ For more sophisticated web framework integration, consider the [Reactor API](rea The process-binding functions have been removed. The new architecture uses `py_context_router` for automatic scheduler-affinity routing. **Before (v1.8.x):** + ```erlang ok = py:bind(), ok = py:exec(<<"x = 42">>), @@ -760,9 +763,9 @@ ImportError: module does not support subinterpreters ``` Options: -1. Use Python < 3.12 (falls back to multi_executor mode) -2. Check if the library has a subinterpreter-compatible version -3. Isolate the library usage to a single context +1. Use Python 3.12 or 3.13: the runtime uses `worker` mode (subinterpreters require Python 3.14+). +2. Check if the library has a subinterpreter-compatible version. +3. Isolate the library usage to a single context. 
### Python 3.14: `erlang_loop_import_failed` diff --git a/docs/owngil_internals.md b/docs/owngil_internals.md index 7ee0382..03ee606 100644 --- a/docs/owngil_internals.md +++ b/docs/owngil_internals.md @@ -425,22 +425,53 @@ class EchoProtocol(reactor.Protocol): ## Performance Characteristics -| Operation | Shared-GIL | OWN_GIL | -|-----------|-----------|---------| +| Operation | Worker (shared GIL) | OWN_GIL | +|-----------|--------------------|---------| | Call overhead | ~2.5μs | ~10μs | -| Throughput (single) | 400K/s | 100K/s | -| Parallelism | None | True | -| Resource usage | Lower | Higher (1 pthread per context) | - -Use OWN_GIL when: -- CPU-bound Python work that benefits from parallelism -- Long-running computations -- Need true concurrent Python execution - -Use worker mode when: -- I/O-bound or short operations -- High call frequency -- Resource constraints +| Throughput (single context) | ~400K/s | ~100K/s | +| Parallelism (N contexts) | GIL-bound | Linear up to N cores | +| Resource usage | One pthread per context | One pthread + one subinterpreter per context | + +## Pros and Cons + +### Pros + +- **True CPU parallelism.** Each context owns its GIL, so N contexts run on N cores at once. Worker mode serialises on the main GIL unless Python is built free-threaded (3.13t+). +- **Crash isolation.** A C-level fault in one subinterpreter leaves the others alive. Worker mode shares the main interpreter, so a corrupt module state can take everything down. +- **Clean namespace per context.** Each subinterpreter has its own `sys.modules`, so module-level state cannot bleed between contexts. Useful when running adversarial or untrusted code paths side by side. +- **Predictable scheduling.** Requests are dispatched via mutex/condvar IPC, not dirty schedulers, so OWN_GIL contexts will not be starved by other dirty NIF traffic. + +### Cons + +- **Python 3.14+ only.** Earlier versions have C-extension global-state bugs (`_decimal`, `numpy`, etc.) 
that crash inside subinterpreters. See [cpython#106078](https://github.com/python/cpython/issues/106078). +- **Higher per-call latency.** ~4x the round-trip cost of worker mode (~10μs vs ~2.5μs) because every call crosses a mutex/condvar handoff to the dedicated thread. +- **Higher memory.** Each subinterpreter imports its own copy of every module. A 50 MB module set across 8 contexts is ~400 MB resident, not 50 MB. +- **C-extension compatibility is not universal.** Extensions must opt in via the multi-phase init protocol (PEP 489) and `Py_mod_multiple_interpreters`. Pure-Python and well-behaved C extensions work; older ones fail at import inside the subinterpreter. +- **No shared Python state.** Module globals, class definitions, and cached objects are per-interpreter. Use `py:state_store/2` (ETS-backed) or `erlang.send` for cross-context data. +- **Callback re-entry is restricted.** When Python in an OWN_GIL context calls `erlang.call`, the callback runs on a thread worker, not back on the OWN_GIL thread (which cannot suspend). Re-entrant Python -> Erlang -> *same* OWN_GIL context calls will not work; use a different context for the nested call, or use `erlang.async_call` from asyncio code. +- **Process-local envs do not span interpreters.** A `py_env_resource_t` is bound to the interpreter that created it. Reusing one across contexts returns `{error, env_wrong_interpreter}`. + +### When to Use Each + +Use **OWN_GIL** when: + +- The workload is CPU-bound Python (ML inference, numpy/torch compute, parsing, codecs) and you want N-way parallelism per BEAM scheduler. +- You can pin the per-context memory budget and the modules in use are subinterpreter-safe. +- You are on Python 3.14+. + +Use **worker** (default) when: + +- You are on Python 3.12 or 3.13. +- Calls are short and frequent (every microsecond of overhead matters). +- You are running modules that are not subinterpreter-safe (some scientific stacks, older C extensions). 
+- You are already running free-threaded Python (3.13t+); worker mode gets parallelism for free without the per-interpreter memory cost. + +### Common Pitfalls + +- **Importing once is not enough.** Imports happen per subinterpreter. Pre-warming a worker context will not pre-warm the OWN_GIL contexts; do it inside each `py_context`. +- **Sharing Python objects across contexts.** Passing a `PyObject*` reference (via `py_state` or otherwise) between OWN_GIL contexts is undefined behaviour. Round-trip through Erlang terms or ETS-backed state. +- **Long-running tasks block the dispatcher.** A single OWN_GIL context processes one request at a time. If you have a 30-second compute job, parallelise across contexts; do not queue everything onto context 1. +- **Callback storms.** Heavy `erlang.call` use inside an OWN_GIL context routes to thread workers, which is fine, but the round-trip cost is then worker-style on top of OWN_GIL dispatch. For tight callback loops, prefer worker mode end-to-end. ## Benchmarking diff --git a/docs/scalability.md b/docs/scalability.md index a5c7eeb..cc32347 100644 --- a/docs/scalability.md +++ b/docs/scalability.md @@ -108,6 +108,10 @@ Ctx = py:context(1), - Higher memory usage (each interpreter loads modules separately) - Some C extensions don't support subinterpreters - Requires Python 3.14+ +- Higher per-call latency (~4x worker) +- Callback re-entry to the same context is restricted (`erlang.call` from inside an OWN_GIL context routes to a thread worker, not back to that context) + +For a fuller breakdown of OWN_GIL tradeoffs, common pitfalls, and a usage decision guide, see [OWN_GIL Internals: Pros and Cons](owngil_internals.md#pros-and-cons). 
## Subinterpreter Architecture @@ -144,7 +148,7 @@ Ctx = py:context(1), │ │ └──────────┘ │ │ └──────────┘ │ │ └──────────┘ │ │ │ └──────────────┘ └──────────────┘ └──────────────┘ │ │ │ -│ Each thread owns its interpreter's GIL (Py_GIL_OWN) │ +│ Each thread owns its GIL (PyInterpreterConfig_OWN_GIL) │ │ No GIL contention between threads │ └─────────────────────────────────────────────────────────────────┘ ``` @@ -155,7 +159,7 @@ Ctx = py:context(1), **py_context_process**: Gen_server that owns a Python context reference and handles call/eval/exec operations. -**Subinterpreter Thread Pool (C)**: Manages N threads, each with its own Python subinterpreter created with `Py_NewInterpreterFromConfig()` and `Py_GIL_OWN`. +**Subinterpreter Thread Pool (C)**: Manages N threads, each with its own Python subinterpreter created with `Py_NewInterpreterFromConfig()` and `PyInterpreterConfig_OWN_GIL`. ### Request Flow diff --git a/docs/security.md b/docs/security.md index a9d52f0..5ba726a 100644 --- a/docs/security.md +++ b/docs/security.md @@ -42,6 +42,7 @@ This provides defense-in-depth - even if Python code tries to import `os` or `su When blocked operations are attempted, you'll see: + ```python >>> import subprocess >>> subprocess.run(['ls']) @@ -50,6 +51,7 @@ fork()/exec() would corrupt the Erlang runtime. Use Erlang ports (open_port/2) for subprocess management. ``` + ```python >>> import os >>> os.fork() diff --git a/docs/shared-dict.md b/docs/shared-dict.md index dddd25b..e94abe7 100644 --- a/docs/shared-dict.md +++ b/docs/shared-dict.md @@ -279,16 +279,21 @@ ok = py:shared_dict_destroy(Session). %% Create shared cache {ok, Cache} = py:shared_dict_new(), -%% Python can populate the cache +%% Inject the handle into Python globals (py:exec/1 has no locals +%% argument, so we stash it via py:eval with a side effect). 
+{ok, _} = py:eval( + <<"(globals().__setitem__('_cache_handle', handle), None)[-1]">>, + #{handle => Cache}), + +%% Python can now populate the cache ok = py:exec(<<" from erlang import SharedDict -cache = SharedDict(handle) -cache['computed'] = expensive_computation() -">>, -ok = py:eval(<<"1">>, #{<<"handle">> => Cache}), +cache = SharedDict(_cache_handle) +cache['computed'] = 42 +">>), %% Erlang can read cached values -CachedValue = py:shared_dict_get(Cache, <<"computed">>). +42 = py:shared_dict_get(Cache, <<"computed">>). ``` ## See Also diff --git a/priv/_erlang_impl/_ssl.py b/priv/_erlang_impl/_ssl.py deleted file mode 100644 index 133428f..0000000 --- a/priv/_erlang_impl/_ssl.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright 2026 Benoit Chesneau -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -SSL/TLS transport using ssl.MemoryBIO. - -This module provides SSL/TLS support that works with the Erlang event -loop by using Python's ssl.MemoryBIO for encryption while letting -Erlang handle the socket I/O. - -Architecture: -- Raw socket data flows through Erlang (enif_select) -- Encryption/decryption happens in Python via MemoryBIO -- Application sees decrypted data -""" - -import ssl -from asyncio import transports -from typing import Any, Optional, Callable - -__all__ = ['SSLTransport', 'create_ssl_transport'] - - -class SSLTransport(transports.Transport): - """SSL transport using ssl.MemoryBIO for encryption. 
- - This transport wraps a raw transport and provides transparent - SSL/TLS encryption using Python's ssl module with MemoryBIO. - - The key insight is that MemoryBIO allows us to do SSL encryption - without requiring a real socket file descriptor, which works - perfectly with Erlang's enif_select model. - """ - - max_size = 256 * 1024 # 256 KB - - def __init__(self, loop, raw_transport, protocol, ssl_context, - server_hostname=None, server_side=False, - ssl_handshake_timeout=None, call_connection_made=True): - """Initialize the SSL transport. - - Args: - loop: The event loop. - raw_transport: The underlying raw transport. - protocol: The application protocol. - ssl_context: SSL context for encryption. - server_hostname: Hostname for SNI (client side). - server_side: True if this is a server connection. - ssl_handshake_timeout: Timeout for the SSL handshake. - call_connection_made: Whether to call connection_made. - """ - self._loop = loop - self._raw_transport = raw_transport - self._protocol = protocol - self._ssl_context = ssl_context - self._server_hostname = server_hostname - self._server_side = server_side - self._handshake_timeout = ssl_handshake_timeout - self._call_connection_made = call_connection_made - - # SSL state - self._incoming = ssl.MemoryBIO() - self._outgoing = ssl.MemoryBIO() - self._ssl_object = ssl_context.wrap_bio( - self._incoming, self._outgoing, - server_side=server_side, - server_hostname=server_hostname - ) - - # State flags - self._handshake_complete = False - self._closing = False - self._closed = False - self._write_buffer = [] - - # Extra info - self._extra = { - 'ssl_context': ssl_context, - } - - # Create a protocol that receives raw data - self._raw_protocol = _SSLRawProtocol(self) - - async def _start(self): - """Start the SSL transport and perform handshake.""" - # Replace the raw transport's protocol with ours - self._raw_transport._protocol = self._raw_protocol - - # Perform SSL handshake - await self._do_handshake() - - # 
Update extra info - self._extra['peercert'] = self._ssl_object.getpeercert() - self._extra['cipher'] = self._ssl_object.cipher() - self._extra['compression'] = self._ssl_object.compression() - self._extra['ssl_object'] = self._ssl_object - - # Notify application protocol - if self._call_connection_made: - self._loop.call_soon(self._protocol.connection_made, self) - - async def _do_handshake(self): - """Perform SSL handshake.""" - while not self._handshake_complete: - try: - self._ssl_object.do_handshake() - self._handshake_complete = True - except ssl.SSLWantReadError: - # Need to send data and receive more - self._flush_outgoing() - await self._wait_for_data() - except ssl.SSLWantWriteError: - # Need to send buffered data - self._flush_outgoing() - - def _flush_outgoing(self): - """Flush outgoing encrypted data to raw transport.""" - data = self._outgoing.read() - if data: - self._raw_transport.write(data) - - async def _wait_for_data(self): - """Wait for data from the raw transport.""" - fut = self._loop.create_future() - - def on_data(): - if not fut.done(): - fut.set_result(None) - - self._raw_protocol._read_waiter = on_data - try: - await fut - finally: - self._raw_protocol._read_waiter = None - - def _on_raw_data(self, data: bytes): - """Called when raw encrypted data is received. - - Args: - data: Encrypted data from the network. 
- """ - self._incoming.write(data) - - if not self._handshake_complete: - # Still handshaking, notify waiter - return - - # Decrypt and deliver to application - try: - while True: - chunk = self._ssl_object.read(self.max_size) - if chunk: - self._protocol.data_received(chunk) - else: - break - except ssl.SSLWantReadError: - pass - except ssl.SSLError as e: - self._fatal_error(e, 'SSL read error') - - def _on_raw_eof(self): - """Called when the raw transport receives EOF.""" - try: - self._ssl_object.unwrap() - except ssl.SSLError: - pass - - if hasattr(self._protocol, 'eof_received'): - self._protocol.eof_received() - - def write(self, data: bytes): - """Write data to the transport. - - Args: - data: Plaintext data to send. - """ - if self._closing or self._closed: - return - if not data: - return - - if not self._handshake_complete: - self._write_buffer.append(data) - return - - try: - self._ssl_object.write(data) - self._flush_outgoing() - except ssl.SSLError as e: - self._fatal_error(e, 'SSL write error') - - def writelines(self, list_of_data): - """Write a list of data items.""" - for data in list_of_data: - self.write(data) - - def write_eof(self): - """Close the write end of the transport.""" - if self._closing: - return - self._closing = True - - try: - self._ssl_object.unwrap() - self._flush_outgoing() - except ssl.SSLError: - pass - - self._raw_transport.write_eof() - - def can_write_eof(self): - return False - - def close(self): - """Close the transport.""" - if self._closed: - return - self._closing = True - - try: - self._ssl_object.unwrap() - self._flush_outgoing() - except ssl.SSLError: - pass - - self._raw_transport.close() - self._closed = True - - def is_closing(self): - return self._closing - - def abort(self): - """Close immediately without flushing.""" - self._closed = True - self._raw_transport.abort() - - def get_extra_info(self, name, default=None): - if name in self._extra: - return self._extra[name] - return 
self._raw_transport.get_extra_info(name, default) - - def get_write_buffer_size(self): - return self._raw_transport.get_write_buffer_size() - - def get_write_buffer_limits(self): - return self._raw_transport.get_write_buffer_limits() - - def set_write_buffer_limits(self, high=None, low=None): - self._raw_transport.set_write_buffer_limits(high, low) - - def pause_reading(self): - self._raw_transport.pause_reading() - - def resume_reading(self): - self._raw_transport.resume_reading() - - def is_reading(self): - return self._raw_transport.is_reading() - - def _fatal_error(self, exc, message='Fatal SSL error'): - """Handle fatal SSL errors.""" - self._loop.call_exception_handler({ - 'message': message, - 'exception': exc, - 'transport': self, - 'protocol': self._protocol, - }) - self.abort() - - -class _SSLRawProtocol: - """Protocol that receives raw encrypted data for SSLTransport.""" - - def __init__(self, ssl_transport): - self._ssl_transport = ssl_transport - self._read_waiter = None - - def connection_made(self, transport): - pass - - def data_received(self, data): - self._ssl_transport._on_raw_data(data) - if self._read_waiter is not None: - self._read_waiter() - - def eof_received(self): - self._ssl_transport._on_raw_eof() - - def connection_lost(self, exc): - self._ssl_transport._protocol.connection_lost(exc) - - -async def create_ssl_transport( - loop, raw_transport, protocol, ssl_context, - server_hostname=None, server_side=False, - ssl_handshake_timeout=None): - """Create an SSL transport wrapping a raw transport. - - Args: - loop: The event loop. - raw_transport: The underlying raw transport. - protocol: The application protocol. - ssl_context: SSL context for encryption. - server_hostname: Hostname for SNI (client side). - server_side: True if this is a server connection. - ssl_handshake_timeout: Timeout for the SSL handshake. - - Returns: - The SSL transport. 
- """ - transport = SSLTransport( - loop, raw_transport, protocol, ssl_context, - server_hostname=server_hostname, - server_side=server_side, - ssl_handshake_timeout=ssl_handshake_timeout - ) - await transport._start() - return transport diff --git a/scripts/lint_doc_snippets.escript b/scripts/lint_doc_snippets.escript new file mode 100755 index 0000000..a8f528b --- /dev/null +++ b/scripts/lint_doc_snippets.escript @@ -0,0 +1,257 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -smp enable +%% +%% lint_doc_snippets — verifies fenced code blocks in README.md and +%% docs/*.md. +%% +%% * ```erlang blocks: every `py:Fn(Args)` call must reference a +%% function exported from py.erl at the correct arity. +%% * ```python blocks: must parse as Python (ast.parse). Anything +%% stricter (undefined names, wrong types) is out of scope; we are +%% after typos and removed-API references, not full type checking. +%% * Skip with `` on the line immediately above +%% the opening fence. Use sparingly and explain why in the +%% surrounding prose. +%% +%% Exit code is the number of failures (0 = clean). +%% +%% Run: rebar3 compile && escript scripts/lint_doc_snippets.escript + +main(Args) -> + Root = filename:dirname(filename:dirname(escript:script_name())), + EbinDir = filename:join([Root, "_build", "default", "lib", + "erlang_python", "ebin"]), + case filelib:is_dir(EbinDir) of + true -> + true = code:add_path(EbinDir); + false -> + io:format(standard_error, + "ebin not found at ~s; run `rebar3 compile` first~n", + [EbinDir]), + halt(2) + end, + PyExports = load_py_exports(), + Files = case Args of + [] -> default_files(Root); + _ -> Args + end, + Failures = lists:foldl( + fun(File, Acc) -> Acc + lint_file(File, PyExports) end, + 0, Files), + case Failures of + 0 -> + io:format("lint-docs: clean (~p file(s))~n", [length(Files)]), + halt(0); + N -> + io:format(standard_error, + "lint-docs: ~p failure(s)~n", [N]), + halt(N) + end. 
+ +default_files(Root) -> + Readme = filename:join(Root, "README.md"), + Docs = filelib:wildcard(filename:join([Root, "docs", "*.md"])), + [Readme | Docs]. + +load_py_exports() -> + case code:ensure_loaded(py) of + {module, py} -> + sets:from_list(py:module_info(exports)); + Other -> + io:format(standard_error, + "could not load py.beam: ~p~n", [Other]), + halt(2) + end. + +%%% ---------------------------------------------------------------- +%%% per-file walk +%%% ---------------------------------------------------------------- + +lint_file(File, PyExports) -> + case file:read_file(File) of + {ok, Bin} -> + Lines = binary:split(Bin, <<"\n">>, [global]), + Numbered = lists:zip(lists:seq(1, length(Lines)), Lines), + Blocks = collect_blocks(Numbered, <<>>, []), + lists:foldl( + fun(Block, Acc) -> Acc + lint_block(File, Block, PyExports) end, + 0, Blocks); + {error, Reason} -> + io:format(standard_error, "~s: read failed (~p)~n", + [File, Reason]), + 1 + end. + +%% Walk numbered lines and emit {Lang, StartLine, BodyBin} for each +%% ```LANG ... ``` block. PrevLine carries the previous line so we can +%% detect a `` marker on the line immediately before +%% an opening fence. +collect_blocks([], _Prev, Acc) -> + lists:reverse(Acc); +collect_blocks([{_, Line} | Rest], Prev, Acc) -> + case classify(Line) of + {open, Lang} -> + Skip = is_skip_marker(Prev), + {Body, RestAfter, StartLine} = + consume_block(Rest, [], Skip, Lang), + NewAcc = case Skip of + true -> Acc; + false -> [{Lang, StartLine, Body} | Acc] + end, + collect_blocks(RestAfter, Line, NewAcc); + _ -> + collect_blocks(Rest, Line, Acc) + end. + +is_skip_marker(Line) -> + Trim = string:trim(Line), + Trim =:= <<"">>. + +classify(Line) -> + Trim = string:trim(Line), + case Trim of + <<"```erlang">> -> {open, erlang}; + <<"```python">> -> {open, python}; + <<"```py">> -> {open, python}; + <<"```">> -> close; + _ -> other + end. + +%% Consume lines until the closing ``` fence. Returns {BodyBin, RestLines, FirstLine}. 
+consume_block([], BodyAcc, _Skip, _Lang) ->
+    {iolist_to_binary(lists:reverse(BodyAcc)), [], 0};
+consume_block([{LineNo, Line} | Rest], BodyAcc, _Skip, Lang) ->
+    case classify(Line) of
+        close ->
+            {iolist_to_binary(lists:reverse(BodyAcc)), Rest, LineNo};
+        _ ->
+            consume_block(Rest, [<<Line/binary, "\n">> | BodyAcc],
+                          _Skip, Lang)
+    end.
+
+
+%%% ----------------------------------------------------------------
+%%% per-block lint
+%%% ----------------------------------------------------------------
+
+lint_block(File, {erlang, _Line, Body}, PyExports) ->
+    lint_erlang_block(File, Body, PyExports);
+lint_block(File, {python, _Line, Body}, _) ->
+    lint_python_block(File, Body);
+lint_block(_, _, _) ->
+    0.
+
+%% Erlang: scan tokens, look for `py:Fn(...)` patterns and check exports.
+lint_erlang_block(File, Body, PyExports) ->
+    case erl_scan:string(unicode:characters_to_list(Body)) of
+        {ok, Tokens, _} ->
+            check_py_calls(File, Tokens, PyExports);
+        {error, _Info, _Loc} ->
+            %% Erlang shell-style snippets (`1> ...`) and tutorial
+            %% fragments don't tokenise. Don't fail on those — the
+            %% goal is to catch wrong API names, not enforce
+            %% standalone parseability.
+            0
+    end.
+
+%% Walk the token list looking for the pattern:
+%%   atom(py) ':' atom(Fn) '(' ... ')'
+%% The counter consumes tokens up to and including the matching `)`.
+check_py_calls(_File, [], _Exports) -> 0;
+check_py_calls(File, [{atom, _, py}, {':', _}, {atom, Loc, Fn},
+                      {'(', _} | Rest], Exports) ->
+    {Arity, After} = count_top_level_args(Rest, 1, 0, 0),
+    Bad = case sets:is_element({Fn, Arity}, Exports) of
+              true -> 0;
+              false ->
+                  io:format(standard_error,
+                            "~s:~p: undefined or wrong-arity call py:~s/~p~n",
+                            [File, line_of(Loc), atom_to_list(Fn), Arity]),
+                  1
+          end,
+    Bad + check_py_calls(File, After, Exports);
+check_py_calls(File, [_ | Rest], Exports) ->
+    check_py_calls(File, Rest, Exports).
+
+line_of({Line, _Col}) -> Line;
+line_of(Line) when is_integer(Line) -> Line;
+line_of(_) -> 0.
+ +%% Count top-level arguments in a function call. Tracks nesting of +%% `(`, `[`, `{` so commas inside nested expressions are ignored. +%% Returns {Arity, TokensAfterClosingParen} — the caller advances past +%% the call before resuming the scan, so a `py:foo(...)` nested inside +%% another `py:bar(...)` is not double-counted. +count_top_level_args([{')', _} | Rest], 1, 0, _Args) -> + %% Empty arg list `f()`. + {0, Rest}; +count_top_level_args([{')', _} | Rest], 1, _Args, ArgsSoFar) -> + %% Closing the outer `(` — return arg count. + {ArgsSoFar + 1, Rest}; +count_top_level_args([], _Depth, ArgsSeen, ArgsSoFar) -> + %% Truncated snippet; best-effort count. + {ArgsSoFar + (case ArgsSeen of 0 -> 0; _ -> 1 end), []}; +count_top_level_args([{Tok, _} | Rest], Depth, _Args, ArgsSoFar) + when Tok =:= '('; Tok =:= '['; Tok =:= '{'; Tok =:= '<<' -> + count_top_level_args(Rest, Depth + 1, 1, ArgsSoFar); +count_top_level_args([{Tok, _} | Rest], Depth, _Args, ArgsSoFar) + when Tok =:= ')'; Tok =:= ']'; Tok =:= '}'; Tok =:= '>>' -> + count_top_level_args(Rest, Depth - 1, 1, ArgsSoFar); +%% `fun`, `case`, `if`, `try`, `receive`, `begin` open an `end`-terminated +%% block. Treat them as depth brackets so commas inside fun bodies +%% (which are part of one outer arg) are not counted. +count_top_level_args([{Kw, _} | Rest], Depth, _Args, ArgsSoFar) + when Kw =:= 'fun'; Kw =:= 'case'; Kw =:= 'if'; Kw =:= 'try'; + Kw =:= 'receive'; Kw =:= 'begin' -> + count_top_level_args(Rest, Depth + 1, 1, ArgsSoFar); +count_top_level_args([{'end', _} | Rest], Depth, _Args, ArgsSoFar) -> + count_top_level_args(Rest, Depth - 1, 1, ArgsSoFar); +count_top_level_args([{',', _} | Rest], 1, _ArgsSeen, ArgsSoFar) -> + count_top_level_args(Rest, 1, 0, ArgsSoFar + 1); +count_top_level_args([_Tok | Rest], Depth, _Args, ArgsSoFar) -> + count_top_level_args(Rest, Depth, 1, ArgsSoFar). + +%% Python: write the snippet to a temp file and run ast.parse on it. 
+%% IndentationError / TabError are tolerated because tutorial-style +%% fragments often omit outer scope (e.g. show only the body of an +%% if-block, or a single line of a larger function). SyntaxError still +%% fails the lint. +lint_python_block(File, Body) -> + TmpDir = case os:getenv("TMPDIR") of + false -> "/tmp"; + Dir -> Dir + end, + Tmp = filename:join(TmpDir, "lint_snip_" + ++ integer_to_list(erlang:unique_integer([positive])) + ++ ".py"), + ok = file:write_file(Tmp, Body), + Script = + "import ast,sys\n" + "src=open(sys.argv[1]).read()\n" + "try:\n" + " ast.parse(src)\n" + "except (IndentationError, TabError):\n" + " sys.exit(0)\n" + "except SyntaxError as e:\n" + " print(f'SyntaxError: {e}')\n" + " sys.exit(1)\n", + Cmd = "python3 -c " ++ shell_quote(Script) ++ " " ++ Tmp + ++ " 2>&1; echo __exit_$?__", + Out = os:cmd(Cmd), + file:delete(Tmp), + case re:run(Out, <<"__exit_0__\\s*$">>, [{capture, none}]) of + match -> 0; + nomatch -> + Trimmed = re:replace(Out, <<"__exit_\\d+__\\s*$">>, <<>>, + [{return, list}]), + io:format(standard_error, + "~s: python snippet failed:~n~s~n", + [File, Trimmed]), + 1 + end. + +%% Single-quote a string for safe inclusion in /bin/sh. +shell_quote(S) -> + Escaped = re:replace(S, "'", "'\\\\''", [global, {return, list}]), + "'" ++ Escaped ++ "'". diff --git a/src/py_util.erl b/src/py_util.erl index 415e267..5ed4319 100644 --- a/src/py_util.erl +++ b/src/py_util.erl @@ -18,15 +18,9 @@ -module(py_util). -export([ - to_binary/1, - send_response/3, - normalize_timeout/1, - normalize_timeout/2 + to_binary/1 ]). -%% Default timeout (30 seconds) --define(DEFAULT_TIMEOUT, 30000). - %%% ============================================================================ %%% API %%% ============================================================================ @@ -39,27 +33,3 @@ to_binary(List) when is_list(List) -> list_to_binary(List); to_binary(Bin) when is_binary(Bin) -> Bin. - -%% @doc Send a response to a caller. 
--spec send_response(pid(), reference(), {ok, term()} | {error, term()} | ok) -> ok. -send_response(Caller, Ref, {ok, Value}) -> - Caller ! {py_response, Ref, {ok, Value}}, - ok; -send_response(Caller, Ref, {error, Error}) -> - Caller ! {py_error, Ref, Error}, - ok; -send_response(Caller, Ref, ok) -> - Caller ! {py_response, Ref, {ok, none}}, - ok. - -%% @doc Normalize a timeout value, returning milliseconds or 0 for infinity. -%% Uses the default timeout for invalid values. --spec normalize_timeout(timeout()) -> non_neg_integer(). -normalize_timeout(Timeout) -> - normalize_timeout(Timeout, ?DEFAULT_TIMEOUT). - -%% @doc Normalize a timeout value with a custom default. --spec normalize_timeout(timeout(), non_neg_integer()) -> non_neg_integer(). -normalize_timeout(infinity, _Default) -> 0; -normalize_timeout(Ms, _Default) when is_integer(Ms), Ms > 0 -> Ms; -normalize_timeout(_, Default) -> Default. diff --git a/test/coverage_audit.md b/test/coverage_audit.md new file mode 100644 index 0000000..2502366 --- /dev/null +++ b/test/coverage_audit.md @@ -0,0 +1,88 @@ +# Documentation Snippet → Test Coverage Audit + +This file maps every public `py:*` and `erlang.*` API shown in the +README and `docs/*.md` to at least one `*_SUITE.erl` test that +exercises it. Update this table whenever a documented API is added, +renamed, or removed. 
+ +## Erlang public API (`src/py.erl` exports) + +| API | Documented in | Test suite | Test case | +|---|---|---|---| +| `py:call/3` | README, getting-started.md, pools.md | `py_SUITE` | `test_call_math`, `test_call_json` | +| `py:call/4` | README, pools.md | `py_SUITE` | `test_call_kwargs` | +| `py:call/5` | pools.md | `py_pool_SUITE` | router-bound call tests | +| `py:cast/3` | README, threading.md | `py_SUITE` | `test_cast` | +| `py:cast/4` | README | `py_SUITE` | `test_cast_kwargs` | +| `py:cast/5` | (internal Ctx variant) | `py_SUITE` (indirect) | covered via `test_cast` | +| `py:eval/1` | README, getting-started.md | `py_SUITE` | `test_eval` | +| `py:eval/2` | README, getting-started.md | `py_SUITE` | `test_eval`, `test_eval_complex_locals` | +| `py:eval/3` | README | `py_SUITE` | `test_timeout` | +| `py:exec/1` | README, getting-started.md | `py_SUITE` | `test_exec` | +| `py:exec/2` | scalability.md (with Ctx) | `py_context_SUITE` | exec tests | +| `py:spawn_call/3` | README | `py_SUITE` | `test_spawn_call` | +| `py:await/1`, `py:await/2` | README | `py_SUITE` | `test_spawn_call` | +| `py:async_call/3` | README, asyncio.md, threading.md | `py_SUITE` | `test_asyncio_call` | +| `py:async_await/1`, `py:async_await/2` | README, asyncio.md | `py_SUITE` | `test_asyncio_call` | +| `py:async_gather/1` | README, asyncio.md | `py_SUITE` | `test_asyncio_gather` | +| `py:async_gather/2` | (timeout variant) | `py_SUITE` | `test_asyncio_gather_timeout` | +| `py:parallel/1` | README, scalability.md | `py_SUITE` | `test_parallel_execution` | +| `py:subinterp_supported/0` | scalability.md | `py_SUITE` | `test_subinterp_supported` | +| `py:execution_mode/0` | README, scalability.md | `py_SUITE` | `test_execution_mode` | +| `py:context/0`, `py:context/1` | README, process-bound-envs.md | `py_context_SUITE` | context creation | +| `py:state_set`, `state_fetch`, `state_incr`, `state_decr`, `state_remove`, `state_keys`, `state_clear` | README, getting-started.md | `py_SUITE` | 
`test_shared_state` | +| `py:register_function/2`, `register_function/3` | README, threading.md | `py_SUITE` | `test_erlang_callback`, `test_erlang_callback_mfa` | +| `py:unregister_function/1` | README | `py_SUITE` | `test_erlang_callback` | +| `py:stream/3`, `py:stream_eval/1` | README, streaming.md | `py_SUITE` | `test_streaming`, `test_stream_function` | +| `py:dup_fd/1` | reactor.md | `py_fd_ops_SUITE` | `dup_fd_test` | +| `py:configure_logging/0,1` | README, logging.md | `py_logging_SUITE` | logging tests | +| `py:enable_tracing/0`, `disable_tracing/0`, `get_traces/0`, `clear_traces/0` | README, logging.md | `py_logging_SUITE` | tracing tests | +| `py:activate_venv/1`, `deactivate_venv/0`, `venv_info/0` | README | `py_SUITE` | `test_venv` | +| `py:memory_stats/0`, `py:gc/0` | README | `py_SUITE` | `test_memory_stats`, `test_gc` | +| `py:tracemalloc_start/0`, `tracemalloc_stop/0` | README | `py_SUITE` | indirect via memory tests | +| `py:version/0` | (no doc) | not exercised | — | + +## Python public API (`erlang.*` and `from erlang import ...`) + +| API | Documented in | Test suite | +|---|---|---| +| `erlang.call(name, *args)` | README, threading.md | `py_SUITE` (`test_erlang_callback`) | +| `erlang.async_call(name, *args)` | threading.md, asyncio.md | `py_async_e2e_SUITE`, `py_thread_callback_SUITE` | +| `erlang.send(pid, msg)` | README, threading.md | `py_pid_send_SUITE` | +| `erlang.atom(name)` | (Python-side wrapper) | `py_owngil_features_SUITE`, `py_test_pid_send.py` | +| `erlang.Pid` | type-conversion.md | `py_pid_send_SUITE` | +| `erlang.Span`, `erlang.trace()` | logging.md | `py_logging_SUITE` | +| `erlang.run(main)`, `erlang.new_event_loop()` | asyncio.md | `py_asyncio_compat_SUITE`, `py_event_loop_SUITE` | +| `erlang.install()` | asyncio.md | `py_asyncio_policy_SUITE` | +| `erlang.spawn_task()` | asyncio.md | `py_async_task_SUITE` | +| `erlang.sleep()` | asyncio.md | `py_erlang_sleep_SUITE` | +| `erlang.SharedDict` | shared-dict.md | `py_SUITE` 
(`test_shared_dict_*`) | +| `erlang.Channel`, `erlang.ByteChannel` | channel.md, buffer.md | `py_channel_SUITE`, `py_byte_channel_SUITE` | +| `erlang.reactor.Protocol` and friends | reactor.md | `py_reactor_SUITE` | +| `erlang.state_set / state_get / state_delete / state_keys / state_incr / state_decr` (registered callbacks) | README, getting-started.md | `py_SUITE` (`test_shared_state`) | + +## Notes for future audits + +1. The `py:cast/5` (Ctx, Module, Func, Args, Kwargs) variant is not + directly exercised. It is a one-line wrapper around + `py_context:call/5` and shares its kwargs marshaling with + `py:call/5`. If a future change diverges them, add a dedicated test. + +2. `py:version/0` is exported but not documented. Either document it + or drop the export. + +3. `from erlang import run_command` (`docs/migration.md:426`) and + `from erlang import run_shell` (`docs/security.md:103`) refer to + user-side registrations, not built-ins. The surrounding prose makes + the example pattern explicit; do not mistake these for documented + APIs. + +4. `priv/_erlang_impl/_ssl.py` was removed in this audit (no importer). + Do not reintroduce unless an importer is wired up. + +5. `priv/_erlang_impl/_subprocess.py` is intentionally a stub that + raises `NotImplementedError` from `create_subprocess_shell` and + `create_subprocess_exec`. It is lazy-imported by + `priv/_erlang_impl/_loop.py:948,958` when an asyncio caller invokes + `loop.subprocess_shell()` / `loop.subprocess_exec()`, which are + blocked because `fork()` would corrupt the Erlang VM. Do not delete. 
diff --git a/test/py_SUITE.erl b/test/py_SUITE.erl index 5df9d98..d4b0f0a 100644 --- a/test/py_SUITE.erl +++ b/test/py_SUITE.erl @@ -17,6 +17,7 @@ test_eval_complex_locals/1, test_exec/1, test_cast/1, + test_cast_kwargs/1, test_spawn_call/1, test_type_conversions/1, test_nested_types/1, @@ -35,6 +36,7 @@ test_erlang_attr_syntax/1, test_asyncio_call/1, test_asyncio_gather/1, + test_asyncio_gather_timeout/1, test_subinterp_supported/1, test_parallel_execution/1, test_venv/1, @@ -76,6 +78,7 @@ all() -> test_eval_complex_locals, test_exec, test_cast, + test_cast_kwargs, test_spawn_call, test_type_conversions, test_nested_types, @@ -94,6 +97,7 @@ all() -> test_erlang_attr_syntax, test_asyncio_call, test_asyncio_gather, + test_asyncio_gather_timeout, test_subinterp_supported, test_parallel_execution, test_venv, @@ -230,6 +234,17 @@ test_cast(_Config) -> true = lists:sort([R1, R2]) =:= [<<"msg1">>, <<"msg2">>], ok. +test_cast_kwargs(_Config) -> + %% py:cast/4 must accept and forward a kwargs map without crashing. + %% The kwargs marshaling itself is exercised by test_call_kwargs/1; this + %% covers the cast/4 wrapper whose only difference from cast/3 is the + %% Kwargs map. + ok = py:cast(json, dumps, [#{<<"a">> => 1}], #{indent => 2}), + ok = py:cast(json, dumps, [[1, 2, 3]], #{separators => [<<", ">>, <<": ">>]}), + %% Allow the spawned executors to actually run the calls. + timer:sleep(100), + ok. + test_spawn_call(_Config) -> %% Test spawn_call with await Ref1 = py:spawn_call(math, sqrt, [100]), @@ -592,6 +607,17 @@ test_asyncio_gather(_Config) -> {ok, [4.0, 5.0, 6.0]} = py:async_gather(Calls), ok. +test_asyncio_gather_timeout(_Config) -> + %% py:async_gather/2 with an explicit timeout returns inside the budget + %% on success. + Calls = [ + {math, sqrt, [4]}, + {math, sqrt, [9]}, + {math, sqrt, [16]} + ], + {ok, [2.0, 3.0, 4.0]} = py:async_gather(Calls, 5000), + ok. 
+ test_subinterp_supported(_Config) -> %% Test that subinterp_supported returns a boolean Result = py:subinterp_supported(), diff --git a/test/py_fd_ops_SUITE.erl b/test/py_fd_ops_SUITE.erl index c4b1873..d305274 100644 --- a/test/py_fd_ops_SUITE.erl +++ b/test/py_fd_ops_SUITE.erl @@ -27,7 +27,8 @@ socketpair_test/1, fd_read_write_test/1, fd_close_test/1, - fd_select_test/1 + fd_select_test/1, + dup_fd_test/1 ]). all() -> @@ -35,7 +36,8 @@ all() -> socketpair_test, fd_read_write_test, fd_close_test, - fd_select_test + fd_select_test, + dup_fd_test ]. init_per_suite(Config) -> @@ -142,3 +144,35 @@ fd_select_test(_Config) -> ok = py_nif:fd_close(Fd1), ok = py_nif:fd_close(Fd2), ok. + +%% @doc Test py:dup_fd/1 — duplicates an fd so caller and dup can be +%% closed independently and both refer to the same kernel descriptor. +dup_fd_test(_Config) -> + {ok, {Fd1, Fd2}} = py_nif:socketpair(), + + %% Duplicate Fd2; the dup must be a different integer. + {ok, DupFd2} = py:dup_fd(Fd2), + true = is_integer(DupFd2), + true = DupFd2 =/= Fd2, + true = DupFd2 >= 0, + + %% Write through Fd1 — both Fd2 and DupFd2 see the same data + %% because they reference the same socket end. + Payload = <<"dup_fd round-trip">>, + {ok, _} = py_nif:fd_write(Fd1, Payload), + timer:sleep(10), + {ok, ReadFromOriginal} = py_nif:fd_read(Fd2, 1024), + Payload = ReadFromOriginal, + + %% Close the original fd. The duplicate must remain usable. + ok = py_nif:fd_close(Fd2), + Payload2 = <<"after-close">>, + {ok, _} = py_nif:fd_write(Fd1, Payload2), + timer:sleep(10), + {ok, ReadFromDup} = py_nif:fd_read(DupFd2, 1024), + Payload2 = ReadFromDup, + + %% Clean up + ok = py_nif:fd_close(DupFd2), + ok = py_nif:fd_close(Fd1), + ok.