Source code for scitex_introspect._docstring

#!/usr/bin/env python3
# Timestamp: 2025-01-20
# File: /home/ywatanabe/proj/scitex-code/src/scitex/introspect/_docstring.py

"""Docstring extraction and parsing utilities."""

from __future__ import annotations

import inspect
import re
import textwrap
from typing import Literal

from ._resolve import get_type_info, resolve_object


# Maps a normalised header title (lower-case, single-spaced, no trailing
# colon) to the key used in the parsed-sections dict.
_SECTION_KEYS = {
    "parameter": "parameters",
    "parameters": "parameters",
    "args": "parameters",
    "arguments": "parameters",
    "return": "returns",
    "returns": "returns",
    "example": "examples",
    "examples": "examples",
    "note": "notes",
    "notes": "notes",
    "raise": "raises",
    "raises": "raises",
    "see also": "see_also",
}

# A NumPy-style header underline: a run of dashes or equals signs.
_UNDERLINE_RE = re.compile(r"[-=]+")


def _match_section_header(lines: list[str], index: int) -> tuple[str, int]:
    """Detect a section header starting at ``lines[index]``.

    Two header shapes are recognised, both only at column 0:

    * NumPy style: a known title followed by a line made of ``-`` or ``=``.
    * Google style: a known title followed by ``:`` alone on its line.

    Returns
    -------
    tuple
        ``(section_key, lines_consumed)``; ``("", 0)`` when there is no
        header at ``index``.
    """
    line = lines[index]
    # Indented lines belong to a section body (e.g. a nested "Note:" inside
    # an argument description), never to a top-level header.
    if not line or line[0].isspace():
        return "", 0

    title = " ".join(line.split()).lower()
    has_colon = title.endswith(":")
    if has_colon:
        title = title[:-1].rstrip()

    section = _SECTION_KEYS.get(title)
    if section is None:
        return "", 0

    # The underline lives on the *next* line, so it has to be checked with a
    # look-ahead; a single split line can never contain the newline itself.
    has_underline = index + 1 < len(lines) and _UNDERLINE_RE.fullmatch(
        lines[index + 1].strip()
    )
    if has_underline:
        return section, 2
    if has_colon:
        return section, 1
    # A bare word such as "Returns" with neither underline nor colon is prose.
    return "", 0


def _store_section(sections: dict, name: str, content: list[str]) -> None:
    """Dedent ``content`` and record it under ``name``, skipping empty text."""
    text = textwrap.dedent("\n".join(content)).strip()
    if not text:
        return
    existing = sections.get(name)
    # A repeated header extends the section instead of discarding earlier text.
    sections[name] = f"{existing}\n\n{text}" if existing else text


def _parse_docstring(docstring: str) -> dict:
    """Parse a NumPy- or Google-style docstring into sections.

    Parameters
    ----------
    docstring : str
        The docstring text; it is normalised with ``inspect.cleandoc`` first,
        so raw (still indented) docstrings are accepted too.

    Returns
    -------
    dict
        Always contains ``summary``, ``description``, ``parameters``,
        ``returns``, ``examples`` and ``notes`` (empty string when absent).
        ``raises`` and ``see_also`` are added only when those sections are
        present and non-empty.
    """
    sections = {
        "summary": "",
        "description": "",
        "parameters": "",
        "returns": "",
        "examples": "",
        "notes": "",
    }

    if not docstring:
        return sections

    lines = inspect.cleandoc(docstring).split("\n")

    # The summary is the first run of non-blank lines; it ends at the first
    # blank line after it or at the first section header.
    index = 0
    summary_lines = []
    while index < len(lines):
        stripped = lines[index].strip()
        if not stripped:
            if summary_lines:
                break
        elif _match_section_header(lines, index)[1]:
            break
        else:
            summary_lines.append(stripped)
        index += 1

    sections["summary"] = " ".join(summary_lines)

    # Everything up to the first header is the long description; after that,
    # each header switches the section that subsequent lines are filed under.
    current_section = "description"
    current_content = []
    while index < len(lines):
        section, consumed = _match_section_header(lines, index)
        if not consumed:
            current_content.append(lines[index])
            index += 1
            continue
        _store_section(sections, current_section, current_content)
        current_section = section
        current_content = []
        index += consumed  # skip the title and, for NumPy style, its underline

    _store_section(sections, current_section, current_content)

    return sections


def get_docstring(
    dotted_path: str,
    format: Literal["raw", "parsed", "summary"] = "raw",
) -> dict:
    """
    Look up an object by dotted path and report its docstring.

    Parameters
    ----------
    dotted_path : str
        Dotted path passed to ``resolve_object`` to locate the object.
    format : str
        'raw' - the full docstring, unchanged
        'parsed' - the full docstring plus a ``sections`` breakdown
        'summary' - only the text before the first blank line
        Any other value is handled like 'raw'.

    Returns
    -------
    dict
        When resolution fails: ``success`` (False) and ``error``.
        Otherwise: ``success`` (True), ``docstring``, ``type_info`` and,
        for format='parsed', ``sections``.
    """
    target, failure = resolve_object(dotted_path)
    if failure:
        return {"success": False, "error": failure}

    type_info = get_type_info(target)
    # getdoc returns None for undocumented objects; normalise to "".
    full_text = inspect.getdoc(target) or ""

    if format == "summary":
        # Keep only the leading paragraph (everything before the first "\n\n").
        reported_text = full_text.partition("\n\n")[0].strip()
    else:
        reported_text = full_text

    # Keys are inserted in the order: success, docstring, [sections], type_info.
    result = {"success": True, "docstring": reported_text}
    if format == "parsed":
        result["sections"] = _parse_docstring(full_text)
    result["type_info"] = type_info
    return result