Source code for scitex_msword.profiles

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: 2025-12-11 15:15:00
# File: /home/ywatanabe/proj/scitex-code/src/scitex/msword/profiles.py

"""
Profiles for mapping MS Word styles to SciTeX writer structures.

Each profile corresponds to a journal / conference template, such as:
- "generic"
- "mdpi-ijerph"
- "resna-2025"
- "iop-double-anonymous"

The profiles define:
- Which Word style names correspond to section headings
- How to detect captions for figures and tables
- How to handle references, lists, equations, etc.
- Layout settings (columns, margins, fonts)
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional


@dataclass
class BaseWordProfile:
    """
    Describe how one Word template maps onto SciTeX writer documents.

    A profile is a plain bag of settings: Word style names, caption and
    reference-section markers, and a few layout hints. Nothing in this
    class interprets the values; consumers read the attributes they need.

    Attributes
    ----------
    name : str
        Identifier of the profile (e.g., "mdpi-ijerph").
    description : str
        Short human-readable summary of the template.
    heading_styles : dict[int, str]
        Section depth (1, 2, 3, ...) -> Word style name
        (e.g., {1: "Heading 1", 2: "Heading 2"}). Empty by default.
    caption_style : str
        Word style name carried by figure/table captions.
    normal_style : str
        Word style name of ordinary body paragraphs.
    reference_section_titles : list[str]
        Heading texts that mark the beginning of the reference list.
    figure_caption_prefixes : list[str]
        Leading tokens that mark a figure caption (e.g., "Figure", "Fig.").
    table_caption_prefixes : list[str]
        Leading tokens that mark a table caption (e.g., "Table", "Tab.").
    list_styles : dict[str, str]
        List kind ("bullet", "numbered") -> Word style name.
    equation_style : str | None
        Word style name for equations, or None when there is none.
    columns : int
        Number of text columns in the layout (1 or 2).
    double_anonymous : bool
        True when the template requires double-anonymous formatting.
    body_font : str | None
        Advisory body font name; None means "no preference".
    body_font_size_pt : float | None
        Advisory body font size in points.
    heading_background_hex : str | None
        Advisory heading shading colour as a hex string (e.g., "D9D9D9").
    line_spacing : float | None
        Advisory line-spacing multiplier.
    post_import_hooks : list[Callable]
        Callables registered as post-import hooks. They are only stored
        here; this class neither calls them nor fixes their signature.
    pre_export_hooks : list[Callable]
        Callables registered as pre-export hooks; stored only, as above.
    """

    # --- Identity (required) -------------------------------------------------
    name: str
    description: str

    # --- Word style names ----------------------------------------------------
    heading_styles: Dict[int, str] = field(default_factory=dict)
    caption_style: str = "Caption"
    normal_style: str = "Normal"

    # --- Text markers used for detection -------------------------------------
    # Mutable defaults go through default_factory so that every instance
    # receives its own list/dict.
    reference_section_titles: List[str] = field(
        default_factory=lambda: ["References", "REFERENCES"]
    )
    figure_caption_prefixes: List[str] = field(
        default_factory=lambda: ["Figure", "Fig.", "Fig"]
    )
    table_caption_prefixes: List[str] = field(
        default_factory=lambda: ["Table", "Tab.", "Tab"]
    )
    list_styles: Dict[str, str] = field(
        default_factory=lambda: {
            "bullet": "List Bullet",
            "numbered": "List Number",
        }
    )
    equation_style: Optional[str] = None

    # --- Layout --------------------------------------------------------------
    columns: int = 1
    double_anonymous: bool = False

    # Optional typography hints (used by profiles like boost-2026). They are
    # advisory: writer code may consult them to pick fonts, heading shading
    # and line spacing instead of hard-coding per-profile logic.
    body_font: Optional[str] = None
    body_font_size_pt: Optional[float] = None
    heading_background_hex: Optional[str] = None
    line_spacing: Optional[float] = None

    # --- Post-processing hooks -----------------------------------------------
    post_import_hooks: List[Callable] = field(default_factory=list)
    pre_export_hooks: List[Callable] = field(default_factory=list)
# --- Concrete profiles ------------------------------------------------------


def _generic_profile() -> BaseWordProfile:
    """
    Generic Word template profile.

    This profile is intentionally conservative and assumes that:
    - "Heading 1" through "Heading 4" are used for section headings.
    - "Caption" is used for figure/table captions.
    - "Normal" is the default body text.

    This should work reasonably well for many simple manuscripts.
    """
    return BaseWordProfile(
        name="generic",
        description="Generic Word mapping with standard Heading styles.",
        heading_styles={
            1: "Heading 1",
            2: "Heading 2",
            3: "Heading 3",
            4: "Heading 4",
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["References", "REFERENCES", "Bibliography"],
    )


def _mdpi_ijerph_profile() -> BaseWordProfile:
    """
    MDPI IJERPH template profile.

    Based on the MDPI Word template structure:
    - Section headings use built-in heading styles.
    - References section is titled "References".
    - Single column layout.
    - Specific section order: Introduction, Materials and Methods,
      Results, Discussion, Conclusions (not encoded in this profile).
    """
    return BaseWordProfile(
        name="mdpi-ijerph",
        description="MDPI IJERPH (Int. J. Environ. Res. Public Health) Word template.",
        heading_styles={
            1: "Heading 1",
            2: "Heading 2",
            3: "Heading 3",
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["References"],
        columns=1,
    )


def _resna_2025_profile() -> BaseWordProfile:
    """
    RESNA 2025 scientific paper template profile.

    The RESNA template:
    - Uses all-caps section headings (INTRODUCTION, METHODS, etc.)
    - Strict 4-page layout (not encoded in this profile)
    - Two-column format
    """
    return BaseWordProfile(
        name="resna-2025",
        description="RESNA 2025 Scientific Paper Word template.",
        heading_styles={
            1: "Heading 1",  # INTRODUCTION, METHODS, etc.
            2: "Heading 2",  # First-level sub-heading
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["References", "REFERENCES"],
        columns=2,
    )


def _iop_double_anonymous_profile() -> BaseWordProfile:
    """
    IOP double-anonymous Word template profile.

    The IOP template uses custom styles:
    - IOPH1, IOPH2, IOPH3 for headings
    - IOPTitle for title
    - IOPAbsText for abstract
    - IOPAff for affiliations
    - Requires removal of author-identifying information

    Only the heading styles are mapped here; the anonymisation
    requirement is signalled through ``double_anonymous=True``.
    """
    return BaseWordProfile(
        name="iop-double-anonymous",
        description="IOP double-anonymous Word template.",
        heading_styles={
            1: "IOPH1",
            2: "IOPH2",
            3: "IOPH3",
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["References"],
        double_anonymous=True,
    )


def _ieee_profile() -> BaseWordProfile:
    """
    IEEE conference/journal template profile.

    The IEEE template:
    - Two-column format
    - Roman numeral section numbering
    - Specific citation style
    """
    return BaseWordProfile(
        name="ieee",
        description="IEEE conference/journal Word template.",
        heading_styles={
            1: "Heading 1",
            2: "Heading 2",
            3: "Heading 3",
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["References", "REFERENCES"],
        columns=2,
    )


def _springer_profile() -> BaseWordProfile:
    """
    Springer Nature journal template profile.
    """
    return BaseWordProfile(
        name="springer",
        description="Springer Nature journal Word template.",
        heading_styles={
            1: "Heading 1",
            2: "Heading 2",
            3: "Heading 3",
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["References"],
        columns=1,
    )


def _elsevier_profile() -> BaseWordProfile:
    """
    Elsevier journal template profile.
    """
    return BaseWordProfile(
        name="elsevier",
        description="Elsevier journal Word template.",
        heading_styles={
            1: "Heading 1",
            2: "Heading 2",
            3: "Heading 3",
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["References"],
        columns=1,
    )


def _boost_2026_profile() -> BaseWordProfile:
    """
    JST BOOST 2026 grant application template profile.

    Layout convention (per BOOST v16 dogfooding):
    - Body text: 10.5pt MS Gothic
    - Headings: bold, with light-grey (#D9D9D9) background shading
    - Line spacing: 1.0 (single)
    - Single column

    The advisory ``body_font`` / ``body_font_size_pt`` /
    ``heading_background_hex`` / ``line_spacing`` fields let the writer layer
    (or downstream tooling such as the BOOST v16 builder) pick these up
    without hard-coding per-document logic.
    """
    return BaseWordProfile(
        name="boost-2026",
        description="JST BOOST 2026 grant application Word template.",
        heading_styles={
            1: "Heading 1",
            2: "Heading 2",
            3: "Heading 3",
        },
        caption_style="Caption",
        normal_style="Normal",
        reference_section_titles=["参考文献", "References"],
        columns=1,
        body_font="MS Gothic",
        body_font_size_pt=10.5,
        heading_background_hex="D9D9D9",
        line_spacing=1.0,
    )


def _build_default_registry() -> Dict[str, BaseWordProfile]:
    """
    Build the initial name -> profile mapping, including short aliases.

    Each aliased profile is constructed exactly once and bound to both its
    canonical name and its alias. An alias therefore resolves to the *same*
    object as the canonical name, so an in-place change (for example
    appending a hook) is visible under either key.

    Returns
    -------
    dict[str, BaseWordProfile]
        Registry keyed by profile name; key order matches the original
        declaration order (canonical name immediately followed by its alias).
    """
    mdpi = _mdpi_ijerph_profile()
    resna = _resna_2025_profile()
    iop = _iop_double_anonymous_profile()
    boost = _boost_2026_profile()
    return {
        "generic": _generic_profile(),
        "mdpi-ijerph": mdpi,
        "mdpi": mdpi,  # Alias
        "resna-2025": resna,
        "resna": resna,  # Alias
        "iop-double-anonymous": iop,
        "iop": iop,  # Alias
        "ieee": _ieee_profile(),
        "springer": _springer_profile(),
        "elsevier": _elsevier_profile(),
        "boost-2026": boost,
        "boost": boost,  # Alias
    }


# Registry of known profiles
_PROFILES: Dict[str, BaseWordProfile] = _build_default_registry()
def list_profiles() -> list[str]:
    """
    Return the names of all registered MS Word profiles.

    Every key of the registry is reported, so aliases appear alongside
    canonical names.

    Returns
    -------
    list[str]
        Profile names in ascending sort order
        (e.g., ["boost", "boost-2026", "elsevier", ...]).

    Examples
    --------
    >>> from scitex.msword import list_profiles
    >>> profiles = list_profiles()
    >>> "generic" in profiles
    True
    """
    names = list(_PROFILES)
    names.sort()
    return names
def get_profile(name: str | None) -> BaseWordProfile:
    """
    Look up a registered Word profile.

    Parameters
    ----------
    name : str | None
        Name (or alias) of the profile. ``None`` selects "generic".

    Returns
    -------
    BaseWordProfile
        The instance stored in the registry under ``name``. It is returned
        as-is, not copied.

    Raises
    ------
    KeyError
        If ``name`` is not registered. The message lists the available
        profile names, and the original lookup error is chained as the cause.

    Examples
    --------
    >>> from scitex.msword import get_profile
    >>> profile = get_profile("mdpi-ijerph")
    >>> profile.columns
    1
    """
    # The default is resolved by a direct lookup, outside the wrapped path.
    if name is None:
        return _PROFILES["generic"]

    try:
        found = _PROFILES[name]
    except KeyError as exc:
        known_names = ", ".join(list_profiles())
        message = (
            f"Unknown MS Word profile: {name!r}. "
            f"Available profiles: {known_names}"
        )
        raise KeyError(message) from exc
    return found
def register_profile(profile: BaseWordProfile) -> None:
    """
    Add a custom Word profile to the registry.

    The profile is stored under ``profile.name``. An entry that already
    exists under that name is replaced.

    Parameters
    ----------
    profile : BaseWordProfile
        The profile to store.

    Examples
    --------
    >>> from scitex.msword import BaseWordProfile, register_profile
    >>> custom = BaseWordProfile(
    ...     name="my-journal",
    ...     description="My custom journal template",
    ...     heading_styles={1: "Title", 2: "Subtitle"},
    ... )
    >>> register_profile(custom)
    >>> "my-journal" in list_profiles()
    True
    """
    registry_key = profile.name
    _PROFILES[registry_key] = profile
# Names exported by a star-import of this module (the public API).
__all__ = [ "BaseWordProfile", "list_profiles", "get_profile", "register_profile", ]