Source code for scitex_scholar.core.Scholar

#!/usr/bin/env python3
# Timestamp: "2026-01-24 (ywatanabe)"
# File: /home/ywatanabe/proj/scitex-python/src/scitex/scholar/core/Scholar.py

"""
Unified Scholar class for scientific literature management.

This is the main entry point for all scholar functionality, providing:
- Simple, intuitive API
- Smart defaults
- Method chaining
- Progressive disclosure of advanced features
"""

from __future__ import annotations

from pathlib import Path
from typing import Optional, Union

import scitex_logging as logging

from scitex_scholar.config import ScholarConfig

from ._mixins import (
    EnricherMixin,
    LibraryHandlerMixin,
    LoaderMixin,
    PDFDownloadMixin,
    PipelineMixin,
    ProjectHandlerMixin,
    SaverMixin,
    SearchMixin,
    ServiceMixin,
    URLFindingMixin,
)

logger = logging.getLogger(__name__)


class Scholar(
    EnricherMixin,
    URLFindingMixin,
    PDFDownloadMixin,
    LoaderMixin,
    SearchMixin,
    SaverMixin,
    ProjectHandlerMixin,
    LibraryHandlerMixin,
    PipelineMixin,
    ServiceMixin,
):
    """Single entry point for SciTeX Scholar literature management.

    The class body itself only defines construction and the ``name``
    property; every other method used below (``_init_config``,
    ``_ensure_project_exists``, ``search``, ...) is expected to be supplied
    by the mixins listed in the class header.

    Unless disabled, papers are automatically enriched with:

    - Journal impact factors from the impact_factor package (2024 JCR data)
    - Citation counts from Semantic Scholar (via DOI/title matching)

    Examples
    --------
    Basic search with automatic enrichment::

        scholar = Scholar()
        papers = scholar.search("deep learning neuroscience")
        # Papers now have impact_factor and citation_count populated
        papers.save("my_pac.bib")

    Disable automatic enrichment if needed::

        config = ScholarConfig(enable_auto_enrich=False)
        scholar = Scholar(config=config)

    Search a specific source::

        papers = scholar.search("transformer models", sources='arxiv')

    Advanced workflow::

        papers = (
            scholar.search("transformer models", year_min=2020)
            .filter(min_citations=50)
            .sort_by("impact_factor")
            .save("transformers.bib")
        )

    Local library::

        scholar._index_local_pdfs("./my_papers")
        local_papers = scholar.search_local("attention mechanism")
    """

    @property
    def name(self):
        """Return the concrete class name, used as a prefix in log output."""
        return type(self).__name__

    def __init__(
        self,
        config: Optional[Union[ScholarConfig, str, Path]] = None,
        project: Optional[str] = None,
        project_description: Optional[str] = None,
        browser_mode: Optional[str] = None,
    ):
        """Set up configuration, project and workspace for this instance.

        Parameters
        ----------
        config
            Forwarded unchanged to ``_init_config``. Documented inputs are a
            ``ScholarConfig`` instance, a path to a YAML config file (str or
            Path), or ``None`` (uses ``ScholarConfig.load()`` to find config).
        project
            Default project name for operations. When truthy, the project is
            ensured to exist via ``_ensure_project_exists``; otherwise the
            project name is whatever ``config.resolve`` returns, with
            ``"default"`` passed as the fallback.
        project_description
            Optional description, only used together with ``project``.
        browser_mode
            Browser mode (``'stealth'``, ``'interactive'``, ``'manual'``).
            Any falsy value is replaced by ``'stealth'``; the value is stored
            without validation.
        """
        self.config = self._init_config(config)
        self.browser_mode = "stealth" if not browser_mode else browser_mode
        self.project = self.config.resolve("project", project, "default")
        self.workspace_dir = self.config.path_manager.get_workspace_dir()

        # Only an explicitly passed project is created on demand; a project
        # name that came purely from config resolution is not.
        explicit_project = bool(project)
        if explicit_project:
            self._ensure_project_exists(project, project_description)

        library_path = self.config.get_library_project_dir(self.project)

        if not explicit_project:
            logger.info(f"{self.name}: Scholar initialized (library: {library_path})")
            return

        # NOTE(review): library_path was already requested for self.project;
        # confirm that appending ``project`` again gives the intended path.
        project_path = library_path / project
        logger.info(
            f"Scholar initialized with project '{project}' at {project_path}"
        )
__all__ = ["Scholar"]


if __name__ == "__main__":
    # Demo-only imports: kept out of module scope so importing Scholar does
    # not pull them in.
    from .Paper import Paper
    from .Papers import Papers

    def main():
        """Walk through the Scholar API, printing each step to stdout.

        Sections 2 and 3 are best-effort: any exception is reported as a
        "skipped" line and the demo carries on with the next section.
        """
        rule = "=" * 60
        print("\n" + rule)
        print("Scholar Module Demo - Clean API")
        print(rule + "\n")

        # 1. Initialize Scholar
        print("1. Initialize Scholar")
        print("-" * 60)
        scholar = Scholar(
            project="demo_project",
            project_description="Demo project for testing Scholar API",
        )
        print("Scholar initialized")
        print(f" Project: {scholar.project}")
        print()

        # 2. Project Management
        print("2. Project Management:")
        try:
            project_dir = scholar._create_project_metadata(
                "neural_networks_2024",
                description="Collection of neural network papers from 2024",
            )
            print(" Created project: neural_networks_2024")
            print(f" Project directory: {project_dir}")

            projects = scholar.list_projects()
            print(f" Total projects in library: {len(projects)}")
            # Show at most three entries, then summarise the remainder.
            for entry in projects[:3]:
                print(f" - {entry['name']}: {entry.get('description', 'No desc')}")
            if len(projects) > 3:
                print(f" ... and {len(projects) - 3} more")
        except Exception as exc:
            print(f" Project management demo skipped: {exc}")
        print()

        # 3. Library Statistics
        print("3. Library Statistics:")
        try:
            summary = scholar.get_library_statistics()
            print(f" Total projects: {summary['total_projects']}")
            print(f" Total papers: {summary['total_papers']}")
            print(f" Storage usage: {summary['storage_mb']:.2f} MB")
            print(f" Library path: {summary['library_path']}")
        except Exception as exc:
            print(f" Library statistics demo skipped: {exc}")
        print()

        # 4. Working with Papers
        print("4. Working with Papers:")
        vit_paper = Paper()
        vit_paper.metadata.basic.title = (
            "Vision Transformer: An Image Is Worth 16x16 Words"
        )
        vit_paper.metadata.basic.authors = ["Dosovitskiy, Alexey", "Beyer, Lucas"]
        vit_paper.metadata.basic.year = 2021
        vit_paper.metadata.publication.journal = "ICLR"
        vit_paper.metadata.set_doi("10.48550/arXiv.2010.11929")

        papers = Papers(
            [vit_paper],
            project="neural_networks_2024",
            config=scholar.config,
        )
        print(f" Created collection with {len(papers)} papers")
        print()

        # 5. Configuration
        print("5. Configuration Management:")
        print(f" Scholar directory: {scholar.config.paths.scholar_dir}")
        print(f" Library directory: {scholar.config.get_library_project_dir()}")
        print()

        # 6. Service Components
        print("6. Service Components (Internal):")
        # Each attribute is read immediately before its line is printed, so
        # the output order matches one print call per component.
        components = (
            ("Scholar Engine", "_scholar_engine"),
            ("Auth Manager", "_auth_manager"),
            ("Browser Manager", "_browser_manager"),
            ("Library Manager", "_library_manager"),
        )
        for label, attr in components:
            print(f" {label}: {type(getattr(scholar, attr)).__name__}")
        print()

        print("Scholar demo completed!")

    main()

# EOF