Skip to content

Core

Core CLDK module.

Provides the top-level CLDK entry point used to initialize language-specific analysis, Treesitter parsers, and related utilities.

CLDK #

Core class for the Code Language Development Kit (CLDK).

Initialize with the desired programming language and use the exposed helpers to perform language-specific analysis.

Parameters:

Name Type Description Default
language str

Programming language (e.g., "java", "python", "c").

required

Attributes:

Name Type Description
language str

Programming language of the project.

Source code in cldk/core.py
class CLDK:
    """Core class for the Code Language Development Kit (CLDK).

    Initialize with the desired programming language and use the exposed
    helpers to perform language-specific analysis.

    Args:
        language (str): Programming language (e.g., "java", "python", "c").

    Attributes:
        language (str): Programming language of the project.
    """

    def __init__(self, language: str):
        self.language: str = language

    def analysis(
        self,
        project_path: str | Path | None = None,
        source_code: str | None = None,
        eager: bool = False,
        analysis_level: str = AnalysisLevel.symbol_table,
        target_files: List[str] | None = None,
        analysis_backend_path: str | None = None,
        analysis_json_path: str | Path | None = None,
        cache_dir: str | Path | None = None,
        use_codeql: bool = True,
    ) -> JavaAnalysis | PythonAnalysis | CAnalysis:
        """Initialize a language-specific analysis façade.

        Exactly one of ``project_path`` and ``source_code`` must be given.
        Python mode accepts ``project_path`` only. For C, only
        ``project_path`` is forwarded to the analysis façade; every other
        argument is ignored.

        Args:
            project_path (str | Path | None): Directory path of the project.
                ``~`` is expanded and the path is resolved to an absolute
                path before use.
            source_code (str | None): Source code for single-file analysis.
                Not supported for Python.
            eager (bool): If True, forces regeneration of analysis databases.
            analysis_level (str): Analysis level. See AnalysisLevel.
            target_files (list[str] | None): Files to constrain analysis (optional).
            analysis_backend_path (str | None): Java only. Directory containing
                the ``codeanalyzer-*.jar`` to run. Not valid for Python — pass
                ``cache_dir`` instead.
            analysis_json_path (str | Path | None): Path to persist the analysis
                database / ``analysis.json``.
            cache_dir (str | Path | None): Python only. Cache home for the
                ``codeanalyzer-python`` backend — its virtualenv, CodeQL
                database, and ``analysis_cache.json`` (forwarded as the
                backend's ``cache_dir``). The backend owns all caching; when
                omitted it defaults to ``<project_path>/.codeanalyzer``.
                Ignored for other languages.
            use_codeql (bool): Python only, default True. Augments Jedi-resolved
                call edges with CodeQL-resolved edges; set False for a faster,
                Jedi-only analysis. Ignored for other languages.

        Returns:
            JavaAnalysis | PythonAnalysis | CAnalysis: Initialized analysis façade for the chosen language.

        Raises:
            CldkInitializationException: If both or neither of project_path and
                source_code are provided, if project_path is not an existing
                directory, if source_code is passed in Python mode, or if the
                Java-only ``analysis_backend_path`` is passed in Python mode.
            NotImplementedError: If the specified language is unsupported.

        Examples:
            Initialize Python analysis for a project directory and verify type
            (skipped under doctest because it needs a real project on disk):

            >>> from cldk import CLDK
            >>> cldk = CLDK(language="python")
            >>> analysis = cldk.analysis(project_path="path/to/project")  # doctest: +SKIP
            >>> from cldk.analysis.python import PythonAnalysis
            >>> isinstance(analysis, PythonAnalysis)  # doctest: +SKIP
            True
        """

        # Exactly one input mode (project directory or inline source) is allowed.
        if project_path is None and source_code is None:
            raise CldkInitializationException("Either project_path or source_code must be provided.")

        if project_path is not None and source_code is not None:
            raise CldkInitializationException("Both project_path and source_code are provided. Please provide only one.")

        # Normalize project_path: expand ~ and resolve to absolute path
        if project_path is not None:
            project_path = Path(project_path).expanduser().resolve()
            if not project_path.is_dir():
                raise CldkInitializationException(f"project_path does not exist or is not a directory: {project_path}")

        if self.language == "java":
            return JavaAnalysis(
                project_dir=project_path,
                source_code=source_code,
                analysis_level=analysis_level,
                analysis_backend_path=analysis_backend_path,
                analysis_json_path=analysis_json_path,
                target_files=target_files,
                eager_analysis=eager,
            )
        elif self.language == "python":
            # Reject Java-only inputs up front so misuse fails loudly instead
            # of being silently dropped.
            if source_code is not None:
                raise CldkInitializationException("source_code mode is not supported for Python; please pass project_path.")
            if analysis_backend_path is not None:
                raise CldkInitializationException(
                    "analysis_backend_path is Java-only (it locates codeanalyzer-*.jar). "
                    "For Python, use cache_dir for the backend's virtualenv/CodeQL cache."
                )
            return PythonAnalysis(
                project_dir=project_path,
                analysis_level=analysis_level,
                cache_dir=cache_dir,
                analysis_json_path=analysis_json_path,
                target_files=target_files,
                eager_analysis=eager,
                use_codeql=use_codeql,
            )
        elif self.language == "c":
            # Only the project directory is forwarded for C.
            return CAnalysis(project_dir=project_path)
        else:
            raise NotImplementedError(f"Analysis support for {self.language} is not implemented yet.")

    def treesitter_parser(self) -> TreesitterJava:
        """Return a Treesitter parser for the selected language.

        Only Java is supported.

        Returns:
            TreesitterJava: Parser for Java language.

        Raises:
            NotImplementedError: If the language is unsupported.

        Examples:
            Get a Java Treesitter parser:

            >>> from cldk import CLDK
            >>> parser = CLDK(language="java").treesitter_parser()
            >>> parser.__class__.__name__
            'TreesitterJava'
        """
        if self.language == "java":
            return TreesitterJava()
        else:
            raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")

    def tree_sitter_utils(self, source_code: str) -> TreesitterSanitizer:
        """Return Treesitter utilities for the selected language.

        Only Java is supported.

        Args:
            source_code (str): Source code to initialize the utilities with.

        Returns:
            TreesitterSanitizer: Utility wrapper for Java Treesitter operations.

        Raises:
            NotImplementedError: If the language is unsupported.

        Examples:
            Create Java Treesitter sanitizer utilities:

            >>> from cldk import CLDK
            >>> utils = CLDK(language="java").tree_sitter_utils('class A {}')
            >>> from cldk.utils.sanitization.java import TreesitterSanitizer
            >>> isinstance(utils, TreesitterSanitizer)
            True
        """
        if self.language == "java":
            return TreesitterSanitizer(source_code=source_code)
        else:
            # NOTE(review): the message says "parser" although this method
            # returns utilities; left unchanged in case callers match on it.
            raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")

analysis(project_path=None, source_code=None, eager=False, analysis_level=AnalysisLevel.symbol_table, target_files=None, analysis_backend_path=None, analysis_json_path=None, cache_dir=None, use_codeql=True) #

Initialize a language-specific analysis façade.

Parameters:

Name Type Description Default
project_path str | Path | None

Directory path of the project.

None
source_code str | None

Source code for single-file analysis.

None
eager bool

If True, forces regeneration of analysis databases.

False
analysis_level str

Analysis level. See AnalysisLevel.

symbol_table
target_files list[str] | None

Files to constrain analysis (optional).

None
analysis_backend_path str | None

Java only. Directory containing the codeanalyzer-*.jar to run. Not valid for Python — pass cache_dir instead.

None
analysis_json_path str | Path | None

Path to persist the analysis database / analysis.json.

None
cache_dir str | Path | None

Python only. Cache home for the codeanalyzer-python backend — its virtualenv, CodeQL database, and analysis_cache.json (forwarded as the backend's cache_dir). The backend owns all caching; when omitted it defaults to <project_path>/.codeanalyzer. Ignored for other languages.

None
use_codeql bool

Python only, default True. Augments Jedi-resolved call edges with CodeQL-resolved edges; set False for a faster, Jedi-only analysis. Ignored for other languages.

True

Returns:

Type Description
JavaAnalysis | PythonAnalysis | CAnalysis

Initialized analysis façade for the chosen language.

Raises:

Type Description
CldkInitializationException

If both or neither of project_path and source_code are provided, if project_path is not an existing directory, if source_code is passed in Python mode, or if the Java-only analysis_backend_path is passed in Python mode.

NotImplementedError

If the specified language is unsupported.

Examples:

Initialize Python analysis for a project directory and verify type (Python mode requires project_path; passing source_code raises CldkInitializationException):

>>> from cldk import CLDK
>>> cldk = CLDK(language="python")
>>> analysis = cldk.analysis(project_path="path/to/project")
>>> from cldk.analysis.python import PythonAnalysis
>>> isinstance(analysis, PythonAnalysis)
True
Source code in cldk/core.py
def analysis(
    self,
    project_path: str | Path | None = None,
    source_code: str | None = None,
    eager: bool = False,
    analysis_level: str = AnalysisLevel.symbol_table,
    target_files: List[str] | None = None,
    analysis_backend_path: str | None = None,
    analysis_json_path: str | Path | None = None,
    cache_dir: str | Path | None = None,
    use_codeql: bool = True,
) -> JavaAnalysis | PythonAnalysis | CAnalysis:
    """Initialize a language-specific analysis façade.

    Exactly one of ``project_path`` and ``source_code`` must be given.
    Python mode accepts ``project_path`` only. For C, only ``project_path``
    is forwarded to the analysis façade; every other argument is ignored.

    Args:
        project_path (str | Path | None): Directory path of the project.
            ``~`` is expanded and the path is resolved to an absolute path
            before use.
        source_code (str | None): Source code for single-file analysis.
            Not supported for Python.
        eager (bool): If True, forces regeneration of analysis databases.
        analysis_level (str): Analysis level. See AnalysisLevel.
        target_files (list[str] | None): Files to constrain analysis (optional).
        analysis_backend_path (str | None): Java only. Directory containing
            the ``codeanalyzer-*.jar`` to run. Not valid for Python — pass
            ``cache_dir`` instead.
        analysis_json_path (str | Path | None): Path to persist the analysis
            database / ``analysis.json``.
        cache_dir (str | Path | None): Python only. Cache home for the
            ``codeanalyzer-python`` backend — its virtualenv, CodeQL
            database, and ``analysis_cache.json`` (forwarded as the
            backend's ``cache_dir``). The backend owns all caching; when
            omitted it defaults to ``<project_path>/.codeanalyzer``.
            Ignored for other languages.
        use_codeql (bool): Python only, default True. Augments Jedi-resolved
            call edges with CodeQL-resolved edges; set False for a faster,
            Jedi-only analysis. Ignored for other languages.

    Returns:
        JavaAnalysis | PythonAnalysis | CAnalysis: Initialized analysis façade for the chosen language.

    Raises:
        CldkInitializationException: If both or neither of project_path and
            source_code are provided, if project_path is not an existing
            directory, if source_code is passed in Python mode, or if the
            Java-only ``analysis_backend_path`` is passed in Python mode.
        NotImplementedError: If the specified language is unsupported.

    Examples:
        Initialize Python analysis for a project directory and verify type
        (skipped under doctest because it needs a real project on disk):

        >>> from cldk import CLDK
        >>> cldk = CLDK(language="python")
        >>> analysis = cldk.analysis(project_path="path/to/project")  # doctest: +SKIP
        >>> from cldk.analysis.python import PythonAnalysis
        >>> isinstance(analysis, PythonAnalysis)  # doctest: +SKIP
        True
    """

    # Exactly one input mode (project directory or inline source) is allowed.
    if project_path is None and source_code is None:
        raise CldkInitializationException("Either project_path or source_code must be provided.")

    if project_path is not None and source_code is not None:
        raise CldkInitializationException("Both project_path and source_code are provided. Please provide only one.")

    # Normalize project_path: expand ~ and resolve to absolute path
    if project_path is not None:
        project_path = Path(project_path).expanduser().resolve()
        if not project_path.is_dir():
            raise CldkInitializationException(f"project_path does not exist or is not a directory: {project_path}")

    if self.language == "java":
        return JavaAnalysis(
            project_dir=project_path,
            source_code=source_code,
            analysis_level=analysis_level,
            analysis_backend_path=analysis_backend_path,
            analysis_json_path=analysis_json_path,
            target_files=target_files,
            eager_analysis=eager,
        )
    elif self.language == "python":
        # Reject Java-only inputs up front so misuse fails loudly instead of
        # being silently dropped.
        if source_code is not None:
            raise CldkInitializationException("source_code mode is not supported for Python; please pass project_path.")
        if analysis_backend_path is not None:
            raise CldkInitializationException(
                "analysis_backend_path is Java-only (it locates codeanalyzer-*.jar). "
                "For Python, use cache_dir for the backend's virtualenv/CodeQL cache."
            )
        return PythonAnalysis(
            project_dir=project_path,
            analysis_level=analysis_level,
            cache_dir=cache_dir,
            analysis_json_path=analysis_json_path,
            target_files=target_files,
            eager_analysis=eager,
            use_codeql=use_codeql,
        )
    elif self.language == "c":
        # Only the project directory is forwarded for C.
        return CAnalysis(project_dir=project_path)
    else:
        raise NotImplementedError(f"Analysis support for {self.language} is not implemented yet.")

treesitter_parser() #

Return a Treesitter parser for the selected language.

Returns:

Name Type Description
TreesitterJava

Parser for Java language.

Raises:

Type Description
NotImplementedError

If the language is unsupported.

Examples:

Get a Java Treesitter parser:

>>> from cldk import CLDK
>>> parser = CLDK(language="java").treesitter_parser()
>>> parser.__class__.__name__
'TreesitterJava'
Source code in cldk/core.py
def treesitter_parser(self):
    """Build the Treesitter parser matching ``self.language``.

    Java is the only language with a parser; any other language is rejected.

    Returns:
        TreesitterJava: Parser for Java language.

    Raises:
        NotImplementedError: If the language is unsupported.

    Examples:
        Get a Java Treesitter parser:

        >>> from cldk import CLDK
        >>> parser = CLDK(language="java").treesitter_parser()
        >>> parser.__class__.__name__
        'TreesitterJava'
    """
    # Guard clause: bail out first for anything that is not Java.
    if self.language != "java":
        raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")
    return TreesitterJava()

tree_sitter_utils(source_code) #

Return Treesitter utilities for the selected language.

Parameters:

Name Type Description Default
source_code str

Source code to initialize the utilities with.

required

Returns:

Type Description
TreesitterSanitizer

Utility wrapper for Java Treesitter operations.

Raises:

Type Description
NotImplementedError

If the language is unsupported.

Examples:

Create Java Treesitter sanitizer utilities:

>>> from cldk import CLDK
>>> utils = CLDK(language="java").tree_sitter_utils('class A {}')
>>> from cldk.utils.sanitization.java import TreesitterSanitizer
>>> isinstance(utils, TreesitterSanitizer)
True
Source code in cldk/core.py
def tree_sitter_utils(self, source_code: str) -> TreesitterSanitizer:
    """Return Treesitter utilities for the selected language.

    Only Java is supported.

    Args:
        source_code (str): Source code to initialize the utilities with.

    Returns:
        TreesitterSanitizer: Utility wrapper for Java Treesitter operations.

    Raises:
        NotImplementedError: If the language is unsupported.

    Examples:
        Create Java Treesitter sanitizer utilities:

        >>> from cldk import CLDK
        >>> utils = CLDK(language="java").tree_sitter_utils('class A {}')
        >>> from cldk.utils.sanitization.java import TreesitterSanitizer
        >>> isinstance(utils, TreesitterSanitizer)
        True
    """
    if self.language == "java":
        return TreesitterSanitizer(source_code=source_code)
    else:
        # NOTE(review): the message says "parser" although this method
        # returns utilities; left unchanged in case callers match on it.
        raise NotImplementedError(f"Treesitter parser for {self.language} is not implemented yet.")