Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
110 changes: 110 additions & 0 deletions api/analyzers/javascript/analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from pathlib import Path
from typing import Optional

from multilspy import SyncLanguageServer
from ...entities.entity import Entity
from ...entities.file import File
from ..analyzer import AbstractAnalyzer

import tree_sitter_javascript as tsjs
from tree_sitter import Language, Node

import logging
logger = logging.getLogger('code_graph')


class JavaScriptAnalyzer(AbstractAnalyzer):
    """Analyzer for JavaScript sources, backed by the tree-sitter JavaScript grammar.

    Three node types are treated as entities: function declarations, class
    declarations and method definitions. Symbol resolution is delegated to
    ``self.resolve`` / ``self.find_parent``, which are not defined in this
    class (presumably inherited from ``AbstractAnalyzer`` -- confirm there).
    """

    # Single source of truth for the supported entity node types and their
    # labels. Insertion order is the order returned by get_entity_types().
    _ENTITY_LABELS = {
        'function_declaration': "Function",
        'class_declaration': "Class",
        'method_definition': "Method",
    }

    def __init__(self) -> None:
        """Initialise the base analyzer with the JavaScript tree-sitter language."""
        super().__init__(Language(tsjs.language()))

    def add_dependencies(self, path: Path, files: list[Path]):
        """Do nothing; no dependency handling is implemented for JavaScript here."""
        pass

    def get_entity_label(self, node: Node) -> str:
        """Return the label ("Function", "Class" or "Method") for an entity node.

        Raises:
            ValueError: If ``node.type`` is not a supported entity type.
        """
        label = self._ENTITY_LABELS.get(node.type)
        if label is None:
            raise ValueError(f"Unknown entity type: {node.type}")
        return label

    def get_entity_name(self, node: Node) -> str:
        """Return the text of the node's ``name`` field, or '' if it has none.

        Raises:
            ValueError: If ``node.type`` is not a supported entity type.
        """
        if node.type not in self._ENTITY_LABELS:
            raise ValueError(f"Unknown entity type: {node.type}")
        name_node = node.child_by_field_name('name')
        if name_node is None:
            return ''
        return name_node.text.decode('utf-8')

    def get_entity_docstring(self, node: Node) -> Optional[str]:
        """Return the comment immediately preceding the entity, if any.

        Only the direct previous sibling is inspected; its raw text is
        returned (comment delimiters included).

        Raises:
            ValueError: If ``node.type`` is not a supported entity type.
        """
        if node.type not in self._ENTITY_LABELS:
            raise ValueError(f"Unknown entity type: {node.type}")
        previous = node.prev_sibling
        if previous and previous.type == 'comment':
            return previous.text.decode('utf-8')
        return None

    def get_entity_types(self) -> list[str]:
        """Return the tree-sitter node types this analyzer treats as entities."""
        return list(self._ENTITY_LABELS)

    def add_symbols(self, entity: Entity) -> None:
        """Attach the symbols found inside ``entity`` to it.

        * Classes: every ``identifier`` directly under a ``class_heritage``
          child is added as ``"base_class"``.
        * Functions / methods: every ``call_expression`` in the subtree is
          added as ``"call"`` and every identifier directly under a
          ``formal_parameters`` node as ``"parameters"``.
        """
        node = entity.node
        if node.type == 'class_declaration':
            # Classes without a `body` field are skipped entirely, as before.
            if node.child_by_field_name('body') is None:
                return
            for child in node.children:
                if child.type != 'class_heritage':
                    continue
                # NOTE(review): only plain identifiers are collected, so
                # `extends ns.Base` (a member expression) yields no symbol.
                for heritage_child in child.children:
                    if heritage_child.type == 'identifier':
                        entity.add_symbol("base_class", heritage_child)
        elif node.type in ('function_declaration', 'method_definition'):
            query = self.language.query("(call_expression) @reference.call")
            captures = query.captures(node)
            if 'reference.call' in captures:
                for caller in captures['reference.call']:
                    entity.add_symbol("call", caller)
            query = self.language.query("(formal_parameters (identifier) @parameter)")
            captures = query.captures(node)
            if 'parameter' in captures:
                for parameter in captures['parameter']:
                    entity.add_symbol("parameters", parameter)

    def is_dependency(self, file_path: str) -> bool:
        """Return True when ``file_path`` contains the substring "node_modules"."""
        return "node_modules" in file_path

    def resolve_path(self, file_path: str, path: Path) -> str:
        """Return ``file_path`` unchanged; ``path`` is unused."""
        return file_path

    def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve ``node`` to the class entities that enclose its definitions.

        Each location yielded by ``self.resolve`` is mapped to its nearest
        enclosing ``class_declaration``; it is kept only when that declaration
        is a known entity of the resolved file.
        """
        res = []
        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
            type_dec = self.find_parent(resolved_node, ['class_declaration'])
            if type_dec in file.entities:
                res.append(file.entities[type_dec])
        return res

    def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve a call site to the function/method entities it refers to.

        For a ``call_expression`` the callee is resolved instead of the whole
        call: the ``function`` field, narrowed to its ``property`` when the
        callee is a member expression (``obj.method()`` resolves ``method``).
        """
        res = []
        if node.type == 'call_expression':
            func_node = node.child_by_field_name('function')
            if func_node and func_node.type == 'member_expression':
                func_node = func_node.child_by_field_name('property')
            if func_node:
                node = func_node
        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
            method_dec = self.find_parent(resolved_node, ['function_declaration', 'method_definition', 'class_declaration'])
            # A class as the nearest enclosing declaration means the target is
            # not inside a function or method, so it is not a method entity.
            if method_dec and method_dec.type == 'class_declaration':
                continue
            if method_dec in file.entities:
                res.append(file.entities[method_dec])
        return res

    def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
        """Dispatch symbol resolution according to the symbol ``key``.

        ``"base_class"`` and ``"parameters"`` are resolved as types, ``"call"``
        as a method.

        Raises:
            ValueError: If ``key`` is none of the above.
        """
        if key in ["base_class", "parameters"]:
            return self.resolve_type(files, lsp, file_path, path, symbol)
        elif key in ["call"]:
            return self.resolve_method(files, lsp, file_path, path, symbol)
        else:
            raise ValueError(f"Unknown key {key}")
9 changes: 6 additions & 3 deletions api/analyzers/source_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .java.analyzer import JavaAnalyzer
from .python.analyzer import PythonAnalyzer
from .csharp.analyzer import CSharpAnalyzer
from .javascript.analyzer import JavaScriptAnalyzer

from multilspy import SyncLanguageServer
from multilspy.multilspy_config import MultilspyConfig
Expand All @@ -26,7 +27,8 @@
# '.h': CAnalyzer(),
'.py': PythonAnalyzer(),
'.java': JavaAnalyzer(),
'.cs': CSharpAnalyzer()}
'.cs': CSharpAnalyzer(),
'.js': JavaScriptAnalyzer()}

class NullLanguageServer:
def start_server(self):
Expand Down Expand Up @@ -143,7 +145,8 @@
lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
else:
lsps[".cs"] = NullLanguageServer()
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
lsps[".js"] = NullLanguageServer()
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server(), lsps[".js"].start_server():
files_len = len(self.files)
for i, file_path in enumerate(files):
file = self.files[file_path]
Expand Down Expand Up @@ -174,7 +177,7 @@

def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
path = path.resolve()
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs"))
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + list(path.rglob("*.js"))

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 1 day ago

In general, to fix this you should ensure that any filesystem path derived from user input is constrained to a safe root directory and normalized before use. That means: (1) define a base directory under which all analyses must occur; (2) resolve the user-supplied path relative to that base; (3) reject anything whose normalized/real path escapes that base (e.g., via .. or absolute paths); and (4) only then pass the safe resolved path into the rest of the code. This prevents a client from causing the server to traverse or operate on arbitrary parts of the filesystem.

For this codebase, the best minimally invasive fix is to introduce such validation in SourceAnalyzer.analyze_local_folder, since that is the point where the untrusted path: str first enters the analyzer layer and gets converted to a Path. We can:

  • Decide on a safe root for analysis, e.g. an environment-variable-controlled base like CODE_GRAPH_PROJECTS_ROOT, defaulting to the current working directory if unset. This keeps behavior similar while allowing operators to constrain where analyses can occur.
  • In analyze_local_folder, convert both the configured base path and the user path into absolute, resolved Path objects using .resolve().
  • If path is not under the base (check via relative_to or a simple prefix check), log an error and raise an exception instead of proceeding.
  • Pass the resolved, safe Path to analyze_sources so that downstream calls (path.rglob(...)) operate only within the validated directory tree.

This change only touches api/analyzers/source_analyzer.py, keeps the external API of analyze_local_folder unchanged, and preserves existing functionality for valid paths that lie under the configured base directory. We will add a small helper method inside SourceAnalyzer to encapsulate the “ensure path under base” logic and call it from analyze_local_folder. The only new import required is os, which is used to read the environment variable.

Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -1,6 +1,7 @@
 from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional
+import os
 
 from api.entities.entity import Entity
 from api.entities.file import File
@@ -184,6 +185,34 @@
         # Second pass analysis of the source code
         self.second_pass(graph, files, path)
 
+    def _resolve_and_validate_path(self, user_path: str) -> Path:
+        """
+        Resolve a user-supplied path against a safe root and ensure it does not escape.
+
+        The safe root can be configured via the CODE_GRAPH_PROJECTS_ROOT environment
+        variable; if unset, the current working directory is used.
+        """
+        base_dir_env = os.environ.get("CODE_GRAPH_PROJECTS_ROOT")
+        if base_dir_env:
+            base_dir = Path(base_dir_env)
+        else:
+            base_dir = Path.cwd()
+
+        base_dir = base_dir.resolve()
+        candidate = Path(user_path)
+        if not candidate.is_absolute():
+            candidate = base_dir / candidate
+        candidate = candidate.resolve()
+
+        try:
+            # Ensure candidate is within base_dir
+            candidate.relative_to(base_dir)
+        except ValueError:
+            logging.error(f"Requested path '{candidate}' is outside of allowed base directory '{base_dir}'")
+            raise ValueError("Requested path is not allowed")
+
+        return candidate
+
     def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
         """
         Analyze path.
@@ -195,8 +224,11 @@
 
         logging.info(f"Analyzing local folder {path}")
 
+        # Resolve and validate the provided path against a safe root
+        safe_path = self._resolve_and_validate_path(path)
+
         # Analyze source files
-        self.analyze_sources(Path(path), ignore, g)
+        self.analyze_sources(safe_path, ignore, g)
 
         logging.info("Done analyzing path")
 
EOF
@@ -1,6 +1,7 @@
from contextlib import nullcontext
from pathlib import Path
from typing import Optional
import os

from api.entities.entity import Entity
from api.entities.file import File
@@ -184,6 +185,34 @@
# Second pass analysis of the source code
self.second_pass(graph, files, path)

def _resolve_and_validate_path(self, user_path: str) -> Path:
    """
    Resolve a user-supplied path against a safe root and ensure it does not escape.

    The safe root can be configured via the CODE_GRAPH_PROJECTS_ROOT environment
    variable; if unset or empty, the current working directory is used.

    Args:
        user_path (str): Path supplied by the caller. Relative paths are
            interpreted relative to the safe root.

    Returns:
        Path: The absolute, resolved path, guaranteed to be the safe root
        itself or a location beneath it.

    Raises:
        ValueError: If the resolved path lies outside the safe root.
    """
    configured_root = os.environ.get("CODE_GRAPH_PROJECTS_ROOT")
    base_dir = (Path(configured_root) if configured_root else Path.cwd()).resolve()

    candidate = Path(user_path)
    if not candidate.is_absolute():
        candidate = base_dir / candidate
    # resolve() collapses ".." segments and follows symlinks, so the
    # containment check below runs against the real target location.
    candidate = candidate.resolve()

    try:
        # relative_to() raises ValueError when candidate is not under base_dir.
        candidate.relative_to(base_dir)
    except ValueError:
        logging.error(
            "Requested path '%s' is outside of allowed base directory '%s'",
            candidate,
            base_dir,
        )
        # `from None` drops the internal relative_to() error from the
        # traceback; the details have already been logged above.
        raise ValueError("Requested path is not allowed") from None

    return candidate

def analyze_local_folder(self, path: str, g: Graph, ignore: Optional[list[str]] = []) -> None:
"""
Analyze path.
@@ -195,8 +224,11 @@

logging.info(f"Analyzing local folder {path}")

# Resolve and validate the provided path against a safe root
safe_path = self._resolve_and_validate_path(path)

# Analyze source files
self.analyze_sources(Path(path), ignore, g)
self.analyze_sources(safe_path, ignore, g)

logging.info("Done analyzing path")

Copilot is powered by AI and may make mistakes. Always verify output.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 1 day ago

General approach: treat user-provided paths as relative or confined within a known safe root directory on the server, and validate/normalize them before using them as a root for filesystem traversal. Use Path.resolve() and then verify that the resolved path is within a configured base directory (for example, the directory where repositories are stored). Reject or error out if the requested path escapes this root. This prevents a client from causing analysis of arbitrary directories like /etc or /.

Best concrete fix here: centralize the trust boundary in SourceAnalyzer.analyze_local_folder by:

  1. Introducing a function that returns the safe root directory (e.g., from an environment variable or a default like REPOS_ROOT under the project). Since we must not assume wider project structure, we’ll use an environment variable CODEGRAPH_ROOT with a reasonable default (current working directory).
  2. In analyze_local_folder, convert the incoming path string into a Path, resolve it, and then check that it is contained within the safe root using Path.is_relative_to (Python 3.9+) or an equivalent containment check.
  3. If the check fails, log an error and raise an exception; if it passes, proceed to call analyze_sources with the resolved safe Path.
  4. Optionally, apply similar confinement in analyze_local_repository, which also takes a user-controlled path, before interacting with pygit2.Repository.

This confines all downstream uses, including the rglob on line 180, without changing the external API signatures or the functional behavior for legitimate, in-root paths. The only edits needed are within api/analyzers/source_analyzer.py: adding imports for os (for environment variable) if needed, a helper to get/validate the root, and modifications to analyze_local_folder (and analyze_local_repository) to perform normalization and the “within root” check.


Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -1,6 +1,7 @@
 from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional
+import os
 
 from api.entities.entity import Entity
 from api.entities.file import File
@@ -21,6 +22,36 @@
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')
 
+
+def _get_safe_root() -> Path:
+    """
+    Returns the root directory under which analysis is allowed.
+    The root can be configured via the CODEGRAPH_ROOT environment variable,
+    otherwise the current working directory is used.
+    """
+    root_env = os.environ.get("CODEGRAPH_ROOT")
+    if root_env:
+        return Path(root_env).resolve()
+    return Path.cwd().resolve()
+
+
+def _ensure_within_root(requested_path: Path) -> Path:
+    """
+    Resolve the requested path and ensure it resides within the safe root.
+    Raises a ValueError if the path is outside the allowed root.
+    """
+    safe_root = _get_safe_root()
+    resolved = requested_path.resolve()
+    try:
+        # Python 3.9+: Path.is_relative_to
+        if resolved.is_relative_to(safe_root):
+            return resolved
+    except AttributeError:
+        # Fallback for older Python versions
+        if os.path.commonpath([str(safe_root), str(resolved)]) == str(safe_root):
+            return resolved
+    raise ValueError(f"Path '{resolved}' is outside of the allowed root '{safe_root}'")
+
 # List of available analyzers
 analyzers: dict[str, AbstractAnalyzer] = {
     # '.c': CAnalyzer(),
@@ -195,8 +226,15 @@
 
         logging.info(f"Analyzing local folder {path}")
 
+        # Normalize and validate that the path is within the allowed root
+        try:
+            target_path = _ensure_within_root(Path(path))
+        except ValueError as e:
+            logging.error(str(e))
+            raise
+
         # Analyze source files
-        self.analyze_sources(Path(path), ignore, g)
+        self.analyze_sources(target_path, ignore, g)
 
         logging.info("Done analyzing path")
 
@@ -213,12 +250,19 @@
 
         from pygit2.repository import Repository
 
-        proj_name = Path(path).name
+        # Normalize and validate repository path
+        try:
+            repo_path = _ensure_within_root(Path(path))
+        except ValueError as e:
+            logging.error(str(e))
+            raise
+
+        proj_name = repo_path.name
         graph = Graph(proj_name)
-        self.analyze_local_folder(path, graph, ignore)
+        self.analyze_local_folder(str(repo_path), graph, ignore)
 
         # Save processed commit hash to the DB
-        repo = Repository(path)
+        repo = Repository(str(repo_path))
         current_commit = repo.walk(repo.head.target).__next__()
         graph.set_graph_commit(current_commit.short_id)
 
EOF
Copilot is powered by AI and may make mistakes. Always verify output.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 1 day ago

General approach: normalize and validate the user-supplied path before using it for filesystem traversal. Optionally (and recommended), restrict analysis to paths that live under a configured safe root directory. Even if we cannot see configuration here, we can at least normalize and refuse paths that do not resolve to directories or that attempt to climb above an optional root.

Best concrete fix within the shown code:

  1. In SourceAnalyzer.analyze_local_folder, convert the incoming path: str to a normalized, absolute Path using Path(path).resolve(strict=True) inside a try block.
  2. Optionally support an environment variable CODEGRAPH_REPOS_ROOT (a safe base directory). If set, ensure the requested path is inside that root. This mirrors the “safe-root” pattern in the background section and doesn’t change existing behavior when the env var is unset.
  3. Pass this validated Path into analyze_sources instead of creating a new Path from the raw string.
  4. Keep the public function signature unchanged to avoid breaking callers.

Concretely, in api/analyzers/source_analyzer.py:

  • Modify analyze_local_folder so that:
    • It resolves path with base_path = Path(path).resolve(strict=True).
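    • If CODEGRAPH_REPOS_ROOT is set in the environment, also resolves that to root = Path(os.environ["CODEGRAPH_REPOS_ROOT"]).resolve(strict=True) and verifies that base_path either equals root or that str(base_path) starts with str(root) followed by a path separator (a bare prefix check would wrongly accept a sibling such as "<root>-other"). If not, it raises ValueError or logs and returns early.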
    • It calls self.analyze_sources(base_path, ignore, g) instead of Path(path).

This adds robust validation at the boundary where untrusted data enters the analyzer, addresses all CodeQL variants that flow through this method, and preserves existing functionality when no root is configured.

We will need to add an import os at the top of api/analyzers/source_analyzer.py to access environment variables.


Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -18,6 +18,7 @@
 from multilspy.multilspy_logger import MultilspyLogger
 
 import logging
+import os
 # Configure logging
 logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')
 
@@ -193,10 +194,29 @@
             ignore (List(str)): List of paths to skip
         """
 
-        logging.info(f"Analyzing local folder {path}")
+        try:
+            base_path = Path(path).resolve(strict=True)
+        except FileNotFoundError:
+            logging.error("Path '%s' does not exist or is not accessible", path)
+            return
 
+        safe_root = os.environ.get("CODEGRAPH_REPOS_ROOT")
+        if safe_root:
+            try:
+                root_path = Path(safe_root).resolve(strict=True)
+            except FileNotFoundError:
+                logging.error("Configured CODEGRAPH_REPOS_ROOT '%s' does not exist", safe_root)
+                return
+            base_path_str = str(base_path)
+            root_path_str = str(root_path)
+            if not base_path_str.startswith(root_path_str.rstrip(os.sep) + os.sep) and base_path_str != root_path_str:
+                logging.error("Path '%s' is outside of allowed root '%s'", base_path, root_path)
+                return
+
+        logging.info(f"Analyzing local folder {base_path}")
+
         # Analyze source files
-        self.analyze_sources(Path(path), ignore, g)
+        self.analyze_sources(base_path, ignore, g)
 
         logging.info("Done analyzing path")
 
EOF
@@ -18,6 +18,7 @@
from multilspy.multilspy_logger import MultilspyLogger

import logging
import os
# Configure logging
logging.basicConfig(level=logging.DEBUG, format='%(filename)s - %(asctime)s - %(levelname)s - %(message)s')

@@ -193,10 +194,29 @@
ignore (List(str)): List of paths to skip
"""

logging.info(f"Analyzing local folder {path}")
try:
base_path = Path(path).resolve(strict=True)
except FileNotFoundError:
logging.error("Path '%s' does not exist or is not accessible", path)
return

safe_root = os.environ.get("CODEGRAPH_REPOS_ROOT")
if safe_root:
try:
root_path = Path(safe_root).resolve(strict=True)
except FileNotFoundError:
logging.error("Configured CODEGRAPH_REPOS_ROOT '%s' does not exist", safe_root)
return
base_path_str = str(base_path)
root_path_str = str(root_path)
if not base_path_str.startswith(root_path_str.rstrip(os.sep) + os.sep) and base_path_str != root_path_str:
logging.error("Path '%s' is outside of allowed root '%s'", base_path, root_path)
return

logging.info(f"Analyzing local folder {base_path}")

# Analyze source files
self.analyze_sources(Path(path), ignore, g)
self.analyze_sources(base_path, ignore, g)

logging.info("Done analyzing path")

Copilot is powered by AI and may make mistakes. Always verify output.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Copilot Autofix

AI 1 day ago

In general, to fix uncontrolled path usage you must (1) normalize the user-provided path, and (2) enforce that it lies within an allowed base directory (or otherwise belongs to an allow‑list) before using it. For directory‑wide operations like rglob, define a server‑side base directory (e.g., from an environment variable or a constant), resolve both base and user path, and then verify that the user path is a subpath of the base. If the check fails, reject the request.

For this codebase, the most direct and non‑disruptive fix is inside SourceAnalyzer.analyze_sources, because that is where Path(path) is used to traverse the filesystem with rglob. We can:

  1. Define an allowed root directory for analysis using an environment variable such as CODE_GRAPH_BASE_DIR (defaulting to the current working directory or another reasonable root).
  2. Resolve both the base directory and the user‑supplied path.
  3. Check that path is equal to the base dir or is a subdirectory of it. In Python ≥3.9 this can be done safely with Path.is_relative_to; we can implement a small helper using Path.relative_to if needed.
  4. If the check fails, raise a clear exception instead of proceeding.

This keeps all existing behavior for callers who already point to directories under the chosen base, and restricts path traversal to safe locations. The only file that needs code changes is api/analyzers/source_analyzer.py. We will:

  • Import os to read the environment variable.
  • In analyze_sources, compute base_dir = Path(os.getenv("CODE_GRAPH_BASE_DIR", ".")).resolve() and validate that the resolved path is under base_dir before doing rglob.
  • Log an error and raise ValueError (or similar) if the check fails.

No changes are needed in tests/index.py or api/index.py to achieve the core mitigation, since their use of SourceAnalyzer will automatically be constrained by this validation.


Suggested changeset 1
api/analyzers/source_analyzer.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/api/analyzers/source_analyzer.py b/api/analyzers/source_analyzer.py
--- a/api/analyzers/source_analyzer.py
+++ b/api/analyzers/source_analyzer.py
@@ -1,6 +1,7 @@
 from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional
+import os
 
 from api.entities.entity import Entity
 from api.entities.file import File
@@ -176,7 +177,16 @@
         self.second_pass(graph, files, path)
 
     def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
+        # Resolve the target path and enforce that it lies within an allowed base directory.
         path = path.resolve()
+        base_dir_env = os.getenv("CODE_GRAPH_BASE_DIR", ".")
+        base_dir = Path(base_dir_env).resolve()
+        try:
+            # This will raise ValueError if 'path' is not inside 'base_dir'.
+            path.relative_to(base_dir)
+        except ValueError:
+            logging.error("Refusing to analyze path '%s' outside of base directory '%s'", path, base_dir)
+            raise ValueError(f"Path '{path}' is not allowed for analysis")
         files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + list(path.rglob("*.js"))
         # First pass analysis of the source code
         self.first_pass(path, files, ignore, graph)
EOF
@@ -1,6 +1,7 @@
from contextlib import nullcontext
from pathlib import Path
from typing import Optional
import os

from api.entities.entity import Entity
from api.entities.file import File
@@ -176,7 +177,16 @@
self.second_pass(graph, files, path)

def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
# Resolve the target path and enforce that it lies within an allowed base directory.
path = path.resolve()
base_dir_env = os.getenv("CODE_GRAPH_BASE_DIR", ".")
base_dir = Path(base_dir_env).resolve()
try:
# This will raise ValueError if 'path' is not inside 'base_dir'.
path.relative_to(base_dir)
except ValueError:
logging.error("Refusing to analyze path '%s' outside of base directory '%s'", path, base_dir)
raise ValueError(f"Path '{path}' is not allowed for analysis")
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs")) + list(path.rglob("*.js"))
# First pass analysis of the source code
self.first_pass(path, files, ignore, graph)
Copilot is powered by AI and may make mistakes. Always verify output.
# First pass analysis of the source code
self.first_pass(path, files, ignore, graph)

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies = [
"tree-sitter-c>=0.24.1,<0.25.0",
"tree-sitter-python>=0.25.0,<0.26.0",
"tree-sitter-java>=0.23.5,<0.24.0",
"tree-sitter-javascript>=0.23.0",
"tree-sitter-c-sharp>=0.23.1,<0.24.0",
"flask>=3.1.0,<4.0.0",
"python-dotenv>=1.0.1,<2.0.0",
Expand Down
Loading