diff --git a/README.md b/README.md
index 7bbab6b7..b4e210a0 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-<img src="docs/full_logo.png" />
+<img src="assets/full_logo.png" />
HelixDB: a database built from scratch to be the storage backend for any AI application.
diff --git a/docs/full_logo.png b/assets/full_logo.png
similarity index 100%
rename from docs/full_logo.png
rename to assets/full_logo.png
diff --git a/docs/icon-1.png b/assets/icon-1.png
similarity index 100%
rename from docs/icon-1.png
rename to assets/icon-1.png
diff --git a/helix-cli/src/utils.rs b/helix-cli/src/utils.rs
index 19c98f8c..3b8bee92 100644
--- a/helix-cli/src/utils.rs
+++ b/helix-cli/src/utils.rs
@@ -7,9 +7,12 @@ use futures_util::StreamExt;
use helix_db::{
helix_engine::traversal_core::config::Config,
helixc::{
- analyzer::analyzer::analyze,
+ analyzer::analyze,
generator::{Source as GeneratedSource, tsdisplay::ToTypeScript},
- parser::helix_parser::{Content, HelixParser, HxFile, Source},
+ parser::{
+ HelixParser,
+ types::{Content, HxFile, Source},
+ },
},
utils::styled_string::StyledString,
};
@@ -956,12 +959,7 @@ pub fn copy_repo_dir_for_build(src: &std::path::Path, dst: &std::path::Path) ->
if let Some(file_name) = entry.file_name().to_str()
&& matches!(
file_name,
- ".git"
- | ".gitignore"
- | ".github"
- | ".DS_Store"
- | "target"
- | "docs"
+ ".git" | ".gitignore" | ".github" | ".DS_Store" | "target" | "docs"
)
{
continue;
diff --git a/helix-db/mod.rs b/helix-db/mod.rs
deleted file mode 100644
index 3f359453..00000000
--- a/helix-db/mod.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-pub mod bm25_benches;
-
diff --git a/helix-db/src/helix_engine/README.md b/helix-db/src/helix_engine/README.md
deleted file mode 100644
index 9461b44a..00000000
--- a/helix-db/src/helix_engine/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-### Overview
-
-##### Router
-- Parse request
-- Verify request
-- Route request to api handler
-
-##### API
-- Pull thread from thread pool
-- Transfer request to thread with corresponding handler
-- Execute request
-
-##### Graph Engine
-- Core graph implementation
-- Interface with rocks db
-- Handles calls from API via internal UNIX socket
-
-#### Process of Uploading Queries
-
\ No newline at end of file
diff --git a/helix-db/src/helix_engine/bm25/README.md b/helix-db/src/helix_engine/bm25/README.md
deleted file mode 100644
index fd2b3700..00000000
--- a/helix-db/src/helix_engine/bm25/README.md
+++ /dev/null
@@ -1,150 +0,0 @@
-# BM25 Implementation for HelixDB
-
-This module provides a complete BM25 (Best Matching 25) implementation for full-text search in your graph vector database. BM25 is a probabilistic ranking function used by search engines to estimate the relevance of documents to a given search query.
-
-## Features
-
-- **Disk-based storage**: Uses LMDB for persistent inverted index storage
-- **Full BM25 scoring**: Implements the complete BM25 algorithm with configurable parameters
-- **CRUD operations**: Support for inserting, updating, and deleting documents
-- **Hybrid search ready**: Designed to work alongside vector similarity search
-- **Efficient tokenization**: Includes text preprocessing and tokenization
-- **Scalable**: Handles large document collections efficiently
-
-## Architecture
-
-The BM25 implementation uses four LMDB databases:
-
-1. **Inverted Index** (`bm25_inverted_index`): Maps terms to posting lists containing document IDs and term frequencies
-2. **Document Lengths** (`bm25_doc_lengths`): Stores the length of each indexed document
-3. **Term Frequencies** (`bm25_term_frequencies`): Stores document frequency for each term
-4. **Metadata** (`bm25_metadata`): Stores global statistics like total documents and average document length
-
-## Usage
-
-### Basic Text Search
-
-```rust
-use helixdb::helix_engine::{
- bm25::BM25,
- storage_core::HelixGraphStorage,
-};
-
-// Assuming you have a HelixGraphStorage instance
-let storage = HelixGraphStorage::new(db_path, config)?;
-
-// Index a document
-let doc_id = node.id;
-let text = "The quick brown fox jumps over the lazy dog";
-storage.insert_doc(doc_id, text)?;
-
-// Search for documents
-let results = storage.search("quick fox", 10)?;
-for (doc_id, score) in results {
- println!("Document {}: Score {:.4}", doc_id, score);
-}
-```
-
-### Document Management
-
-```rust
-// Update a document (deletes old and re-indexes)
-storage.update_doc(doc_id, "Updated text content")?;
-
-// Delete a document from the index
-storage.delete_doc(doc_id)?;
-```
-
-### Hybrid Search (BM25 + Vector Similarity)
-
-```rust
-use helixdb::helix_engine::bm25::HybridSearch;
-
-// Combine BM25 text search with vector similarity
-let query_text = "machine learning";
-let query_vector = Some(&[0.1, 0.2, 0.3, ...]); // Your query vector
-let alpha = 0.7; // Weight for BM25 vs vector similarity (0.7 = 70% BM25, 30% vector)
-let limit = 10;
-
-let results = storage.hybrid_search(query_text, query_vector, alpha, limit)?;
-```
-
-### Automatic Node Indexing
-
-The implementation automatically extracts text from nodes by combining:
-- Node label
-- All property keys and values
-
-```rust
-// This node will be indexed as: "Person name John Doe age 30"
-let node = Node {
- id: uuid,
- label: "Person".to_string(),
- properties: Some(hashmap!{
- "name".to_string() => Value::String("John Doe".to_string()),
- "age".to_string() => Value::Integer(30),
- }),
-};
-```
-
-## BM25 Algorithm Details
-
-The implementation uses the standard BM25 formula:
-
-```
-score(D,Q) = Σ IDF(qi) * (f(qi,D) * (k1 + 1)) / (f(qi,D) + k1 * (1 - b + b * |D| / avgdl))
-```
-
-Where:
-- `D` is a document
-- `Q` is a query
-- `qi` is the i-th query term
-- `f(qi,D)` is the term frequency of qi in document D
-- `|D|` is the length of document D
-- `avgdl` is the average document length
-- `k1` and `b` are tuning parameters (default: k1=1.2, b=0.75)
-- `IDF(qi)` is the inverse document frequency of qi
-
-## Configuration
-
-Default BM25 parameters:
-- `k1 = 1.2`: Controls term frequency saturation
-- `b = 0.75`: Controls length normalization
-
-These can be adjusted based on your specific use case:
-- Higher `k1` values give more weight to term frequency
-- Higher `b` values give more weight to document length normalization
-
-## Performance Considerations
-
-1. **Indexing**: O(n) where n is the number of unique terms in the document
-2. **Search**: O(m * k) where m is the number of query terms and k is the average posting list length
-3. **Storage**: Efficient disk-based storage with LMDB's memory-mapped files
-4. **Memory**: Minimal memory usage as data is stored on disk
-
-## Integration with Vector Search
-
-The BM25 implementation is designed to work seamlessly with your existing vector similarity search:
-
-1. **Complementary**: BM25 handles exact term matching while vectors handle semantic similarity
-2. **Hybrid scoring**: Combine scores using weighted averages
-3. **Fallback**: Use BM25 when vector search returns insufficient results
-4. **Filtering**: Use BM25 to pre-filter candidates for vector search
-
-## Example Use Cases
-
-1. **Document Search**: Full-text search across node properties
-2. **Hybrid Retrieval**: Combine keyword and semantic search
-3. **Query Expansion**: Use BM25 to find related terms for vector queries
-4. **Faceted Search**: Filter by text criteria before vector similarity
-5. **Autocomplete**: Fast prefix matching for search suggestions
-
-## Error Handling
-
-The implementation provides comprehensive error handling:
-- Database connection errors
-- Serialization/deserialization errors
-- Missing document errors
-- Invalid query errors
-
-All errors are wrapped in the `GraphError` type for consistent error handling across the system.
\ No newline at end of file
diff --git a/helix-db/src/helix_engine/bm25/bm25.rs b/helix-db/src/helix_engine/bm25/bm25.rs
index 4b17d754..06ea3e45 100644
--- a/helix-db/src/helix_engine/bm25/bm25.rs
+++ b/helix-db/src/helix_engine/bm25/bm25.rs
@@ -431,7 +431,7 @@ impl BM25Flatten for HashMap<String, Value>
{
.fold(String::with_capacity(self.len() * 4), |mut s, (k, v)| {
s.push_str(k);
s.push(' ');
- s.push_str(&v.to_string());
+ s.push_str(&v.inner_stringify());
s.push(' ');
s
})
diff --git a/helix-db/src/helix_engine/macros.rs b/helix-db/src/helix_engine/macros.rs
index 77d9bfa5..e3ce0775 100644
--- a/helix-db/src/helix_engine/macros.rs
+++ b/helix-db/src/helix_engine/macros.rs
@@ -178,9 +178,6 @@ pub mod macros {
#[macro_export]
macro_rules! traversal_remapping {
($remapping_vals:expr, $var_name:expr, $should_spread:expr, $new_name:expr => $traversal:expr) => {{
- // TODO: ref?
- // Apply remappings to the nested traversal result
-
let nested_return_value = ReturnValue::from_traversal_value_array_with_mixin(
$traversal,
$remapping_vals.borrow_mut(),
diff --git a/helix-db/src/helix_engine/queryProcess.png b/helix-db/src/helix_engine/queryProcess.png
deleted file mode 100644
index 68e766d4..00000000
Binary files a/helix-db/src/helix_engine/queryProcess.png and /dev/null differ
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs
index b83b5457..2ccde09f 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs
@@ -49,10 +49,10 @@ fn test_drop_edge() {
let node1 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!()), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!()), None)
- .collect_to_val();
+ .collect_to_obj();
let edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -62,7 +62,7 @@ fn test_drop_edge() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let mut txn = storage.graph_env.write_txn().unwrap();
@@ -99,10 +99,10 @@ fn test_drop_node() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "test")), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "test2")), None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -112,7 +112,7 @@ fn test_drop_node() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let mut txn = storage.graph_env.write_txn().unwrap();
let traversal = G::new(Arc::clone(&storage), &txn)
@@ -142,12 +142,12 @@ fn test_drop_traversal() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
for _ in 0..10 {
let new_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -157,7 +157,7 @@ fn test_drop_traversal() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
}
txn.commit().unwrap();
@@ -211,14 +211,14 @@ fn test_node_deletion_in_existing_graph() {
let source_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let mut other_nodes = Vec::new();
for _ in 0..10 {
let other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
other_nodes.push(other_node);
}
@@ -233,7 +233,7 @@ fn test_node_deletion_in_existing_graph() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
// 20 edges from source to other nodes
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
@@ -245,7 +245,7 @@ fn test_node_deletion_in_existing_graph() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -255,7 +255,7 @@ fn test_node_deletion_in_existing_graph() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
}
let edges = G::new(Arc::clone(&storage), &txn)
@@ -307,19 +307,19 @@ fn test_edge_deletion_in_existing_graph() {
let node1 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, node1.id(), node2.id(), false, EdgeType::Node)
- .collect_to_val();
+ .collect_to_obj();
let edge2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, node2.id(), node1.id(), false, EdgeType::Node)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -352,7 +352,7 @@ fn test_vector_deletion_in_existing_graph() {
let node: TraversalValue = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let mut other_vectors = Vec::new();
@@ -363,13 +363,13 @@ fn test_vector_deletion_in_existing_graph() {
"vector",
None,
)
- .collect_to_val();
+ .collect_to_obj();
other_vectors.push(other_vector);
}
let vector = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None)
- .collect_to_val();
+ .collect_to_obj();
for other_vector in &other_vectors {
let random_vector = other_vectors[rand::rng().random_range(0..other_vectors.len())].id();
@@ -382,13 +382,13 @@ fn test_vector_deletion_in_existing_graph() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, node.id(), vector.id(), false, EdgeType::Vec)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, vector.id(), node.id(), false, EdgeType::Node)
- .collect_to_val();
+ .collect_to_obj();
}
let edges = G::new(Arc::clone(&storage), &txn)
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs
index 953f86ed..80ca286e 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs
@@ -185,10 +185,10 @@ fn test_in_n() {
let person1 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("per son", Some(props!()), None)
- .collect_to_val();
+ .collect_to_obj();
let person2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!()), None)
- .collect_to_val();
+ .collect_to_obj();
let edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -199,7 +199,7 @@ fn test_in_n() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let txn = storage.graph_env.read_txn().unwrap();
let traversal = G::new(Arc::clone(&storage), &txn)
@@ -218,10 +218,10 @@ fn test_out_n() {
let person1 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!()), None)
- .collect_to_val();
+ .collect_to_obj();
let person2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!()), None)
- .collect_to_val();
+ .collect_to_obj();
let edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -232,7 +232,7 @@ fn test_out_n() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let txn = storage.graph_env.read_txn().unwrap();
let traversal = G::new(Arc::clone(&storage), &txn)
@@ -254,7 +254,7 @@ fn test_edge_properties() {
let node1 = node1.first().unwrap().clone();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!()), None)
- .collect_to_val();
+ .collect_to_obj();
let props = props! { "since" => 2020, "date" => 1744965900, "name" => "hello"};
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -405,15 +405,15 @@ fn test_add_e_between_node_and_vector() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let vector = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, node.id(), vector.id(), false, EdgeType::Vec)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs
index a71e870a..cec83c06 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs
@@ -259,17 +259,17 @@ fn test_filter_chain() {
Some(props! { "age" => 25, "name" => "Alice" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let person2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n(
"person",
Some(props! { "age" => 30, "name" => "Bob" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 35 }), None)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let txn = storage.graph_env.read_txn().unwrap();
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs
index 76ecad8b..1a265c57 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs
@@ -375,7 +375,7 @@ fn test_with_id_type() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "name" => "test" }), None)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
#[derive(Serialize, Deserialize, Debug)]
struct Input {
@@ -407,11 +407,11 @@ fn test_double_add_and_double_fetch() {
let original_node1 = G::new_mut(Arc::clone(&db), &mut txn)
.add_n("person", Some(props! { "entity_name" => "person1" }), None)
- .collect_to_val();
+ .collect_to_obj();
let original_node2 = G::new_mut(Arc::clone(&db), &mut txn)
.add_n("person", Some(props! { "entity_name" => "person2" }), None)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -449,7 +449,7 @@ fn test_double_add_and_double_fetch() {
let _e = G::new_mut(Arc::clone(&db), &mut txn)
.add_e("knows", None, node1.id(), node2.id(), false, EdgeType::Node)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/remapping_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/remapping_tests.rs
index 0b190912..e63d2b6d 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/remapping_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/remapping_tests.rs
@@ -61,7 +61,7 @@ fn test_exclude_field_remapping() {
Some(props! { "text" => "test", "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let traversal = G::new(Arc::clone(&storage), &txn)
.n_from_type("person")
@@ -127,7 +127,7 @@ fn test_field_remapping() {
Some(props! { "text" => original.new_name.clone(), "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let traversal = G::new(Arc::clone(&storage), &txn)
.n_from_type("person")
@@ -180,7 +180,7 @@ fn test_identifier_remapping() {
Some(props! { "field" => original.new_value.clone(), "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let traversal = G::new(Arc::clone(&storage), &txn)
.n_from_type("person")
@@ -231,14 +231,14 @@ fn test_traversal_remapping() {
Some(props! { "text" => "test", "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let _other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n(
"friemd",
Some(props! { "text" => "test", "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -248,7 +248,7 @@ fn test_traversal_remapping() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let txn = storage.graph_env.read_txn().unwrap();
@@ -316,7 +316,7 @@ fn test_value_remapping() {
Some(props! { "text" => "test", "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let traversal = G::new(Arc::clone(&storage), &txn)
.n_from_type("person")
@@ -366,14 +366,14 @@ fn test_exists_remapping() {
Some(props! { "text" => "test", "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let _other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n(
"person",
Some(props! { "text" => "test", "other" => "other" }),
None,
)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -383,7 +383,7 @@ fn test_exists_remapping() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let traversal = G::new(Arc::clone(&storage), &txn)
.n_from_type("person")
@@ -426,10 +426,10 @@ fn test_one_of_each_remapping() {
let _node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "old_name" => "test" }), None)
- .collect_to_val();
+ .collect_to_obj();
let _other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! {}), None)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -439,7 +439,7 @@ fn test_one_of_each_remapping() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let traversal = G::new(Arc::clone(&storage), &txn)
.n_from_type("person")
@@ -508,10 +508,10 @@ fn test_nested_remapping() {
let _node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "old_name" => "test" }), None)
- .collect_to_val();
+ .collect_to_obj();
let _other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("thing", Some(props! {}), None)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -521,7 +521,7 @@ fn test_nested_remapping() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let user = G::new(Arc::clone(&storage), &txn)
.n_from_id(&_node.id())
@@ -577,10 +577,10 @@ fn test_double_nested_remapping() {
let _node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! {}), None)
- .collect_to_val();
+ .collect_to_obj();
let _other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("thing", Some(props! {}), None)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -590,7 +590,7 @@ fn test_double_nested_remapping() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let user = G::new(Arc::clone(&storage), &txn)
.n_from_id(&_node.id())
@@ -655,10 +655,10 @@ fn test_nested_with_other_remapping() {
let _node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "old_name" => "test" }), None)
- .collect_to_val();
+ .collect_to_obj();
let _other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! {}), None)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -668,7 +668,7 @@ fn test_nested_with_other_remapping() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let traversal = G::new(Arc::clone(&storage), &txn)
.n_from_type("person")
@@ -756,10 +756,10 @@ fn test_remapping_with_traversal_from_source() {
let _node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "old_name" => "test" }), None)
- .collect_to_val();
+ .collect_to_obj();
let _other_node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "other_name" => "other" }), None)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -769,7 +769,7 @@ fn test_remapping_with_traversal_from_source() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let remapping_vals = RemappingMap::new();
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs
index 2b361b6e..15d61461 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs
@@ -47,12 +47,12 @@ fn test_delete_node_with_secondary_index() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "name" => "John" }), Some(&["name"]))
- .collect_to_val();
+ .collect_to_obj();
let node_id = node.id(); // Save the ID before moving
let _ = G::new_mut_from(Arc::clone(&storage), &mut txn, node)
.update(Some(props! { "name" => "Jane" }))
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -105,11 +105,11 @@ fn test_update_of_secondary_indices() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "name" => "John" }), Some(&["name"]))
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut_from(Arc::clone(&storage), &mut txn, node)
.update(Some(props! { "name" => "Jane" }))
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs
index fc1c8a0e..70cc82e5 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs
@@ -29,16 +29,16 @@ fn test_shortest_path() {
let node1 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "node1")), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "node2")), None)
- .collect_to_val();
+ .collect_to_obj();
let node3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "node3")), None)
- .collect_to_val();
+ .collect_to_obj();
let node4 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "node4")), None)
- .collect_to_val();
+ .collect_to_obj();
let edge1 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -49,7 +49,7 @@ fn test_shortest_path() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let edge2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -59,7 +59,7 @@ fn test_shortest_path() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let edge3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
"knows",
@@ -69,7 +69,7 @@ fn test_shortest_path() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let txn = storage.graph_env.read_txn().unwrap();
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs
index 2bbcac4a..12b8e5a3 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs
@@ -36,10 +36,10 @@ fn test_update_node() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "test")), None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props!("name" => "test2")), None)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
let mut txn = storage.graph_env.write_txn().unwrap();
@@ -62,7 +62,7 @@ fn test_update_node() {
.check_property("name")
.unwrap()
.into_owned()
- .to_string(),
+ .inner_stringify(),
"john"
);
}
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs
index be48f359..9598fc61 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs
@@ -44,15 +44,15 @@ fn test_order_node_by_asc() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 30 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 20 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 10 }), None)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -75,15 +75,15 @@ fn test_order_node_by_desc() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 30 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 20 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 10 }), None)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -106,15 +106,15 @@ fn test_order_edge_by_asc() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 30 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 20 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 10 }), None)
- .collect_to_val();
+ .collect_to_obj();
let edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -125,7 +125,7 @@ fn test_order_edge_by_asc() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let edge2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -136,7 +136,7 @@ fn test_order_edge_by_asc() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -159,15 +159,15 @@ fn test_order_edge_by_desc() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 30 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 20 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 10 }), None)
- .collect_to_val();
+ .collect_to_obj();
let edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -178,7 +178,7 @@ fn test_order_edge_by_desc() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let edge2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -189,7 +189,7 @@ fn test_order_edge_by_desc() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -213,15 +213,15 @@ fn test_order_vector_by_asc() {
let vector = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 30 }))
- .collect_to_val();
+ .collect_to_obj();
let vector2 = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 20 }))
- .collect_to_val();
+ .collect_to_obj();
let vector3 = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 10 }))
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -245,15 +245,15 @@ fn test_order_vector_by_desc() {
let vector = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 30 }))
- .collect_to_val();
+ .collect_to_obj();
let vector2 = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 20 }))
- .collect_to_val();
+ .collect_to_obj();
let vector3 = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 10 }))
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -276,15 +276,15 @@ fn test_dedup() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 30 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 20 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 10 }), None)
- .collect_to_val();
+ .collect_to_obj();
let _edge = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -295,7 +295,7 @@ fn test_dedup() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
let _edge2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -306,7 +306,7 @@ fn test_dedup() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
diff --git a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs
index 1461b760..cd8751b3 100644
--- a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs
+++ b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs
@@ -52,15 +52,15 @@ fn test_from_v() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let vector = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, vector.id(), node.id(), false, EdgeType::Vec)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -83,15 +83,15 @@ fn test_to_v() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let vector = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 2.0, 3.0], "vector", None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, node.id(), vector.id(), false, EdgeType::Vec)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
println!("node: {node:?}");
@@ -116,7 +116,7 @@ fn test_brute_force_vector_search() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let vectors = vec![
vec![1.0, 2.0, 3.0],
@@ -128,7 +128,7 @@ fn test_brute_force_vector_search() {
for vector in vectors {
let vector_id = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&vector, "vector", None)
- .collect_to_val()
+ .collect_to_obj()
.id();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -139,7 +139,7 @@ fn test_brute_force_vector_search() {
false,
EdgeType::Vec,
)
- .collect_to_val()
+ .collect_to_obj()
.id();
vector_ids.push(vector_id);
}
@@ -169,15 +169,15 @@ fn test_order_by_desc() {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 10 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node2 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 20 }), None)
- .collect_to_val();
+ .collect_to_obj();
let node3 = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", Some(props! { "age" => 30 }), None)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -214,7 +214,7 @@ fn test_vector_search() {
];
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&random_vector, "vector", None)
- .collect_to_val();
+ .collect_to_obj();
println!("inserted vector: {i:?}");
i += 1;
}
@@ -235,7 +235,7 @@ fn test_vector_search() {
for vector in vectors {
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&vector, "vector", None)
- .collect_to_val();
+ .collect_to_obj();
inserted_vectors.push(node.id());
println!("inserted vector: {i:?}");
i += 1;
@@ -269,13 +269,13 @@ fn test_delete_vector() {
let vector = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None)
- .collect_to_val();
+ .collect_to_obj();
let node = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e("knows", None, node.id(), vector.id(), false, EdgeType::Vec)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -349,11 +349,11 @@ fn test_drop_vectors_then_add_them_back() {
let entity = G::new_mut(Arc::clone(&storage), &mut txn)
.add_n("Entity", Some(props! { "name" => "entity1" }), None)
- .collect_to_val();
+ .collect_to_obj();
let embedding = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -364,7 +364,7 @@ fn test_drop_vectors_then_add_them_back() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
@@ -451,7 +451,7 @@ fn test_drop_vectors_then_add_them_back() {
let embedding = G::new_mut(Arc::clone(&storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&storage), &mut txn)
.add_e(
@@ -462,7 +462,7 @@ fn test_drop_vectors_then_add_them_back() {
false,
EdgeType::Node,
)
- .collect_to_val();
+ .collect_to_obj();
txn.commit().unwrap();
diff --git a/helix-db/src/helix_engine/traversal_core/README.md b/helix-db/src/helix_engine/traversal_core/README.md
deleted file mode 100644
index 70b7bb31..00000000
--- a/helix-db/src/helix_engine/traversal_core/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-## Graph Core
-
-This will be the core graph traversals. Actual storage of data will be done through the storage layer. The code in Graph Core will be responsible for using the storage layer to perform graph traversals.
\ No newline at end of file
diff --git a/helix-db/src/helix_engine/traversal_core/config.rs b/helix-db/src/helix_engine/traversal_core/config.rs
index 84080761..390b3eed 100644
--- a/helix-db/src/helix_engine/traversal_core/config.rs
+++ b/helix-db/src/helix_engine/traversal_core/config.rs
@@ -1,6 +1,6 @@
use crate::{
helix_engine::types::GraphError,
- helixc::analyzer::analyzer::{INTROSPECTION_DATA, SECONDARY_INDICES},
+ helixc::analyzer::{INTROSPECTION_DATA, SECONDARY_INDICES},
};
use serde::{Deserialize, Serialize};
use std::{fmt, path::PathBuf};
diff --git a/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs b/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs
index 409ba2c3..d1f56896 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs
@@ -81,7 +81,6 @@ impl<'a, I: Iterator<Item = Result<TraversalValue, GraphError>>, K: Into
.storage
.secondary_indices
.get(index)
- // TODO: this
.ok_or(GraphError::New(format!(
"Secondary Index {index} not found"
)))
diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs
index 3be0dbc3..383cbc47 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs
@@ -8,7 +8,6 @@ use crate::{
},
protocol::value::Value,
};
-use std::sync::Arc;
pub struct InsertVIterator {
inner: std::iter::Once<Result<TraversalValue, GraphError>>,
@@ -31,14 +30,6 @@ pub trait InsertVAdapter<'a, 'b>: Iterator<Item = Result<TraversalValue, GraphError>>
) -> RwTraversalIterator<'a, 'b, impl Iterator<Item = Result<TraversalValue, GraphError>>>
where
F: Fn(&HVector, &RoTxn) -> bool;
-
- fn insert_vs<F>(
- self,
- queries: &[Vec<f64>],
- fields: Option<Vec<(String, Value)>>,
- ) -> RwTraversalIterator<'a, 'b, impl Iterator<Item = Result<TraversalValue, GraphError>>>
- where
- F: Fn(&HVector, &RoTxn) -> bool;
}
impl<'a, 'b, I: Iterator<Item = Result<TraversalValue, GraphError>>> InsertVAdapter<'a, 'b>
@@ -77,32 +68,4 @@ impl<'a, 'b, I: Iterator<Item = Result<TraversalValue, GraphError>>> InsertVAdap
txn: self.txn,
}
}
-
- fn insert_vs<F>(
- self,
- queries: &[Vec<f64>],
- fields: Option<Vec<(String, Value)>>,
- ) -> RwTraversalIterator<'a, 'b, impl Iterator<Item = Result<TraversalValue, GraphError>>>
- where
- F: Fn(&HVector, &RoTxn) -> bool,
- {
- let txn = self.txn;
- let storage = Arc::clone(&self.storage);
- let iter = queries
- .iter()
- .map(|vec| {
- let vector = storage.vectors.insert::<F>(txn, vec, fields.clone()); // TODO: remove clone
- match vector {
- Ok(vector) => Ok(TraversalValue::Vector(vector)),
- Err(e) => Err(GraphError::from(e)),
- }
- })
- .collect::<Vec<_>>();
-
- RwTraversalIterator {
- inner: iter.into_iter(),
- storage: self.storage,
- txn,
- }
- }
}
diff --git a/helix-db/src/helix_engine/traversal_core/traversal_iter.rs b/helix-db/src/helix_engine/traversal_core/traversal_iter.rs
index 7afb54a1..06818313 100644
--- a/helix-db/src/helix_engine/traversal_core/traversal_iter.rs
+++ b/helix-db/src/helix_engine/traversal_core/traversal_iter.rs
@@ -108,41 +108,52 @@ impl<'scope, 'env, I: Iterator<Item = Result<TraversalValue, GraphError>>>
}
}
- pub fn collect_to<B: FromIterator<TraversalValue>>(self) -> B
- where
- I: Iterator<Item = Result<TraversalValue, GraphError>>,
- {
+ pub fn take_and_collect_to<B: FromIterator<TraversalValue>>(self, n: usize) -> B {
+ self.inner
+ .filter_map(|item| item.ok())
+ .take(n)
+ .collect::<B>()
+ }
+
+ pub fn collect_to<B: FromIterator<TraversalValue>>(self) -> B {
self.inner.filter_map(|item| item.ok()).collect::<B>()
}
- pub fn collect_to_val(self) -> TraversalValue
- where
- I: Iterator<Item = Result<TraversalValue, GraphError>>,
- {
- match self
- .inner
+ pub fn collect_dedup<B: FromIterator<TraversalValue>>(self) -> B {
+ self.inner
.filter_map(|item| item.ok())
- .collect::<Vec<_>>()
- .first()
- {
- Some(val) => val.clone(), // TODO: Remove clone
- None => TraversalValue::Empty,
- }
+ .unique()
+ .collect::<B>()
}
+
pub fn collect_to_obj(self) -> TraversalValue {
match self.inner.filter_map(|item| item.ok()).next() {
Some(val) => val,
None => TraversalValue::Empty,
}
}
-}
-// pub trait TraversalIteratorMut<'a> {
-// type Inner: Iterator<Item = Result<TraversalValue, GraphError>>;
-// fn next<'b>(
-// &mut self,
-// storage: Arc<HelixGraphStorage>,
-// txn: &'b mut RwTxn<'a>,
-// ) -> Option<Result<TraversalValue, GraphError>>;
+ pub fn count_to_val(self) -> Value {
+ Value::from(self.inner.count())
+ }
-// }
+ pub fn map_value_or(
+ mut self,
+ default: bool,
+ f: impl Fn(&Value) -> bool,
+ ) -> Result<bool, GraphError> {
+ let val = match &self.inner.next() {
+ Some(Ok(TraversalValue::Value(val))) => {
+ println!("value : {val:?}");
+ Ok(f(val))
+ }
+ Some(Ok(_)) => Err(GraphError::ConversionError(
+ "Expected value, got something else".to_string(),
+ )),
+ Some(Err(err)) => Err(GraphError::from(err.to_string())),
+ None => Ok(default),
+ };
+ println!("result: {val:?}");
+ val
+ }
+}
diff --git a/helix-db/src/helix_engine/types.rs b/helix-db/src/helix_engine/types.rs
index bdba418a..be21efc4 100644
--- a/helix-db/src/helix_engine/types.rs
+++ b/helix-db/src/helix_engine/types.rs
@@ -1,4 +1,4 @@
-use crate::{helix_gateway::router::router::IoContFn, helixc::parser::parser_methods::ParserError};
+use crate::{helix_gateway::router::router::IoContFn, helixc::parser::ParserError};
use core::fmt;
use heed3::Error as HeedError;
use sonic_rs::Error as SonicError;
diff --git a/helix-db/src/helix_engine/vector_core/vector_core.rs b/helix-db/src/helix_engine/vector_core/vector_core.rs
index cc2fd2b3..83fe8603 100644
--- a/helix-db/src/helix_engine/vector_core/vector_core.rs
+++ b/helix-db/src/helix_engine/vector_core/vector_core.rs
@@ -106,8 +106,6 @@ impl VectorCore {
#[inline]
fn get_new_level(&self) -> usize {
- // TODO: look at using the XOR shift algorithm for random number generation
- // Should instead using an atomic mutable seed and the XOR shift algorithm
let mut rng = rand::rng();
let r: f64 = rng.random::();
(-r.ln() * self.config.m_l).floor() as usize
@@ -175,7 +173,6 @@ impl VectorCore {
for result in iter {
let (key, _) = result?;
- // TODO: fix here because not working at all
let mut arr = [0u8; 16];
let len = std::cmp::min(key.len(), 16);
arr[..len].copy_from_slice(&key[prefix_len..(prefix_len + len)]);
@@ -188,7 +185,6 @@ impl VectorCore {
let vector = self.get_vector(txn, neighbor_id, level, false)?;
let passes_filters = match filter {
- // TODO: look at implementing a macro that actually just runs each function rather than iterating through
Some(filter_slice) => filter_slice.iter().all(|f| f(&vector, txn)),
None => true,
};
@@ -196,12 +192,6 @@ impl VectorCore {
if passes_filters {
neighbors.push(vector);
}
-
- //if let Ok(vector) = self.get_vector(txn, neighbor_id, level, true) {
- // if filter.is_none() || filter.unwrap().iter().all(|f| f(&vector, txn)) {
- // neighbors.push(vector);
- // }
- //}
}
neighbors.shrink_to_fit();
diff --git a/helix-db/src/helix_gateway/README.md b/helix-db/src/helix_gateway/README.md
deleted file mode 100644
index 7c494c5d..00000000
--- a/helix-db/src/helix_gateway/README.md
+++ /dev/null
@@ -1,14 +0,0 @@
-## Plan
-
-- Have a thread pool with N threads that will be instantiated with Arc Mutex instances of the graph storage
-- These instances will be passed to the router, which will lock the mutex and pass the graph reference to the handler to perform the operation, dropping the lock afterwards.
-
-- The API Router is a general purpose bit of code that parses the incoming request, and then calls the appropriate handler function. The handler function is responsible for performing the actual work of the API call.
-- The connection will just be a tcp stream that will run in the fetched thread from the thread pool.
-
-#### Process
-1. DB connection is tcp stream
-2. When a connection is made, get a thread from the pool and move the tcp stream into it. The stream receives the request on that thread and reads it into the router, passing the thread's storage instance.
-3. Router uses thread's storage instance to process job
-
-
\ No newline at end of file
diff --git a/helix-db/src/helix_gateway/embedding_providers/embedding_providers.rs b/helix-db/src/helix_gateway/embedding_providers/embedding_providers.rs
index ee36d90e..d092db08 100644
--- a/helix-db/src/helix_gateway/embedding_providers/embedding_providers.rs
+++ b/helix-db/src/helix_gateway/embedding_providers/embedding_providers.rs
@@ -5,9 +5,6 @@ use sonic_rs::{JsonContainerTrait, json};
use std::env;
use url::Url;
-// TODO: add support for rust native embedding model libs as well so it runs fully built in
-// in case we have a gpu or something on the server we're running it on
-
/// Trait for embedding models to fetch text embeddings.
#[allow(async_fn_in_trait)]
pub trait EmbeddingModel {
diff --git a/helix-db/src/helix_gateway/gateway.png b/helix-db/src/helix_gateway/gateway.png
deleted file mode 100644
index ee73361b..00000000
Binary files a/helix-db/src/helix_gateway/gateway.png and /dev/null differ
diff --git a/helix-db/src/helix_gateway/mcp/tools_tests.rs b/helix-db/src/helix_gateway/mcp/tools_tests.rs
index f02a73f0..2c17b2f6 100644
--- a/helix-db/src/helix_gateway/mcp/tools_tests.rs
+++ b/helix-db/src/helix_gateway/mcp/tools_tests.rs
@@ -75,7 +75,7 @@ fn test_mcp_tool_search_vector() {
// creates nodes and vectors
let node = G::new_mut(Arc::clone(&engine.storage), &mut txn)
.add_n("person", None, None)
- .collect_to_val();
+ .collect_to_obj();
let mut vectors = vec![
vec![1.0, 1.0, 1.0],
vec![0.0, 0.0, 0.0],
@@ -95,11 +95,11 @@ fn test_mcp_tool_search_vector() {
for vector in vectors {
let vector = G::new_mut(Arc::clone(&engine.storage), &mut txn)
.insert_v::<fn(&HVector, &RoTxn) -> bool>(&vector, "vector", None)
- .collect_to_val();
+ .collect_to_obj();
let _ = G::new_mut(Arc::clone(&engine.storage), &mut txn)
.add_e("knows", None, node.id(), vector.id(), false, EdgeType::Vec)
- .collect_to_val();
+ .collect_to_obj();
}
txn.commit().unwrap();
let txn = engine.storage.graph_env.read_txn().unwrap();
diff --git a/helix-db/src/helix_gateway/worker_pool/mod.rs b/helix-db/src/helix_gateway/worker_pool/mod.rs
index 3f7c430b..e64adb14 100644
--- a/helix-db/src/helix_gateway/worker_pool/mod.rs
+++ b/helix-db/src/helix_gateway/worker_pool/mod.rs
@@ -33,8 +33,8 @@ impl WorkerPool {
"Expected number of threads in thread pool to be more than 0, got {size}"
);
- let (req_tx, req_rx) = flume::bounded::(1000); // TODO: make this configurable
- let (cont_tx, cont_rx) = flume::bounded::(1000); // TODO: make this configurable
+ let (req_tx, req_rx) = flume::bounded::(1000);
+ let (cont_tx, cont_rx) = flume::bounded::(1000);
let workers = (0..size)
.map(|_| {
diff --git a/helix-db/src/helixc/PLAN.md b/helix-db/src/helixc/PLAN.md
deleted file mode 100644
index 082f1043..00000000
--- a/helix-db/src/helixc/PLAN.md
+++ /dev/null
@@ -1,508 +0,0 @@
-## db native functions
-
-## `add_e`
-
----
-
-#### deps
-
-```rs
-self
-label: &'a str
-properties: Vec<(String, Value)>
-from: &'a str
-to: &'a str
-secondary_indices: Option<&'a [String]>
-
-add_e(&label, properties, from, to, Some(idxs))
-```
-
-#### query
-
-```
-QUERY addE({field1: value1, field2: value2, ...})
- e <- AddE({field1: value1, field2: value2, ...})::From(from_node_id)::To(to_node_id)
-```
-
-#### HQL rules
-
-- nothing can come before `AddE`
-
-## `add_n`
-
----
-
-#### deps
-
-```rs
-self
-label: &'a str
-properties: Vec<(String, Value)>
-secondary_indices: Option<&'a [String]>
-// needs G::new_mut
-add_n(&label, properties, Some(idxs))
-```
-
-#### query
-
-```
-QUERY addN({field1: value1, field2: value2, ...})
- n <- AddN({field1: value1, field2: value2, ...})
-```
-
-#### HQL rules
-
-- nothing can come before or after `AddN`
-
-## `add_v`
-
----
-
-#### deps
-
-```rs
-query: Vec<f64>
-vec_label: String
-fields: Option<HashMap<String, Value>> (HashMap::from(props! { ... }))
-// needs G::new_mut
-add_v(&query, &label, Some(fields))
-```
-
-#### query
-
-```
-QUERY insertV(vec: [F64], label: String)
- vec <- AddV(vec, {field1: value1, field2: value2, ...})
-```
-
-#### HQL rules
-
-- nothing can come before or after `AddV`
-
-## `drop`
-
----
-
-#### deps
-
-```rs
-query: Vec<f64>
-vec_label: String
-fields: Option<HashMap<String, Value>> (HashMap::from(props! { ... }))
-// needs G::new_mut
-add_v(&query, &label, Some(fields))
-```
-
-#### query
-
-```
-QUERY insertV(vec: [F64], label: String)
- vec <- AddV(vec, {field1: value1, field2: value2, ...})
-```
-
-#### HQL rules
-
-- nothing can come before or after `AddV`
-
-## `dedup`
-
----
-
-#### deps
-
-```rs
-none
-
-.dedup()
-```
-
-#### query
-
-```
-QUERY dedup() =>
- nodes <- N::Out::Dedup()
-```
-
-#### HQL rules
-
-- anything that returns a collection of traversal items (nodes, edges, vectors) can be deduped
-- it deduplicates in place
-- only object access can come after `Dedup`, for explicitness (each source item is pushed through the entire traversal pipeline, so the final results are deduplicated rather than any intermediate results)
- - only objects, because we don't want to implicitly deduplicate only at the end when dedup is used in the middle of a traversal.
-
-## `filter_ref`
-
----
-
-#### deps
-
-```rs
-f: Fn(&Result<TraversalValue, GraphError>, &RoTxn) -> Result<bool, GraphError>
-
-.filter_ref(|val: &Result<TraversalValue, GraphError>, txn: &'a RoTxn<'a>| -> Result<bool, GraphError> {
- // return true if val should be included
- // return false if val should be excluded
- // e.g. the following filters out all nodes with a name that is not "John"
- if let Ok(TraversalValue::Node(node)) = val {
- if let Some(value) = node.check_property("name") {
- match value {
- Value::String(name) => return Ok(name == "John"),
- _ => return Ok(false),
- }
- }
- }
- Ok(false)
-})
-```
-
-- Note that a Result is returned because LMDB gets may be used inside the filter and can fail, returning an error.
-
-#### query
-
-```
-QUERY filter_ref() =>
- nodes <- N::WHERE(_::{name}::EQ("John"))
-```
-
-#### HQL rules
-
-- anything that returns a collection of traversal items (nodes, edges, vectors) can be filtered
-- only an anonymous or an `EXISTS` traversal that evaluates to a boolean can be used in the `WHERE` clause
-
-## `for ... in ...`
-
----
-
-#### deps
-
-> instead of using a for loop as we are currently doing
-> could use a `.iter()` that iterates through the parameter vec
-
-> QUESTION: do we allow the iterated value to be the result of a traversal?
-> e.g. `FOR node in N::Out {...}`
-
-```rs
-for data in data.nodes {
- // do something with data
-}
-```
-
-#### query
-
-```
-QUERY for_in(nodes: [Type]) =>
- FOR node IN nodes {
- // do something with node
- }
-```
-
-#### HQL rules
-
-- the iterated parameter must be a collection of items (nodes, edges, vectors)
-- TODO: the iterated value can be the result of a traversal
-- you can have nested for loops
-
-## `range`
-
----
-
-#### deps
-
-```rs
-start: i32
-end: i32
-
-.range(start, end)
-```
-
-#### query
-
-```
-QUERY range(start: Int, end: Int) =>
- nodes <- N::Range(start, end)
-```
-
-#### HQL rules
-
-- the start and end must be integers
-- the start must be less than the end
-- the start and end must be positive
-- if the start is greater than the length of the collection, it will return an empty collection
-- if the end is greater than the length of the collection, it will return the collection from the start index to the end of the collection
-
-## `update`
-
----
-
-#### deps
-
-```rs
-// needs G::new_mut
-.update(props: Vec<(String, Value)>)
-```
-
-#### query
-
-```
-QUERY update(node_id: ID, newNode: NodeType) =>
- N(node_id)::Update(newNode)
- // assuming the node type has the fields `field1` and `field2`
- N(node_id)::Update({field1: value1, field2: value2, ...})
-```
-
-#### HQL rules
-
-- the value passed into Update must be of the corresponding node type
-- or it can be a partial object that is on the node type
-
-## `out`
-
----
-
-#### deps
-
-```rs
-edge_label: &'a str
-
-.out(edge_label: &'a str)
-```
-
-#### query
-
-```
-QUERY out() =>
- nodes <- N::Out
-```
-
-## `out_e`
-
----
-
-#### deps
-
-```rs
-edge_label: &'a str
-
-.out_e(edge_label: &'a str)
-```
-
-#### query
-
-```
-QUERY out_e() =>
- edges <- N::OutE
-```
-
-## `in_`
-
----
-
-#### deps
-
-```rs
-edge_label: &'a str
-
-.in(edge_label: &'a str)
-```
-
-#### query
-
-```
-QUERY in() =>
- nodes <- N::In
-```
-
-## `in_e`
-
----
-
-#### deps
-
-```rs
-edge_label: &'a str
-
-.in_e(edge_label: &'a str)
-```
-
-#### query
-
-```
-QUERY in_e() =>
- edges <- N::InE
-```
-
-## `from_n`
-
----
-
-#### deps
-
-```rs
-.from_n()
-```
-
-#### query
-
-```
-QUERY from_n(edge_id: ID) =>
- nodes <- E(edge_id)::FromN()
-```
-
-## `to_n`
-
----
-
-#### deps
-
-```rs
-.to_n()
-```
-
-#### query
-
-```
-QUERY to_n(edge_id: ID) =>
- nodes <- E(edge_id)::ToN()
-```
-
-
-## `search_v`
-
----
-
-#### deps
-
-```rs
-query: Vec<f64>
-k: usize
-filter: Option<&[Fn(&HVector) -> bool]>
-
-.search_v(query: Vec<f64>, k: usize, filter: Option<&[Fn(&HVector) -> bool]>)
-```
-
-#### query
-
-```
-QUERY search_v(query: [F64], k: Int, doc_type: String) =>
- vectors <- SearchV(query, k)::PREFILTER(_::{doc_type}::EQ(doc_type))
-```
-
-#### HQL rules
-
-- the prefilter must be an `EXISTS` traversal or an anonymous traversal that evaluates to a boolean
-- the prefilter acts exactly like a `WHERE` clause
-- the k must be an integer
-- the query must be a vector
-
-## `e_from_id`
-
----
-
-#### deps
-
-```rs
-edge_id: &u128
-
-.e_from_id(edge_id: &u128)
-```
-
-#### query
-
-```
-QUERY e_from_id(edge_id: ID) =>
- edges <- E(edge_id)
-```
-
-#### HQL rules
-- Type must exist in schema
-- the ID type is a UUID String in HQL
- - This will get converted to a u128 inside the query
-
-## `e_from_types`
-
----
-
-#### deps
-
-```rs
-edge_label: &'a str
-
-.e_from_types(edge_label: &'a str)
-```
-
-#### query
-
-```
-QUERY e_from_types() =>
- edges <- E
-```
-
-#### HQL rules
-- Type must exist in schema
-
-## `n_from_id`
-
----
-
-#### deps
-
-```rs
-node_id: &u128
-
-.n_from_id(node_id: &u128)
-```
-
-#### query
-
-```
-QUERY n_from_id(node_id: ID) =>
- nodes <- N(node_id)
-```
-
-#### HQL rules
-- Type must exist in schema
-- the ID type is a UUID String in HQL
- - This will get converted to a u128 inside the query
-
-## `n_from_types`
-
----
-
-#### deps
-
-```rs
-edge_label: &'a str
-
-.n_from_types(edge_label: &'a str)
-```
-
-#### query
-
-```
-QUERY n_from_types() =>
- nodes <- N
-```
-
-#### HQL rules
-- Type must exist in schema
-
-# TODO
-
-## `bulk_add_e`
-
-## `bulk_add_n`
-
-## `insert_vs`
-
-## `filter_mut`
-
----
-
-#### deps
-
-```rs
-self
-
-.filter_mut(filter)
-```
diff --git a/helix-db/src/helixc/analyzer/README.md b/helix-db/src/helixc/analyzer/README.md
index 2768540a..7592f4fc 100644
--- a/helix-db/src/helixc/analyzer/README.md
+++ b/helix-db/src/helixc/analyzer/README.md
@@ -1,3 +1,41 @@
-# Interpreter
+# Analyzer Module
-The Interpreter takes in the AST and
\ No newline at end of file
+## Overview
+The analyzer module performs static analysis and type checking on the HelixQL AST, ensuring queries are grammatically and semantically correct before code generation.
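+
+For a concrete sense of the input, a minimal HelixQL query of the shape these passes check (borrowed from the query examples in this repo's planning notes; the names and fields are placeholders):
+
+```
+QUERY addPerson(name: String, age: Int) =>
+    person <- AddN({name: name, age: age})
+```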
+
+## Structure
+
+### Core Components
+- **`mod.rs`** - Main analyzer entry point, orchestrates validation passes
+- **`types.rs`** - Type system definitions and type inference structures
+- **`diagnostic.rs`** - Diagnostic messages and error reporting
+- **`error_codes.rs`** - Error code definitions and messages
+- **`errors.rs`** - Error handling utilities
+- **`fix.rs`** - Auto-fix suggestions for common errors
+- **`pretty.rs`** - Pretty printing utilities for diagnostics
+- **`utils.rs`** - Helper functions for analysis
+
+### Validation Methods (in `methods/`)
+- **`schema_methods.rs`** - Schema validation and field lookup building
+- **`query_validation.rs`** - Query structure and parameter validation
+- **`migration_validation.rs`** - Schema migration consistency checks
+- **`statement_validation.rs`** - Statement-level validation
+- **`traversal_validation.rs`** - Graph traversal operation validation
+- **`graph_step_validation.rs`** - Individual graph step validation
+- **`object_validation.rs`** - Object literal and remapping validation
+- **`infer_expr_type.rs`** - Expression type inference
+- **`exclude_validation.rs`** - Field exclusion validation
+
+## Analysis Flow
+
+1. **Input**: Parsed AST from the parser module
+2. **Schema Validation**: Verifies schema definitions are valid
+3. **Migration Validation**: Ensures migrations are consistent across versions
+4. **Query Validation**: Type-checks queries against schemas
+5. **Output**: Diagnostics (errors/warnings) and validated AST for code generation
+
+
+## Error Handling
+- Error codes provide consistent, searchable error identification
+- Diagnostics include source location for precise error reporting
+- Fix suggestions help users resolve common issues
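+
+## Usage Sketch
+
+A minimal sketch of checking a parsed `Source` by hand (how the AST is obtained is elided here; the real wiring lives in `helix-cli`):
+
+```rust
+use helix_db::helixc::{analyzer::analyze, parser::types::Source};
+
+fn check(source: &Source) {
+    // Runs the three passes in order: schema, migrations, queries.
+    let (diagnostics, _generated) = analyze(source);
+    if diagnostics.is_empty() {
+        println!("no diagnostics; safe to hand off to the generator");
+    } else {
+        eprintln!("{} diagnostic(s) reported", diagnostics.len());
+    }
+}
+```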
\ No newline at end of file
diff --git a/helix-db/src/helixc/analyzer/analyzer.rs b/helix-db/src/helixc/analyzer/analyzer.rs
deleted file mode 100644
index ddba9c80..00000000
--- a/helix-db/src/helixc/analyzer/analyzer.rs
+++ /dev/null
@@ -1,271 +0,0 @@
-//! Semantic analyzer for Helix‑QL.
-use crate::helixc::{
- analyzer::{
- diagnostic::Diagnostic,
- methods::{
- migration_validation::validate_migration,
- query_validation::validate_query,
- schema_methods::{build_field_lookups, check_schema, SchemaVersionMap},
- },
- types::Type,
- },
- generator::Source as GeneratedSource,
- parser::helix_parser::{EdgeSchema, ExpressionType, Field, Query, ReturnType, Source},
-};
-use itertools::Itertools;
-use serde::Serialize;
-use std::{
- borrow::Cow,
- collections::{HashMap, HashSet},
- sync::OnceLock,
-};
-
-pub fn analyze(src: &Source) -> (Vec<Diagnostic>, GeneratedSource) {
- let mut ctx = Ctx::new(src);
- ctx.check_schema();
- ctx.check_schema_migrations();
- ctx.check_queries();
- (ctx.diagnostics, ctx.output)
-}
-
-/// Internal working context shared by all passes.
-pub(crate) struct Ctx<'a> {
- pub(super) src: &'a Source,
- /// Quick look‑ups
- pub(super) node_set: HashSet<&'a str>,
- pub(super) vector_set: HashSet<&'a str>,
- pub(super) edge_map: HashMap<&'a str, &'a EdgeSchema>,
- pub(super) node_fields: HashMap<&'a str, HashMap<&'a str, Cow<'a, Field>>>,
- pub(super) edge_fields: HashMap<&'a str, HashMap<&'a str, Cow<'a, Field>>>,
- pub(super) vector_fields: HashMap<&'a str, HashMap<&'a str, Cow<'a, Field>>>,
- pub(super) all_schemas: SchemaVersionMap<'a>,
-    pub(super) diagnostics: Vec<Diagnostic>,
- pub(super) output: GeneratedSource,
-}
-
-pub static INTROSPECTION_DATA: OnceLock<IntrospectionData> = OnceLock::new();
-pub static SECONDARY_INDICES: OnceLock<Vec<String>> = OnceLock::new();
-
-impl<'a> Ctx<'a> {
- pub(super) fn new(src: &'a Source) -> Self {
- // Build field look‑ups once
- let all_schemas = build_field_lookups(src);
- let (node_fields, edge_fields, vector_fields) = all_schemas.get_latest();
-
- let output = GeneratedSource {
- src: src.source.clone(),
- ..Default::default()
- };
-
- let ctx = Self {
- node_set: src
- .get_latest_schema()
- .node_schemas
- .iter()
- .map(|n| n.name.1.as_str())
- .collect(),
- vector_set: src
- .get_latest_schema()
- .vector_schemas
- .iter()
- .map(|v| v.name.as_str())
- .collect(),
- edge_map: src
- .get_latest_schema()
- .edge_schemas
- .iter()
- .map(|e| (e.name.1.as_str(), e))
- .collect(),
- node_fields,
- edge_fields,
- vector_fields,
- all_schemas,
- src,
- diagnostics: Vec::new(),
- output,
- };
-
- INTROSPECTION_DATA
- .set(IntrospectionData::from_schema(&ctx))
- .ok();
-
- SECONDARY_INDICES
- .set(
- src.get_latest_schema()
- .node_schemas
- .iter()
- .flat_map(|schema| {
- schema
- .fields
- .iter()
- .filter(|f| f.is_indexed())
- .map(|f| f.name.clone())
- })
- .dedup()
- .collect(),
- )
- .ok();
- ctx
- }
-
- #[allow(unused)]
- pub(super) fn get_item_fields(
- &self,
- item_type: &Type,
- ) -> Option<&HashMap<&str, Cow<'_, Field>>> {
- match item_type {
- Type::Node(Some(node_type)) | Type::Nodes(Some(node_type)) => {
- self.node_fields.get(node_type.as_str())
- }
- Type::Edge(Some(edge_type)) | Type::Edges(Some(edge_type)) => {
- self.edge_fields.get(edge_type.as_str())
- }
- Type::Vector(Some(vector_type)) | Type::Vectors(Some(vector_type)) => {
- self.vector_fields.get(vector_type.as_str())
- }
- _ => None,
- }
- }
-
- // ---------- Pass #1: schema --------------------------
- /// Validate that every edge references declared node types.
- pub(super) fn check_schema(&mut self) {
- check_schema(self);
- }
-
- // ---------- Pass #1.5: schema migrations --------------------------
- pub(super) fn check_schema_migrations(&mut self) {
- for m in &self.src.migrations {
- validate_migration(self, m);
- }
- }
-
- // ---------- Pass #2: queries -------------------------
- pub(super) fn check_queries(&mut self) {
- for q in &self.src.queries {
- validate_query(self, q);
- }
- }
-}
-
-#[derive(Serialize)]
-pub struct IntrospectionData {
- schema: SchemaData,
-    queries: Vec<QueryData>,
-}
-
-impl IntrospectionData {
- fn from_schema(ctx: &Ctx) -> Self {
- let queries = ctx.src.queries.iter().map(QueryData::from_query).collect();
- Self {
- schema: SchemaData::from_ctx(ctx),
- queries,
- }
- }
-}
-
-#[derive(Serialize)]
-pub struct SchemaData {
-    nodes: Vec<NodeData>,
-    vectors: Vec<NodeData>,
-    edges: Vec<EdgeData>,
-}
-
-impl SchemaData {
- fn from_ctx(ctx: &Ctx) -> Self {
- let nodes = ctx.node_fields.iter().map(NodeData::from_entry).collect();
- let vectors = ctx.vector_fields.iter().map(NodeData::from_entry).collect();
- let edges = ctx.edge_map.iter().map(EdgeData::from_entry).collect();
-
- SchemaData {
- nodes,
- vectors,
- edges,
- }
- }
-}
-
-#[derive(Serialize)]
-pub struct NodeData {
- name: String,
-    properties: HashMap<String, String>,
-}
-
-impl NodeData {
-    fn from_entry(val: (&&str, &HashMap<&str, Cow<Field>>)) -> Self {
- let properties = val
- .1
- .iter()
- .map(|(n, f)| (n.to_string(), f.field_type.to_string()))
- .collect();
- NodeData {
- name: val.0.to_string(),
- properties,
- }
- }
-}
-
-#[derive(Serialize)]
-pub struct EdgeData {
- name: String,
- from: String,
- to: String,
-    properties: HashMap<String, String>,
-}
-
-impl EdgeData {
- fn from_entry((name, es): (&&str, &&EdgeSchema)) -> Self {
- let properties = es
- .properties
- .iter()
- .flatten()
- .map(|f| (f.name.to_string(), f.field_type.to_string()))
- .collect();
-
- EdgeData {
- name: name.to_string(),
- from: es.from.1.clone(),
- to: es.to.1.clone(),
- properties,
- }
- }
-}
-
-#[derive(Serialize)]
-pub struct QueryData {
- name: String,
-    parameters: HashMap<String, String>,
-    returns: Vec<String>,
-}
-
-impl QueryData {
- fn from_query(query: &Query) -> Self {
- let parameters = query
- .parameters
- .iter()
- .map(|p| (p.name.1.clone(), p.param_type.1.to_string()))
- .collect();
-
- let returns = query
- .return_values
- .iter()
- .flat_map(|e| {
- if let ReturnType::Expression(expr) = e {
- if let ExpressionType::Identifier(ident) = &expr.expr {
- Some(ident.clone())
- } else {
- None
- }
- } else {
- None
- }
- })
- .collect();
-
- QueryData {
- name: query.name.to_string(),
- parameters,
- returns,
- }
- }
-}
diff --git a/helix-db/src/helixc/analyzer/error_codes.rs b/helix-db/src/helixc/analyzer/error_codes.rs
index 03e2a1e5..cae647ed 100644
--- a/helix-db/src/helixc/analyzer/error_codes.rs
+++ b/helix-db/src/helixc/analyzer/error_codes.rs
@@ -209,7 +209,7 @@ implement_error_code!(E202,
implement_error_code!(E203, "cannot access properties on the type `{}`" => { type_name }, "ensure the type is a node, edge, or vector" => {});
implement_error_code!(E204, "field `{}` is a reserved field name" => { field_name }, "rename the field" => {});
implement_error_code!(E205,
- "type of value `{}` does not match field type `{}` for {} type `{}`" => { value, field_type, item_type, item_type_name },
+ "type of value `{}` is `{}`, which does not match field type `{}` for {} type `{}`" => { value, value_type, field_type, item_type, item_type_name },
"change the value type to match the field type defined in the schema" => {});
implement_error_code!(E206, "invalid value type `{}`" => { value_type }, "use a literal or an identifier" => {});
implement_error_code!(E207, "edge type `{}` exists but it is not a valid edge type for the given {} type `{}`" => { edge_type, item_type, item_type_name }, "check the schema field names" => {});
diff --git a/helix-db/src/helixc/analyzer/errors.rs b/helix-db/src/helixc/analyzer/errors.rs
index e6ba8f4a..7dcee21e 100644
--- a/helix-db/src/helixc/analyzer/errors.rs
+++ b/helix-db/src/helixc/analyzer/errors.rs
@@ -1,11 +1,11 @@
use crate::helixc::{
analyzer::{
- analyzer::Ctx,
+ Ctx,
diagnostic::{Diagnostic, DiagnosticSeverity},
error_codes::ErrorCode,
fix::Fix,
},
- parser::{helix_parser::Query, location::Loc},
+ parser::{location::Loc, types::Query},
};
pub(crate) fn push_schema_err(
diff --git a/helix-db/src/helixc/analyzer/methods/exclude_validation.rs b/helix-db/src/helixc/analyzer/methods/exclude_validation.rs
index 9f013c38..9a84cd46 100644
--- a/helix-db/src/helixc/analyzer/methods/exclude_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/exclude_validation.rs
@@ -3,12 +3,12 @@ use crate::{
generate_error,
helixc::{
analyzer::{
- analyzer::Ctx,
+ Ctx,
errors::{push_query_err, push_query_err_with_fix},
fix::Fix,
types::Type,
},
- parser::{helix_parser::*, location::Loc},
+ parser::{types::*, location::Loc},
},
};
use paste::paste;
diff --git a/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs b/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs
index 4289451d..c3f55c13 100644
--- a/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs
@@ -7,7 +7,7 @@ use crate::{
helix_engine::traversal_core::ops::source::add_e::EdgeType,
helixc::{
analyzer::{
- analyzer::Ctx,
+ Ctx,
errors::push_query_err,
types::Type,
utils::{gen_identifier_or_param, is_valid_identifier},
@@ -21,7 +21,7 @@ use crate::{
},
utils::{GenRef, GeneratedValue, Separator, VecData},
},
- parser::helix_parser::*,
+ parser::types::*,
},
};
use paste::paste;
@@ -322,12 +322,10 @@ pub(crate) fn apply_graph_step<'a>(
}
(ShortestPath(sp), Type::Nodes(_) | Type::Node(_)) => {
let type_arg = sp.type_arg.clone().map(GenRef::Literal);
- // check edge type is valid
traversal
.steps
.push(Separator::Period(GeneratedStep::ShortestPath(
match (sp.from.clone(), sp.to.clone()) {
- // TODO: get rid of clone
(Some(from), Some(to)) => GeneratedShortestPath {
label: type_arg,
from: Some(GenRef::from(from)),
diff --git a/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs b/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs
index 27b5d7bb..ee6ea33b 100644
--- a/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs
+++ b/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs
@@ -1,11 +1,12 @@
//! Semantic analyzer for Helix‑QL.
use crate::helixc::analyzer::error_codes::ErrorCode;
+use crate::helixc::analyzer::utils::{DEFAULT_VAR_NAME, is_in_scope};
use crate::helixc::generator::utils::EmbedData;
use crate::{
generate_error,
helixc::{
analyzer::{
- analyzer::Ctx,
+ Ctx,
errors::push_query_err,
methods::traversal_validation::validate_traversal,
types::Type,
@@ -14,7 +15,7 @@ use crate::{
},
},
generator::{
- bool_op::BoExp,
+ bool_ops::BoExp,
queries::Query as GeneratedQuery,
source_steps::{
AddE, AddN, AddV, SearchBM25, SearchVector as GeneratedSearchVector, SourceStep,
@@ -26,7 +27,7 @@ use crate::{
},
utils::{GenRef, GeneratedValue, Separator, VecData},
},
- parser::helix_parser::*,
+ parser::types::*,
},
protocol::date::Date,
};
@@ -57,7 +58,6 @@ pub(crate) fn infer_expr_type<'a>(
    parent_ty: Option<Type>,
gen_query: &mut GeneratedQuery,
) -> (Type, Option<GeneratedStatement>) {
- // TODO: Look at returning statement as well or passing mut query to push to
use ExpressionType::*;
let expr: &ExpressionType = &expression.expr;
match expr {
@@ -98,32 +98,40 @@ pub(crate) fn infer_expr_type<'a>(
Type::Boolean,
Some(GeneratedStatement::Literal(GenRef::Literal(b.to_string()))),
),
+ // Gets expression type for each element in the array
+ // Checks if all elements are of the same type
+ // Returns the type of the array and the statements to generate from the array
ArrayLiteral(a) => {
let mut inner_array_ty = None;
- let stmts = a
- .iter()
- .map(|e| {
- let (ty, stmt) = infer_expr_type(
- ctx,
- e,
- scope,
- original_query,
- parent_ty.clone(),
- gen_query,
- );
- if inner_array_ty.is_none() {
- inner_array_ty = Some(ty);
- } else {
- // TODO handle type is same for all elements
+ let result = a.iter().try_fold(Vec::new(), |mut stmts, e| {
+ let (ty, stmt) =
+ infer_expr_type(ctx, e, scope, original_query, parent_ty.clone(), gen_query);
+ let type_str = ty.kind_str();
+ if let Some(inner_array_ty) = &inner_array_ty {
+ if inner_array_ty != &ty {
+ generate_error!(ctx, original_query, e.loc.clone(), E306, type_str);
}
- // TODO handle none for stmt
- stmt.unwrap()
- })
-            .collect::<Vec<_>>();
- (
- inner_array_ty.unwrap(),
- Some(GeneratedStatement::Array(stmts)),
- )
+ } else {
+ inner_array_ty = Some(ty);
+ }
+ match stmt {
+ Some(s) => {
+ stmts.push(s);
+ Ok(stmts)
+ }
+ None => {
+ generate_error!(ctx, original_query, e.loc.clone(), E306, type_str);
+ Err(())
+ }
+ }
+ });
+ match result {
+ Ok(stmts) => (
+ Type::Array(Box::new(inner_array_ty.unwrap())),
+ Some(GeneratedStatement::Array(stmts)),
+ ),
+ Err(()) => (Type::Unknown, Some(GeneratedStatement::Empty)),
+ }
}
Traversal(tr) => {
let mut gen_traversal = GeneratedTraversal::default();
@@ -136,7 +144,6 @@ pub(crate) fn infer_expr_type<'a>(
&mut gen_traversal,
gen_query,
);
- // push query
let stmt = GeneratedStatement::Traversal(gen_traversal);
if matches!(expr, Exists(_)) {
@@ -167,14 +174,13 @@ pub(crate) fn infer_expr_type<'a>(
.filter_map(|p| p.default_value.clone().map(|v| (p.name.clone(), v)))
            .collect::<HashMap<_, _>>();
- // Validate fields if both type and fields are present
+            // Validate the fields of the added node by traversing them,
+            // checking they exist in the schema, then checking their types
let (properties, secondary_indices) = match &add.fields {
- Some(fields) => {
- // Get the field set before validation
- // TODO: Check field types
- let field_set = ctx.node_fields.get(ty.as_str()).cloned();
- if let Some(field_set) = field_set {
- for (field_name, value) in fields {
+ Some(fields_to_add) => {
+ let field_set_from_schema = ctx.node_fields.get(ty.as_str()).cloned();
+ if let Some(field_set) = field_set_from_schema {
+ for (field_name, field_value) in fields_to_add {
if !field_set.contains_key(field_name.as_str()) {
generate_error!(
ctx,
@@ -186,7 +192,7 @@ pub(crate) fn infer_expr_type<'a>(
ty.as_str()
);
}
- match value {
+ match field_value {
ValueType::Identifier { value, loc } => {
if is_valid_identifier(
ctx,
@@ -202,10 +208,36 @@ pub(crate) fn infer_expr_type<'a>(
E301,
value.as_str()
);
- };
+ } else {
+ let variable_type = scope.get(value.as_str()).unwrap();
+ if variable_type
+ != &Type::from(
+ field_set
+ .get(field_name.as_str())
+ .unwrap()
+ .field_type
+ .clone(),
+ )
+ {
+ generate_error!(
+ ctx,
+ original_query,
+ loc.clone(),
+ E205,
+ value.as_str(),
+ &variable_type.to_string(),
+ &field_set
+ .get(field_name.as_str())
+ .unwrap()
+ .field_type
+ .to_string(),
+ "node",
+ ty.as_str()
+ );
+ }
+ }
}
ValueType::Literal { value, loc } => {
- // check against type
let field_type = ctx
.node_fields
.get(ty.as_str())
@@ -221,6 +253,7 @@ pub(crate) fn infer_expr_type<'a>(
loc.clone(),
E205,
value.as_str(),
+ &value.to_string(),
&field_type.to_string(),
"node",
ty.as_str()
@@ -231,7 +264,7 @@ pub(crate) fn infer_expr_type<'a>(
}
}
}
- let mut properties = fields
+ let mut properties = fields_to_add
.iter()
.map(|(field_name, value)| {
(
@@ -267,13 +300,7 @@ pub(crate) fn infer_expr_type<'a>(
)),
}
}
- ValueType::Identifier { value, loc } => {
- is_valid_identifier(
- ctx,
- original_query,
- loc.clone(),
- value.as_str(),
- );
+ ValueType::Identifier { value, .. } => {
gen_identifier_or_param(
original_query,
value,
@@ -397,7 +424,34 @@ pub(crate) fn infer_expr_type<'a>(
E301,
value.as_str()
);
- };
+ } else {
+ let variable_type = scope.get(value.as_str()).unwrap();
+ if variable_type
+ != &Type::from(
+ field_set
+ .get(field_name.as_str())
+ .unwrap()
+ .field_type
+ .clone(),
+ )
+ {
+ generate_error!(
+ ctx,
+ original_query,
+ loc.clone(),
+ E205,
+ value.as_str(),
+ &variable_type.to_string(),
+ &field_set
+ .get(field_name.as_str())
+ .unwrap()
+ .field_type
+ .to_string(),
+ "edge",
+ ty.as_str()
+ );
+ }
+ }
}
ValueType::Literal { value, loc } => {
// check against type
@@ -416,6 +470,7 @@ pub(crate) fn infer_expr_type<'a>(
loc.clone(),
E205,
value.as_str(),
+ &value.to_string(),
&field_type.to_string(),
"edge",
ty.as_str()
@@ -533,7 +588,6 @@ pub(crate) fn infer_expr_type<'a>(
from,
label,
properties,
- // secondary_indices: None, // TODO: Add secondary indices by checking against labeled `INDEX` fields in schema
};
let stmt = GeneratedStatement::Traversal(GeneratedTraversal {
source_step: Separator::Period(SourceStep::AddE(add_e)),
@@ -592,7 +646,34 @@ pub(crate) fn infer_expr_type<'a>(
E301,
value.as_str()
);
- };
+ } else {
+ let variable_type = scope.get(value.as_str()).unwrap();
+ if variable_type
+ != &Type::from(
+ field_set
+ .get(field_name.as_str())
+ .unwrap()
+ .field_type
+ .clone(),
+ )
+ {
+ generate_error!(
+ ctx,
+ original_query,
+ loc.clone(),
+ E205,
+ value.as_str(),
+ &variable_type.to_string(),
+ &field_set
+ .get(field_name.as_str())
+ .unwrap()
+ .field_type
+ .to_string(),
+ "vector",
+ ty.as_str()
+ );
+ }
+ }
}
ValueType::Literal { value, loc } => {
// check against type
@@ -611,6 +692,7 @@ pub(crate) fn infer_expr_type<'a>(
loc.clone(),
E205,
value.as_str(),
+ &value.to_string(),
&field_type.to_string(),
"vector",
ty.as_str()
@@ -1056,10 +1138,9 @@ pub(crate) fn infer_expr_type<'a>(
assert!(matches!(stmt, Some(GeneratedStatement::Traversal(_))));
let traversal = match stmt.unwrap() {
GeneratedStatement::Traversal(mut tr) => {
- // TODO: FIX VALUE HERE
let source_variable = match tr.source_step.inner() {
SourceStep::Identifier(id) => id.inner().clone(),
- _ => "val".to_string(),
+ _ => DEFAULT_VAR_NAME.to_string(),
};
tr.traversal_type = TraversalType::NestedFrom(GenRef::Std(source_variable));
tr.should_collect = ShouldCollect::No;
@@ -1074,7 +1155,6 @@ pub(crate) fn infer_expr_type<'a>(
}
Empty => (Type::Unknown, Some(GeneratedStatement::Empty)),
BM25Search(bm25_search) => {
- // TODO: look into how best do type checking for type passed in
if let Some(ref ty) = bm25_search.type_arg
&& !ctx.node_set.contains(ty.as_str())
{
@@ -1092,18 +1172,8 @@ pub(crate) fn infer_expr_type<'a>(
}
Some(ValueType::Identifier { value: i, loc: _ }) => {
is_valid_identifier(ctx, original_query, bm25_search.loc.clone(), i.as_str());
- // if is in params then use data.
- let _ = type_in_scope(
- ctx,
- original_query,
- bm25_search.loc.clone(),
- scope,
- i.as_str(),
- );
- if original_query.parameters.iter().any(|p| p.name.1 == *i)
- || scope.get(i.as_str()).is_some()
- {
+ if is_in_scope(scope, i.as_str()) {
gen_identifier_or_param(original_query, i, true, false)
} else {
generate_error!(
@@ -1206,9 +1276,5 @@ pub(crate) fn infer_expr_type<'a>(
})),
)
}
- _ => {
- println!("Unknown expression: {expr:?}");
- todo!()
- }
}
}
diff --git a/helix-db/src/helixc/analyzer/methods/migration_validation.rs b/helix-db/src/helixc/analyzer/methods/migration_validation.rs
index 3e9fb21c..97d663cd 100644
--- a/helix-db/src/helixc/analyzer/methods/migration_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/migration_validation.rs
@@ -1,6 +1,6 @@
use crate::{
helixc::{
- analyzer::analyzer::Ctx,
+ analyzer::Ctx,
generator::{
migrations::{
GeneratedMigration, GeneratedMigrationItemMapping,
@@ -8,7 +8,7 @@ use crate::{
},
utils::{GenRef, GeneratedValue, Separator},
},
- parser::helix_parser::{
+ parser::types::{
FieldValueType, Migration, MigrationItem, MigrationPropertyMapping,
},
},
diff --git a/helix-db/src/helixc/analyzer/methods/object_validation.rs b/helix-db/src/helixc/analyzer/methods/object_validation.rs
index bae6ef3c..a8e9d01b 100644
--- a/helix-db/src/helixc/analyzer/methods/object_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/object_validation.rs
@@ -1,12 +1,12 @@
//! Semantic analyzer for Helix‑QL.
use crate::helixc::analyzer::error_codes::ErrorCode;
-use crate::helixc::analyzer::utils::FieldLookup;
-use crate::helixc::generator::object_remapping_generation::SingleFieldTraversalRemapping;
+use crate::helixc::analyzer::utils::{FieldLookup, DEFAULT_VAR_NAME};
+use crate::helixc::generator::object_remappings::SingleFieldTraversalRemapping;
use crate::{
generate_error,
helixc::{
analyzer::{
- analyzer::Ctx,
+ Ctx,
errors::push_query_err,
methods::{infer_expr_type::infer_expr_type, traversal_validation::validate_traversal},
types::Type,
@@ -16,7 +16,7 @@ use crate::{
},
},
generator::{
- object_remapping_generation::{
+ object_remappings::{
ExistsRemapping, IdentifierRemapping, ObjectRemapping, Remapping, RemappingType,
TraversalRemapping, ValueRemapping,
},
@@ -29,7 +29,7 @@ use crate::{
},
utils::{GenRef, Separator},
},
- parser::{helix_parser::*, location::Loc},
+ parser::{types::*, location::Loc},
},
};
use paste::paste;
@@ -191,13 +191,10 @@ pub(crate) fn parse_object_remapping<'a>(
Some(parent_ty.clone()),
gen_query,
);
- assert!(stmt.is_some());
- assert!(matches!(stmt, Some(Statement::Traversal(_))));
let expr = match stmt.unwrap() {
Statement::Traversal(mut tr) => {
tr.traversal_type =
- // TODO: FIX VALUE HERE
- TraversalType::NestedFrom(GenRef::Std("val".to_string()));
+ TraversalType::NestedFrom(GenRef::Std(DEFAULT_VAR_NAME.to_string()));
tr
}
_ => unreachable!(),
@@ -267,7 +264,7 @@ pub(crate) fn parse_object_remapping<'a>(
RemappingType::ValueRemapping(ValueRemapping {
variable_name: closure_variable.get_variable_name(),
field_name: key.clone(),
- value: GenRef::from(lit.clone()), // TODO: Implement
+ value: GenRef::from(lit.clone()),
should_spread,
})
}
@@ -476,7 +473,6 @@ fn parse_traversal_as_remapping_value<'a>(
should_spread,
})
}
- // TODO: IF CLOSURE
StepType::Object(ref object)
if object.fields.len() == 1 && traversal.steps.len() == 1 =>
{
diff --git a/helix-db/src/helixc/analyzer/methods/query_validation.rs b/helix-db/src/helixc/analyzer/methods/query_validation.rs
index 77832ef4..d77afa21 100644
--- a/helix-db/src/helixc/analyzer/methods/query_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/query_validation.rs
@@ -4,7 +4,7 @@ use crate::generate_error;
use crate::helixc::analyzer::error_codes::ErrorCode;
use crate::helixc::{
analyzer::{
- analyzer::Ctx,
+ Ctx,
errors::{push_query_err, push_query_warn},
methods::{infer_expr_type::infer_expr_type, statement_validation::validate_statements},
types::Type,
@@ -18,7 +18,7 @@ use crate::helixc::{
traversal_steps::ShouldCollect,
utils::{GenRef, GeneratedValue},
},
- parser::{helix_parser::*, location::Loc},
+ parser::{location::Loc, types::*},
};
use paste::paste;
use std::collections::HashMap;
@@ -40,18 +40,18 @@ pub(crate) fn validate_query<'a>(ctx: &mut Ctx<'a>, original_query: &'a Query) {
for param in &original_query.parameters {
if let FieldType::Identifier(ref id) = param.param_type.1
&& is_valid_identifier(ctx, original_query, param.param_type.0.clone(), id.as_str())
+ && !ctx.node_set.contains(id.as_str())
+ && !ctx.edge_map.contains_key(id.as_str())
+ && !ctx.vector_set.contains(id.as_str())
{
- // TODO: add support for edges
- if !ctx.node_set.contains(id.as_str()) {
- generate_error!(
- ctx,
- original_query,
- param.param_type.0.clone(),
- E209,
- &id,
-                    &param.name.1
- );
- }
+ generate_error!(
+ ctx,
+ original_query,
+ param.param_type.0.clone(),
+ E209,
+ &id,
+            &param.name.1
+ );
}
// constructs parameters and sub‑parameters for generator
GeneratedParameter::unwrap_param(
diff --git a/helix-db/src/helixc/analyzer/methods/schema_methods.rs b/helix-db/src/helixc/analyzer/methods/schema_methods.rs
index d09454b0..f4f0200d 100644
--- a/helix-db/src/helixc/analyzer/methods/schema_methods.rs
+++ b/helix-db/src/helixc/analyzer/methods/schema_methods.rs
@@ -1,9 +1,9 @@
use std::{borrow::Cow, collections::HashMap};
use crate::helixc::{
- analyzer::{analyzer::Ctx, error_codes::ErrorCode, errors::push_schema_err},
+ analyzer::{Ctx, error_codes::ErrorCode, errors::push_schema_err},
parser::{
- helix_parser::{Field, FieldPrefix, FieldType, Source},
+ types::{Field, FieldPrefix, FieldType, Source},
location::Loc,
},
};
@@ -149,7 +149,7 @@ pub(crate) fn check_schema(ctx: &mut Ctx) {
}
if let Some(v) = edge.properties.as_ref() {
v.iter().for_each(|f| {
- if f.name.to_lowercase() == "id" {
+ if RESERVED_FIELD_NAMES.contains(&f.name.to_lowercase().as_str()) {
push_schema_err(
ctx,
f.loc.clone(),
@@ -164,7 +164,7 @@ pub(crate) fn check_schema(ctx: &mut Ctx) {
}
for node in &ctx.src.get_latest_schema().node_schemas {
node.fields.iter().for_each(|f| {
- if f.name.to_lowercase() == "id" {
+ if RESERVED_FIELD_NAMES.contains(&f.name.to_lowercase().as_str()) {
push_schema_err(
ctx,
f.loc.clone(),
@@ -178,7 +178,7 @@ pub(crate) fn check_schema(ctx: &mut Ctx) {
}
for vector in &ctx.src.get_latest_schema().vector_schemas {
vector.fields.iter().for_each(|f: &Field| {
- if f.name.to_lowercase() == "id" {
+ if RESERVED_FIELD_NAMES.contains(&f.name.to_lowercase().as_str()) {
push_schema_err(
ctx,
f.loc.clone(),
@@ -191,3 +191,5 @@ pub(crate) fn check_schema(ctx: &mut Ctx) {
ctx.output.vectors.push(vector.clone().into());
}
}
+
+const RESERVED_FIELD_NAMES: &[&str] = &["id", "label", "to_node", "from_node", "data", "score"];
\ No newline at end of file
diff --git a/helix-db/src/helixc/analyzer/methods/statement_validation.rs b/helix-db/src/helixc/analyzer/methods/statement_validation.rs
index 69d90b96..8e12d330 100644
--- a/helix-db/src/helixc/analyzer/methods/statement_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/statement_validation.rs
@@ -5,18 +5,19 @@ use crate::{
generate_error,
helixc::{
analyzer::{
- analyzer::Ctx, errors::push_query_err, methods::infer_expr_type::infer_expr_type,
- types::Type, utils::is_valid_identifier,
+ Ctx, errors::push_query_err, methods::infer_expr_type::infer_expr_type, types::Type,
+ utils::is_valid_identifier,
},
generator::{
- queries::Query as GeneratedQuery, statements::Statement as GeneratedStatement,
+ queries::Query as GeneratedQuery,
+ statements::Statement as GeneratedStatement,
statements::{
Assignment as GeneratedAssignment, Drop as GeneratedDrop,
ForEach as GeneratedForEach, ForLoopInVariable, ForVariable,
},
utils::GenRef,
},
- parser::helix_parser::*,
+ parser::types::*,
},
};
use paste::paste;
@@ -83,20 +84,22 @@ pub(crate) fn validate_statements<'a>(
stmt
}
+        // NOTE: parameters don't get parsed to Type::Array
ForLoop(fl) => {
- // Ensure the collection exists
if !scope.contains_key(fl.in_variable.1.as_str()) {
generate_error!(ctx, original_query, fl.loc.clone(), E301, &fl.in_variable.1);
}
- // Add loop vars to new child scope and walk the body
+
let mut body_scope = HashMap::new();
let mut for_loop_in_variable: ForLoopInVariable = ForLoopInVariable::Empty;
- // check if fl.in_variable is a valid parameter
+ // Check if the in variable is a parameter
let param = original_query
.parameters
.iter()
.find(|p| p.name.1 == fl.in_variable.1);
+            // if it is a parameter, add it to the body scope;
+            // otherwise assume the variable is already in scope and add it to the body scope
let _ = match param {
Some(param) => {
for_loop_in_variable =
@@ -134,22 +137,15 @@ pub(crate) fn validate_statements<'a>(
match &fl.variable {
ForLoopVars::Identifier { name, loc: _ } => {
is_valid_identifier(ctx, original_query, fl.loc.clone(), name.as_str());
- body_scope.insert(name.as_str(), Type::Unknown);
- scope.insert(name.as_str(), Type::Unknown);
+ let field_type = scope.get(name.as_str()).unwrap().clone();
+ body_scope.insert(name.as_str(), field_type.clone());
+ scope.insert(name.as_str(), field_type);
for_variable = ForVariable::Identifier(GenRef::Std(name.clone()));
}
- ForLoopVars::ObjectAccess {
- name: _,
- field: _,
- loc: _,
- } => {
- // body_scope.insert(name.as_str(), Type::Unknown);
- // for_variable =
- // ForVariable::ObjectDestructure(vec![GenRef::Std(name.clone())]);
- unreachable!()
+ ForLoopVars::ObjectAccess { .. } => {
+ todo!()
}
ForLoopVars::ObjectDestructuring { fields, loc: _ } => {
- // TODO: check if fields are valid
match ¶m {
Some(p) => {
for_loop_in_variable =
@@ -168,8 +164,14 @@ pub(crate) fn validate_statements<'a>(
[field_name, &fl.in_variable.1]
);
}
- body_scope.insert(field_name.as_str(), Type::Unknown);
- scope.insert(field_name.as_str(), Type::Unknown);
+ let field_type = Type::from(
+ param_fields
+ .get(field_name.as_str())
+ .unwrap()
+ .clone(),
+ );
+ body_scope.insert(field_name.as_str(), field_type.clone());
+ scope.insert(field_name.as_str(), field_type);
}
for_variable = ForVariable::ObjectDestructure(
fields
@@ -201,24 +203,36 @@ pub(crate) fn validate_statements<'a>(
}
}
}
- None => match scope.contains_key(fl.in_variable.1.as_str()) {
- true => {
- // TODO: Check fields
- for_variable = ForVariable::ObjectDestructure(
- fields
- .iter()
- .map(|(_, f)| {
- let name = f.as_str();
+ None => match scope.get(fl.in_variable.1.as_str()) {
+ Some(Type::Array(object_arr)) => {
+ match object_arr.as_ref() {
+ Type::Object(object) => {
+ let mut obj_dest_fields = Vec::with_capacity(fields.len());
+ let object = object.clone();
+ for (_, field_name) in fields {
+ let name = field_name.as_str();
// adds non-param fields to scope
- body_scope.insert(name, Type::Unknown);
- scope.insert(name, Type::Unknown);
-
- GenRef::Std(name.to_string())
- })
- .collect(),
- );
+ let field_type = object.get(name).unwrap().clone();
+ body_scope.insert(name, field_type.clone());
+ scope.insert(name, field_type);
+ obj_dest_fields.push(GenRef::Std(name.to_string()));
+ }
+ for_variable =
+ ForVariable::ObjectDestructure(obj_dest_fields);
+ }
+ _ => {
+ generate_error!(
+ ctx,
+ original_query,
+ fl.in_variable.0.clone(),
+ E653,
+ [&fl.in_variable.1],
+ [&fl.in_variable.1]
+ );
+ }
+ }
}
- false => {
+ _ => {
generate_error!(
ctx,
original_query,
@@ -233,8 +247,6 @@ pub(crate) fn validate_statements<'a>(
}
let mut statements = Vec::new();
for body_stmt in &fl.statements {
- // Recursive walk (but without infinite nesting for now)
-
let stmt = validate_statements(ctx, scope, original_query, query, body_stmt);
if let Some(s) = stmt {
statements.push(s);
diff --git a/helix-db/src/helixc/analyzer/methods/traversal_validation.rs b/helix-db/src/helixc/analyzer/methods/traversal_validation.rs
index 4d06d8f6..f0a0b992 100644
--- a/helix-db/src/helixc/analyzer/methods/traversal_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/traversal_validation.rs
@@ -1,12 +1,13 @@
use crate::helixc::analyzer::error_codes::*;
-use crate::helixc::generator::bool_op::{Contains, IsIn};
+use crate::helixc::analyzer::utils::DEFAULT_VAR_NAME;
+use crate::helixc::generator::bool_ops::{Contains, IsIn};
use crate::helixc::generator::source_steps::SearchVector;
use crate::helixc::generator::utils::{EmbedData, VecData};
use crate::{
generate_error,
helixc::{
analyzer::{
- analyzer::Ctx,
+ Ctx,
errors::push_query_err,
methods::{
exclude_validation::validate_exclude, graph_step_validation::apply_graph_step,
@@ -19,8 +20,8 @@ use crate::{
},
},
generator::{
- bool_op::{BoExp, BoolOp, Eq, Gt, Gte, Lt, Lte, Neq},
- object_remapping_generation::{ExcludeField, Remapping, RemappingType},
+ bool_ops::{BoExp, BoolOp, Eq, Gt, Gte, Lt, Lte, Neq},
+ object_remappings::{ExcludeField, Remapping, RemappingType},
queries::Query as GeneratedQuery,
source_steps::{EFromID, EFromType, NFromID, NFromIndex, NFromType, SourceStep},
statements::Statement as GeneratedStatement,
@@ -30,7 +31,7 @@ use crate::{
},
utils::{GenRef, GeneratedValue, Order, Separator},
},
- parser::{helix_parser::*, location::Loc},
+ parser::{location::Loc, types::*},
},
protocol::value::Value,
};
@@ -97,6 +98,7 @@ pub(crate) fn validate_traversal<'a>(
original_query,
loc.clone(),
E205,
+ &value.inner_stringify(),
&value.to_string(),
&field.field_type.to_string(),
"node",
@@ -286,7 +288,8 @@ pub(crate) fn validate_traversal<'a>(
// anonymous will be the traversal type rather than the start type
StartNode::Anonymous => {
let parent = parent_ty.unwrap();
- gen_traversal.traversal_type = TraversalType::FromVar(GenRef::Std("val".to_string())); // TODO: ensure this default is stable
+ gen_traversal.traversal_type =
+ TraversalType::FromVar(GenRef::Std(DEFAULT_VAR_NAME.to_string()));
gen_traversal.source_step = Separator::Empty(SourceStep::Anonymous);
parent
}
@@ -515,11 +518,11 @@ pub(crate) fn validate_traversal<'a>(
gen_traversal
.steps
.push(Separator::Period(GeneratedStep::Remapping(Remapping {
- variable_name: "item".to_string(), // TODO: Change to start var
+ variable_name: DEFAULT_VAR_NAME.to_string(),
is_inner: false,
should_spread: false,
remappings: vec![RemappingType::ExcludeField(ExcludeField {
- variable_name: "item".to_string(), // TODO: Change to start var
+ variable_name: DEFAULT_VAR_NAME.to_string(),
fields_to_exclude: ex
.fields
.iter()
@@ -530,16 +533,6 @@ pub(crate) fn validate_traversal<'a>(
}
StepType::Object(obj) => {
- // TODO: Fix issue with step count being incorrect (i think its counting each field as a step)
- // if i != number_of_steps {
- // println!("{} {}", i, number_of_steps);
- // push_query_err(ctx,
- // original_query,
- // obj.loc.clone(),
- // "object is only valid as the last step in a traversal".to_string(),
- // "move the object to the end of the traversal",
- // );
- // }
validate_object(
ctx,
&cur_ty,
@@ -1044,7 +1037,6 @@ pub(crate) fn validate_traversal<'a>(
);
}
_ => {
- // TODO: maybe use cur_ty instead of update.loc.span?
generate_error!(
ctx,
original_query,
diff --git a/helix-db/src/helixc/analyzer/mod.rs b/helix-db/src/helixc/analyzer/mod.rs
index 6bca9538..7bcd3018 100644
--- a/helix-db/src/helixc/analyzer/mod.rs
+++ b/helix-db/src/helixc/analyzer/mod.rs
@@ -1,4 +1,40 @@
-pub mod analyzer;
+// Copyright 2025 HelixDB Inc.
+// SPDX-License-Identifier: AGPL-3.0
+
+//! This is the static analyzer for HelixQL.
+//! It type checks the queries for grammatical and semantic correctness.
+//! The analyzer methods are broken up into separate files within /methods, grouped by general functionality.
+//! File names should be self-explanatory as to what is included in the file.
+
+use crate::helixc::{
+ analyzer::{
+ diagnostic::Diagnostic,
+ methods::{
+ migration_validation::validate_migration,
+ query_validation::validate_query,
+ schema_methods::{build_field_lookups, check_schema, SchemaVersionMap},
+ },
+ types::Type,
+ },
+ generator::Source as GeneratedSource,
+ parser::types::{EdgeSchema, ExpressionType, Field, Query, ReturnType, Source},
+};
+use itertools::Itertools;
+use serde::Serialize;
+use std::{
+ borrow::Cow,
+ collections::{HashMap, HashSet},
+ sync::OnceLock,
+};
+
+pub fn analyze(src: &Source) -> (Vec<Diagnostic>, GeneratedSource) {
+ let mut ctx = Ctx::new(src);
+ ctx.check_schema();
+ ctx.check_schema_migrations();
+ ctx.check_queries();
+ (ctx.diagnostics, ctx.output)
+}
+
pub mod diagnostic;
pub mod error_codes;
pub mod errors;
@@ -7,3 +43,246 @@ pub mod methods;
pub mod pretty;
pub mod types;
pub mod utils;
+
+
+/// Internal working context shared by all passes.
+pub(crate) struct Ctx<'a> {
+ pub(super) src: &'a Source,
+ /// Quick look‑ups
+ pub(super) node_set: HashSet<&'a str>,
+ pub(super) vector_set: HashSet<&'a str>,
+ pub(super) edge_map: HashMap<&'a str, &'a EdgeSchema>,
+ pub(super) node_fields: HashMap<&'a str, HashMap<&'a str, Cow<'a, Field>>>,
+ pub(super) edge_fields: HashMap<&'a str, HashMap<&'a str, Cow<'a, Field>>>,
+ pub(super) vector_fields: HashMap<&'a str, HashMap<&'a str, Cow<'a, Field>>>,
+ pub(super) all_schemas: SchemaVersionMap<'a>,
+    pub(super) diagnostics: Vec<Diagnostic>,
+ pub(super) output: GeneratedSource,
+}
+
+pub static INTROSPECTION_DATA: OnceLock<IntrospectionData> = OnceLock::new();
+pub static SECONDARY_INDICES: OnceLock<Vec<String>> = OnceLock::new();
+
+impl<'a> Ctx<'a> {
+ pub(super) fn new(src: &'a Source) -> Self {
+ // Build field look‑ups once
+ let all_schemas = build_field_lookups(src);
+ let (node_fields, edge_fields, vector_fields) = all_schemas.get_latest();
+
+ let output = GeneratedSource {
+ src: src.source.clone(),
+ ..Default::default()
+ };
+
+ let ctx = Self {
+ node_set: src
+ .get_latest_schema()
+ .node_schemas
+ .iter()
+ .map(|n| n.name.1.as_str())
+ .collect(),
+ vector_set: src
+ .get_latest_schema()
+ .vector_schemas
+ .iter()
+ .map(|v| v.name.as_str())
+ .collect(),
+ edge_map: src
+ .get_latest_schema()
+ .edge_schemas
+ .iter()
+ .map(|e| (e.name.1.as_str(), e))
+ .collect(),
+ node_fields,
+ edge_fields,
+ vector_fields,
+ all_schemas,
+ src,
+ diagnostics: Vec::new(),
+ output,
+ };
+
+ INTROSPECTION_DATA
+ .set(IntrospectionData::from_schema(&ctx))
+ .ok();
+
+ SECONDARY_INDICES
+ .set(
+ src.get_latest_schema()
+ .node_schemas
+ .iter()
+ .flat_map(|schema| {
+ schema
+ .fields
+ .iter()
+ .filter(|f| f.is_indexed())
+ .map(|f| f.name.clone())
+ })
+ .dedup()
+ .collect(),
+ )
+ .ok();
+ ctx
+ }
+
+ #[allow(unused)]
+ pub(super) fn get_item_fields(
+ &self,
+ item_type: &Type,
+ ) -> Option<&HashMap<&str, Cow<'_, Field>>> {
+ match item_type {
+ Type::Node(Some(node_type)) | Type::Nodes(Some(node_type)) => {
+ self.node_fields.get(node_type.as_str())
+ }
+ Type::Edge(Some(edge_type)) | Type::Edges(Some(edge_type)) => {
+ self.edge_fields.get(edge_type.as_str())
+ }
+ Type::Vector(Some(vector_type)) | Type::Vectors(Some(vector_type)) => {
+ self.vector_fields.get(vector_type.as_str())
+ }
+ _ => None,
+ }
+ }
+
+ // ---------- Pass #1: schema --------------------------
+ /// Validate that every edge references declared node types.
+ pub(super) fn check_schema(&mut self) {
+ check_schema(self);
+ }
+
+ // ---------- Pass #1.5: schema migrations --------------------------
+ pub(super) fn check_schema_migrations(&mut self) {
+ for m in &self.src.migrations {
+ validate_migration(self, m);
+ }
+ }
+
+ // ---------- Pass #2: queries -------------------------
+ pub(super) fn check_queries(&mut self) {
+ for q in &self.src.queries {
+ validate_query(self, q);
+ }
+ }
+}
+
+#[derive(Serialize)]
+pub struct IntrospectionData {
+ schema: SchemaData,
+    queries: Vec<QueryData>,
+}
+
+impl IntrospectionData {
+ fn from_schema(ctx: &Ctx) -> Self {
+ let queries = ctx.src.queries.iter().map(QueryData::from_query).collect();
+ Self {
+ schema: SchemaData::from_ctx(ctx),
+ queries,
+ }
+ }
+}
+
+#[derive(Serialize)]
+pub struct SchemaData {
+    nodes: Vec<NodeData>,
+    vectors: Vec<NodeData>,
+    edges: Vec<EdgeData>,
+}
+
+impl SchemaData {
+ fn from_ctx(ctx: &Ctx) -> Self {
+ let nodes = ctx.node_fields.iter().map(NodeData::from_entry).collect();
+ let vectors = ctx.vector_fields.iter().map(NodeData::from_entry).collect();
+ let edges = ctx.edge_map.iter().map(EdgeData::from_entry).collect();
+
+ SchemaData {
+ nodes,
+ vectors,
+ edges,
+ }
+ }
+}
+
+#[derive(Serialize)]
+pub struct NodeData {
+ name: String,
+    properties: HashMap<String, String>,
+}
+
+impl NodeData {
+    fn from_entry(val: (&&str, &HashMap<&str, Cow<Field>>)) -> Self {
+ let properties = val
+ .1
+ .iter()
+ .map(|(n, f)| (n.to_string(), f.field_type.to_string()))
+ .collect();
+ NodeData {
+ name: val.0.to_string(),
+ properties,
+ }
+ }
+}
+
+#[derive(Serialize)]
+pub struct EdgeData {
+ name: String,
+ from: String,
+ to: String,
+    properties: HashMap<String, String>,
+}
+
+impl EdgeData {
+ fn from_entry((name, es): (&&str, &&EdgeSchema)) -> Self {
+ let properties = es
+ .properties
+ .iter()
+ .flatten()
+ .map(|f| (f.name.to_string(), f.field_type.to_string()))
+ .collect();
+
+ EdgeData {
+ name: name.to_string(),
+ from: es.from.1.clone(),
+ to: es.to.1.clone(),
+ properties,
+ }
+ }
+}
+
+#[derive(Serialize)]
+pub struct QueryData {
+ name: String,
+    parameters: HashMap<String, String>,
+    returns: Vec<String>,
+}
+
+impl QueryData {
+ fn from_query(query: &Query) -> Self {
+ let parameters = query
+ .parameters
+ .iter()
+ .map(|p| (p.name.1.clone(), p.param_type.1.to_string()))
+ .collect();
+
+ let returns = query
+ .return_values
+ .iter()
+ .flat_map(|e| {
+ if let ReturnType::Expression(expr) = e {
+ if let ExpressionType::Identifier(ident) = &expr.expr {
+ Some(ident.clone())
+ } else {
+ None
+ }
+ } else {
+ None
+ }
+ })
+ .collect();
+
+ QueryData {
+ name: query.name.to_string(),
+ parameters,
+ returns,
+ }
+ }
+}
diff --git a/helix-db/src/helixc/analyzer/types.rs b/helix-db/src/helixc/analyzer/types.rs
index 77678eaa..7a1e0bbd 100644
--- a/helix-db/src/helixc/analyzer/types.rs
+++ b/helix-db/src/helixc/analyzer/types.rs
@@ -9,9 +9,7 @@ use crate::helixc::{
},
utils::{GenRef, GeneratedType, GeneratedValue, RustType as GeneratedRustType},
},
- parser::helix_parser::{
- DefaultValue, EdgeSchema, FieldType, NodeSchema, Parameter, VectorSchema,
- },
+ parser::types::{DefaultValue, EdgeSchema, FieldType, NodeSchema, Parameter, VectorSchema},
};
impl From<NodeSchema> for GeneratedNodeSchema {
@@ -21,11 +19,14 @@ impl From for GeneratedNodeSchema {
properties: generated
.fields
.into_iter()
- .map(|f| SchemaProperty {
- name: f.name,
- field_type: f.field_type.into(),
- default_value: f.defaults.map(|d| d.into()),
- is_index: f.prefix,
+ .map(|f| {
+ // println!("into: {:?}", f.field_type.into());
+ SchemaProperty {
+ name: f.name,
+ field_type: f.field_type.into(),
+ default_value: f.defaults.map(|d| d.into()),
+ is_index: f.prefix,
+ }
})
.collect(),
}
@@ -240,6 +241,7 @@ pub(crate) enum Type {
    Vectors(Option<String>),
    Scalar(FieldType),
    Object(HashMap<String, Type>),
+    Array(Box<Type>),
    Anonymous(Box<Type>),
Boolean,
Unknown,
@@ -256,6 +258,7 @@ impl Type {
Type::Vectors(_) => "vectors",
Type::Scalar(_) => "scalar",
Type::Object(_) => "object",
+ Type::Array(_) => "array",
Type::Boolean => "boolean",
Type::Unknown => "unknown",
Type::Anonymous(ty) => ty.kind_str(),
@@ -272,6 +275,7 @@ impl Type {
Type::Vectors(Some(name)) => name.clone(),
Type::Scalar(ft) => ft.to_string(),
Type::Anonymous(ty) => ty.get_type_name(),
+ Type::Array(ty) => ty.get_type_name(),
Type::Boolean => "boolean".to_string(),
Type::Unknown => "unknown".to_string(),
Type::Object(fields) => {
@@ -293,7 +297,6 @@ impl Type {
#[allow(dead_code)]
/// Same, but returns an owned clone for convenience.
pub fn cloned_base(&self) -> Type {
- // TODO: never used?
match self {
Type::Anonymous(inner) => inner.cloned_base(),
_ => self.clone(),
@@ -338,14 +341,42 @@ impl Type {
}
}
+impl PartialEq for Type {
+ fn eq(&self, other: &Self) -> bool {
+ match (self, other) {
+ (Type::Scalar(ft), Type::Scalar(other_ft)) => ft == other_ft,
+ (Type::Object(fields), Type::Object(other_fields)) => fields == other_fields,
+ (Type::Boolean, Type::Boolean) => true,
+ (Type::Unknown, Type::Unknown) => true,
+ (Type::Anonymous(inner), Type::Anonymous(other_inner)) => inner == other_inner,
+ (Type::Node(name), Type::Node(other_name)) => name == other_name,
+ (Type::Nodes(name), Type::Nodes(other_name)) => name == other_name,
+ (Type::Edge(name), Type::Edge(other_name)) => name == other_name,
+ (Type::Edges(name), Type::Edges(other_name)) => name == other_name,
+ (Type::Vector(name), Type::Vector(other_name)) => name == other_name,
+ (Type::Vectors(name), Type::Vectors(other_name)) => name == other_name,
+ (Type::Array(inner), Type::Array(other_inner)) => inner == other_inner,
+ (Type::Vector(name), Type::Vectors(other_name)) => name == other_name,
+ _ => unreachable!(),
+ }
+ }
+}
+
impl From<FieldType> for Type {
fn from(ft: FieldType) -> Self {
use FieldType::*;
match ft {
String | Boolean | F32 | F64 | I8 | I16 | I32 | I64 | U8 | U16 | U32 | U64 | U128
| Uuid | Date => Type::Scalar(ft.clone()),
- Array(inner_ft) => Type::from(*inner_ft),
- _ => Type::Unknown,
+ Array(inner_ft) => Type::Array(Box::new(Type::from(*inner_ft))),
+ Object(obj) => Type::Object(obj.into_iter().map(|(k, v)| (k, Type::from(v))).collect()),
+ Identifier(id) => Type::Scalar(FieldType::Identifier(id)),
}
}
}
+
+impl std::fmt::Display for Type {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{:?}", self)
+ }
+}
diff --git a/helix-db/src/helixc/analyzer/utils.rs b/helix-db/src/helixc/analyzer/utils.rs
index 1336a9d6..57a5f5f7 100644
--- a/helix-db/src/helixc/analyzer/utils.rs
+++ b/helix-db/src/helixc/analyzer/utils.rs
@@ -3,17 +3,19 @@ use crate::helixc::analyzer::error_codes::ErrorCode;
use crate::{
generate_error,
helixc::{
- analyzer::{analyzer::Ctx, errors::push_query_err, types::Type},
+ analyzer::{Ctx, errors::push_query_err, types::Type},
generator::{
traversal_steps::Step,
utils::{GenRef, GeneratedValue},
},
- parser::{helix_parser::*, location::Loc},
+ parser::{location::Loc, types::*},
},
};
use paste::paste;
use std::collections::HashMap;
+pub(super) const DEFAULT_VAR_NAME: &str = "val";
+
pub(super) fn is_valid_identifier(
ctx: &mut Ctx,
original_query: &Query,
@@ -74,6 +76,10 @@ pub(super) fn gen_id_access_or_param(original_query: &Query, name: &str) -> Gene
}
}
+pub(super) fn is_in_scope(scope: &HashMap<&str, Type>, name: &str) -> bool {
+ scope.contains_key(name)
+}
+
pub(super) fn type_in_scope(
ctx: &mut Ctx,
original_query: &Query,
diff --git a/helix-db/src/helixc/generator/README.md b/helix-db/src/helixc/generator/README.md
new file mode 100644
index 00000000..987147c6
--- /dev/null
+++ b/helix-db/src/helixc/generator/README.md
@@ -0,0 +1,35 @@
+# Generator Module
+
+## Overview
+The generator module transforms the validated HelixQL AST into executable Rust code, creating type-safe graph database operations.
+
+## Structure
+
+### Core Components
+- **`mod.rs`** - Main generator entry point, defines output structure
+- **`utils.rs`** - Helper functions and code generation utilities
+
+### Code Generation Methods (by domain)
+- **`schemas.rs`** - Generates Rust structs for nodes, edges, and vectors
+- **`queries.rs`** - Generates query functions with proper signatures
+- **`migrations.rs`** - Generates migration code for schema evolution
+- **`statements.rs`** - Generates statement execution code
+- **`traversal_steps.rs`** - Generates graph traversal operations
+- **`source_steps.rs`** - Generates source operations (add_n, add_e, n_from_id, n_from_type, etc.)
+- **`bool_ops.rs`** - Generates boolean expression evaluators
+- **`object_remappings.rs`** - Generates object transformation code
+- **`return_values.rs`** - Generates return value processing
+- **`tsdisplay.rs`** - TypeScript display utilities
+
+## Generation Flow
+
+1. **Input**: Validated AST from the analyzer module
+2. **Schema Generation**: Creates Rust structs for all schema types
+3. **Query Generation**: Transforms queries into Rust functions
+4. **Migration Generation**: Creates migration execution code
+5. **Output**: Complete Rust source code ready for compilation
+
+## Code Generation Patterns
+- Uses Rust's `Display` trait for code generation
+- Maintains proper indentation and formatting
+- Generates idiomatic Rust code with appropriate error handling
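+
+A condensed sketch of the `Display` pattern above, with illustrative types rather than the actual ones in `source_steps.rs`:
+
+```rust
+use std::fmt::{self, Display};
+
+// Each generated step knows how to print itself as Rust source.
+struct NFromType {
+    label: String,
+}
+
+impl Display for NFromType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "n_from_type(\"{}\")", self.label)
+    }
+}
+
+fn main() {
+    let step = NFromType { label: "User".to_string() };
+    // Emitting code is just formatting; this prints: n_from_type("User")
+    println!("{step}");
+}
+```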
diff --git a/helix-db/src/helixc/generator/bool_op.rs b/helix-db/src/helixc/generator/bool_ops.rs
similarity index 100%
rename from helix-db/src/helixc/generator/bool_op.rs
rename to helix-db/src/helixc/generator/bool_ops.rs
diff --git a/helix-db/src/helixc/generator/migrations.rs b/helix-db/src/helixc/generator/migrations.rs
index d9655ca8..b138f46c 100644
--- a/helix-db/src/helixc/generator/migrations.rs
+++ b/helix-db/src/helixc/generator/migrations.rs
@@ -1,7 +1,7 @@
use crate::{
helixc::{
generator::utils::{GeneratedValue, Separator},
- parser::helix_parser::FieldType,
+ parser::types::FieldType,
},
protocol::value::casting::CastType,
};
diff --git a/helix-db/src/helixc/generator/mod.rs b/helix-db/src/helixc/generator/mod.rs
index bdf8c655..06256876 100644
--- a/helix-db/src/helixc/generator/mod.rs
+++ b/helix-db/src/helixc/generator/mod.rs
@@ -1,5 +1,9 @@
-use core::fmt;
-use std::fmt::Display;
+// Copyright 2025 HelixDB Inc.
+// SPDX-License-Identifier: AGPL-3.0
+
+//! This is the generator for HelixQL. It transforms the AST into Rust code.
+//! The generator methods are broken up into separate files, grouped by general functionality.
+//! File names should be self-explanatory as to what is included in the file.
use crate::{
helix_engine::traversal_core::config::Config,
@@ -10,10 +14,12 @@ use crate::{
utils::write_headers,
},
};
+use core::fmt;
+use std::fmt::Display;
-pub mod bool_op;
+pub mod bool_ops;
pub mod migrations;
-pub mod object_remapping_generation;
+pub mod object_remappings;
pub mod queries;
pub mod return_values;
pub mod schemas;
diff --git a/helix-db/src/helixc/generator/object_remapping_generation.rs b/helix-db/src/helixc/generator/object_remappings.rs
similarity index 100%
rename from helix-db/src/helixc/generator/object_remapping_generation.rs
rename to helix-db/src/helixc/generator/object_remappings.rs
diff --git a/helix-db/src/helixc/generator/return_values.rs b/helix-db/src/helixc/generator/return_values.rs
index c92a419d..7dc03dff 100644
--- a/helix-db/src/helixc/generator/return_values.rs
+++ b/helix-db/src/helixc/generator/return_values.rs
@@ -70,9 +70,9 @@ impl ReturnValue {
ReturnType::NamedLiteral(name) => name.inner().inner().to_string(),
ReturnType::NamedExpr(name) => name.inner().inner().to_string(),
ReturnType::SingleExpr(name) => name.inner().inner().to_string(),
- ReturnType::UnnamedExpr => todo!(),
- ReturnType::HashMap => todo!(),
- ReturnType::Array => todo!(),
+ ReturnType::UnnamedExpr => unimplemented!(),
+ ReturnType::HashMap => unimplemented!(),
+ ReturnType::Array => unimplemented!(),
}
}
diff --git a/helix-db/src/helixc/generator/schemas.rs b/helix-db/src/helixc/generator/schemas.rs
index 6ffe0ebe..59bc23b6 100644
--- a/helix-db/src/helixc/generator/schemas.rs
+++ b/helix-db/src/helixc/generator/schemas.rs
@@ -1,7 +1,13 @@
use core::fmt;
use std::fmt::Display;
-use crate::helixc::{generator::{tsdisplay::ToTypeScript, utils::{GeneratedType, GeneratedValue}}, parser::helix_parser::FieldPrefix};
+use crate::helixc::{
+ generator::{
+ tsdisplay::ToTypeScript,
+ utils::{GeneratedType, GeneratedValue},
+ },
+ parser::types::FieldPrefix,
+};
#[derive(Clone)]
pub struct NodeSchema {
@@ -123,4 +129,4 @@ pub struct SchemaProperty {
    pub default_value: Option<GeneratedValue>,
// pub is_optional: bool,
pub is_index: FieldPrefix,
-}
\ No newline at end of file
+}
diff --git a/helix-db/src/helixc/generator/source_steps.rs b/helix-db/src/helixc/generator/source_steps.rs
index daed5a28..3d4f981c 100644
--- a/helix-db/src/helixc/generator/source_steps.rs
+++ b/helix-db/src/helixc/generator/source_steps.rs
@@ -4,31 +4,46 @@ use std::fmt::Display;
use crate::helixc::generator::utils::{write_properties, write_secondary_indices, VecData};
use super::{
- bool_op::BoExp,
+ bool_ops::BoExp,
utils::{GenRef, GeneratedValue},
};
#[derive(Clone)]
pub enum SourceStep {
+ /// Traversal starts from an identifier
    Identifier(GenRef<String>),
+ /// Add a node
AddN(AddN),
+ /// Add an edge
AddE(AddE),
+ /// Insert a vector
AddV(AddV),
+ /// Lookup a node by ID
NFromID(NFromID),
+ /// Lookup a node by index
NFromIndex(NFromIndex),
+ /// Lookup a node by type
NFromType(NFromType),
+ /// Lookup an edge by ID
EFromID(EFromID),
+ /// Lookup an edge by type
EFromType(EFromType),
+ /// Search for vectors
SearchVector(SearchVector),
+ /// Search for vectors using BM25
SearchBM25(SearchBM25),
+ /// Traversal starts from an anonymous node
Anonymous,
Empty,
}
#[derive(Clone)]
pub struct AddN {
+ /// Label of node
pub label: GenRef,
+ /// Properties of node
pub properties: Option>,
+ /// Names of properties to index on
pub secondary_indices: Option>,
}
impl Display for AddN {
@@ -45,11 +60,14 @@ impl Display for AddN {
#[derive(Clone)]
pub struct AddE {
+ /// Label of edge
    pub label: GenRef<String>,
+ /// Properties of edge
    pub properties: Option<Vec<(String, GeneratedValue)>>,
+ /// From node ID
pub from: GeneratedValue,
+ /// To node ID
pub to: GeneratedValue,
-    // pub secondary_indices: Option<Vec<String>>,
}
impl Display for AddE {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -65,8 +83,11 @@ impl Display for AddE {
}
#[derive(Clone)]
pub struct AddV {
+ /// Vector to add
pub vec: VecData,
+ /// Label of vector
    pub label: GenRef<String>,
+ /// Properties of vector
    pub properties: Option<Vec<(String, GeneratedValue)>>,
}
impl Display for AddV {
@@ -84,18 +105,22 @@ impl Display for AddV {
#[derive(Clone)]
pub struct NFromID {
+ /// ID of node
    pub id: GenRef<String>,
-    pub label: GenRef<String>, // possible not needed, do we do runtime label checking?
+    /// Label of node
+    ///
+    /// - currently unused, but kept in case ID lookups ever need to go to a type-specific table
+    pub label: GenRef<String>,
}
impl Display for NFromID {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- // TODO: possibly add label for runtime label checking?
write!(f, "n_from_id({})", self.id)
}
}
#[derive(Clone)]
pub struct NFromType {
+    /// Label of nodes to look up
    pub label: GenRef<String>,
}
impl Display for NFromType {
@@ -106,8 +131,12 @@ impl Display for NFromType {
#[derive(Clone)]
pub struct EFromID {
+ /// ID of edge
    pub id: GenRef<String>,
-    pub label: GenRef<String>, // possible not needed, do we do runtime label checking?
+    /// Label of edge
+    ///
+    /// - currently unused, but kept in case ID lookups ever need to go to a type-specific table
+    pub label: GenRef<String>,
}
impl Display for EFromID {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
@@ -117,6 +146,7 @@ impl Display for EFromID {
#[derive(Clone)]
pub struct EFromType {
+    /// Label of edges to look up
    pub label: GenRef<String>,
}
impl Display for EFromType {
@@ -127,8 +157,11 @@ impl Display for EFromType {
#[derive(Clone)]
pub struct SearchBM25 {
+ /// Type of node to search for
    pub type_arg: GenRef<String>,
+ /// Query to search for
pub query: GeneratedValue,
+ /// Number of results to return
pub k: GeneratedValue,
}
@@ -160,9 +193,13 @@ impl Display for SourceStep {
#[derive(Clone)]
pub struct SearchVector {
+ /// Label of vector to search for
    pub label: GenRef<String>,
+ /// Vector to search for
pub vec: VecData,
+ /// Number of results to return
pub k: GeneratedValue,
+ /// Pre-filter to apply to the search - currently not implemented in grammar
    pub pre_filter: Option<Vec<BoExp>>,
}
@@ -194,8 +231,11 @@ impl Display for SearchVector {
#[derive(Clone)]
pub struct NFromIndex {
+ /// Index to search against
    pub index: GenRef<String>,
+ /// Key to search for in the index
pub key: GeneratedValue,
+ /// Label of nodes to lookup - used for post filtering
pub label: GenRef,
}
diff --git a/helix-db/src/helixc/generator/statements.rs b/helix-db/src/helixc/generator/statements.rs
index eb4ab8a3..89fd88ee 100644
--- a/helix-db/src/helixc/generator/statements.rs
+++ b/helix-db/src/helixc/generator/statements.rs
@@ -1,7 +1,7 @@
use core::fmt;
use std::fmt::Display;
-use crate::helixc::generator::{bool_op::BoExp, traversal_steps::Traversal, utils::GenRef};
+use crate::helixc::generator::{bool_ops::BoExp, traversal_steps::Traversal, utils::GenRef};
diff --git a/helix-db/src/helixc/generator/traversal_steps.rs b/helix-db/src/helixc/generator/traversal_steps.rs
index 24352e41..9513b855 100644
--- a/helix-db/src/helixc/generator/traversal_steps.rs
+++ b/helix-db/src/helixc/generator/traversal_steps.rs
@@ -1,8 +1,8 @@
-use crate::helixc::generator::utils::{write_properties, VecData};
+use crate::helixc::generator::utils::{VecData, write_properties};
use super::{
- bool_op::{BoolOp, BoExp},
- object_remapping_generation::Remapping,
+ bool_ops::{BoExp, BoolOp},
+ object_remappings::Remapping,
source_steps::SourceStep,
utils::{GenRef, GeneratedValue, Order, Separator},
};
@@ -123,12 +123,7 @@ impl Display for Traversal {
write!(f, "\n{step}")?;
}
write!(f, "\n .collect_to::>();")?;
- write!(
- f,
- "G::new_mut_from(Arc::clone(&db), &mut txn, update_tr)", // TODO: make
- // this less
- // scrappy
- )?;
+ write!(f, "G::new_mut_from(Arc::clone(&db), &mut txn, update_tr)",)?;
write!(f, "\n .update({})", write_properties(properties))?;
write!(f, "\n .collect_to_obj()")?;
write!(f, "}}")?;
@@ -279,14 +274,11 @@ impl Display for InE {
#[derive(Clone)]
pub enum Where {
Ref(WhereRef),
- Mut(WhereMut),
}
impl Display for Where {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match self {
- Where::Ref(wr) => write!(f, "{wr}"),
- Where::Mut(wm) => write!(f, "{wm}"),
- }
+ let Where::Ref(wr) = self;
+ write!(f, "{wr}")
}
}
@@ -310,16 +302,6 @@ impl Display for WhereRef {
}
}
-#[derive(Clone)]
-pub struct WhereMut {
- pub expr: BoExp,
-}
-impl Display for WhereMut {
- fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
- todo!()
- }
-}
-
#[derive(Clone)]
pub struct Range {
pub start: GeneratedValue,
diff --git a/helix-db/src/helixc/generator/utils.rs b/helix-db/src/helixc/generator/utils.rs
index 10b6c8e3..8ca8e6a1 100644
--- a/helix-db/src/helixc/generator/utils.rs
+++ b/helix-db/src/helixc/generator/utils.rs
@@ -1,7 +1,6 @@
+use crate::helixc::parser::types::IdType;
use std::fmt::{self, Debug, Display};
-use crate::helixc::parser::helix_parser::IdType;
-
#[derive(Clone)]
pub enum GenRef<T>
where
@@ -235,11 +234,11 @@ impl Display for GeneratedValue {
impl Debug for GeneratedValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
- GeneratedValue::Literal(value) => write!(f, "Literal({value})"),
- GeneratedValue::Primitive(value) => write!(f, "Primitive({value})"),
- GeneratedValue::Identifier(value) => write!(f, "Identifier({value})"),
- GeneratedValue::Parameter(value) => write!(f, "Parameter({value})"),
- GeneratedValue::Array(value) => write!(f, "Array({value:?})"),
+ GeneratedValue::Literal(value) => write!(f, "GV: Literal({value})"),
+ GeneratedValue::Primitive(value) => write!(f, "GV: Primitive({value})"),
+ GeneratedValue::Identifier(value) => write!(f, "GV: Identifier({value})"),
+ GeneratedValue::Parameter(value) => write!(f, "GV: Parameter({value})"),
+ GeneratedValue::Array(value) => write!(f, "GV: Array({value:?})"),
GeneratedValue::Unknown => write!(f, "Unknown"),
}
}
@@ -298,7 +297,7 @@ impl Display for RustType {
RustType::F32 => write!(f, "f32"),
RustType::F64 => write!(f, "f64"),
RustType::Bool => write!(f, "bool"),
- RustType::Uuid => write!(f, "ID"), // TODO: Change this for actual UUID
+ RustType::Uuid => write!(f, "ID"),
RustType::Date => write!(f, "DateTime"),
}
}
diff --git a/helix-db/src/helixc/mod.rs b/helix-db/src/helixc/mod.rs
index a4a92f94..1f965ed6 100644
--- a/helix-db/src/helixc/mod.rs
+++ b/helix-db/src/helixc/mod.rs
@@ -1,3 +1,8 @@
+// Copyright 2025 HelixDB Inc.
+// SPDX-License-Identifier: AGPL-3.0
+
+//! HelixQL compiler, analyzer, and transpiler.
+
pub mod analyzer;
pub mod generator;
pub mod parser;
diff --git a/helix-db/src/helixc/parser/README.md b/helix-db/src/helixc/parser/README.md
new file mode 100644
index 00000000..797ff9a0
--- /dev/null
+++ b/helix-db/src/helixc/parser/README.md
@@ -0,0 +1,44 @@
+# Parser Module
+
+## Overview
+The parser module transforms HelixQL (HQL) source code into an Abstract Syntax Tree (AST) using the Pest parser generator framework.
+
+## Structure
+
+### Core Components
+- **`mod.rs`** - Main parser entry point, orchestrates parsing of schemas, queries, and migrations
+- **`grammar.pest`** - Pest grammar defining HQL syntax rules
+- **`types.rs`** - AST node definitions and data structures
+- **`location.rs`** - Location tracking for error reporting
+
+### Parse Methods (by domain)
+- **`schema_parse_methods.rs`** - Parses node, edge, and vector schema definitions
+- **`query_parse_methods.rs`** - Parses query definitions with parameters and statements
+- **`migration_parse_methods.rs`** - Parses schema migration definitions
+- **`traversal_parse_methods.rs`** - Parses traversals (anonymous, ID-based, or starting from a node, vector, or edge)
+- **`graph_step_parse_methods.rs`** - Parses graph step operations (object remapping, order-by, where, range, etc.)
+- **`creation_step_parse_methods.rs`** - Parses node/edge/vector creation operations
+- **`expression_parse_methods.rs`** - Parses expressions, e.g. assignments, for loops, boolean expressions, etc.
+- **`object_parse_methods.rs`** - Parses object fields for remappings, parameters, item creation, etc.
+- **`return_value_parse_methods.rs`** - Parses return statements and remappings
+
+## Parsing Flow
+
+1. **Input**: HQL files containing schemas, queries, and migrations
+2. **Lexing**: Pest tokenizes input according to `grammar.pest` rules
+3. **AST Construction**:
+ - Schemas parsed first (establishing type definitions)
+ - Migrations parsed second (for schema evolution)
+ - Queries parsed last (can reference schema types)
+4. **Output**: `Source` struct containing parsed schemas, migrations, and queries
+
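+For example, a single HQL input might define a schema and a query together (illustrative syntax):
+
+```rs
+N::User {
+    name: String
+}
+
+QUERY GetUsers() =>
+    users <- N<User>
+    RETURN users
+```
+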
+## Key Types
+
+- `Source` - Top-level container for all parsed content
+- `Schema` - Contains node, edge, and vector type definitions
+- `Query` - Parsed query with parameters, statements, and return values
+- `Migration` - Schema version transition definitions
+
+## Error Handling
+- `ParserError` enum covers parse errors, lex errors, and schema validation errors
+- Location tracking enables precise error reporting with file/line/column info
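+
+## Usage Sketch
+
+A minimal sketch of invoking the parser from Rust, assuming the `HelixParser::parse_source` entry point and the `Content`/`HxFile` types from `parser::types` (exact field names and `Default` impls here are illustrative assumptions, not a definitive API):
+
+```rs
+use helix_db::helixc::parser::{HelixParser, types::{Content, HxFile}};
+
+// Gather the .hx files to parse (names and contents are illustrative).
+let files = vec![HxFile {
+    name: "schema.hx".to_string(),
+    content: "N::User { name: String }".to_string(),
+}];
+
+// Bundle the files and hand them to the parser, which returns a `Source`
+// containing the parsed schemas, migrations, and queries.
+let content = Content { files, ..Default::default() };
+let source = HelixParser::parse_source(&content)?;
+```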
diff --git a/helix-db/src/helixc/parser/creation_step_parse_methods.rs b/helix-db/src/helixc/parser/creation_step_parse_methods.rs
new file mode 100644
index 00000000..b02173f4
--- /dev/null
+++ b/helix-db/src/helixc/parser/creation_step_parse_methods.rs
@@ -0,0 +1,165 @@
+use crate::helixc::parser::{
+ HelixParser, Rule,
+ location::HasLoc,
+ ParserError,
+ types::{AddEdge, AddNode, AddVector, Embed, EvaluatesToString, VectorData},
+};
+use pest::iterators::Pair;
+
+impl HelixParser {
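+ /// Parses an `AddV` (vector creation) step.
+ ///
+ /// #### Example (illustrative HQL)
+ /// ```rs
+ /// AddV<Document>([0.1, 0.2, 0.3])
+ /// ```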
+ pub(super) fn parse_add_vector(&self, pair: Pair<Rule>) -> Result<AddVector, ParserError> {
+ let mut vector_type = None;
+ let mut data = None;
+ let mut fields = None;
+
+ for p in pair.clone().into_inner() {
+ match p.as_rule() {
+ Rule::identifier_upper => {
+ vector_type = Some(p.as_str().to_string());
+ }
+ Rule::vector_data => match p.clone().into_inner().next() {
+ Some(vector_data) => match vector_data.as_rule() {
+ Rule::identifier => {
+ data = Some(VectorData::Identifier(p.as_str().to_string()));
+ }
+ Rule::vec_literal => {
+ data = Some(VectorData::Vector(self.parse_vec_literal(p)?));
+ }
+ Rule::embed_method => {
+ data = Some(VectorData::Embed(Embed {
+ loc: vector_data.loc(),
+ value: match vector_data.clone().into_inner().next() {
+ Some(inner) => match inner.as_rule() {
+ Rule::identifier => EvaluatesToString::Identifier(
+ inner.as_str().to_string(),
+ ),
+ Rule::string_literal => EvaluatesToString::StringLiteral(
+ inner.as_str().to_string(),
+ ),
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in AddV: {:?} => {:?}",
+ inner.as_rule(),
+ inner,
+ )));
+ }
+ },
+ None => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in AddV: {:?} => {:?}",
+ p.as_rule(),
+ p,
+ )));
+ }
+ },
+ }));
+ }
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in AddV: {:?} => {:?}",
+ vector_data.as_rule(),
+ vector_data,
+ )));
+ }
+ },
+ None => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in AddV: {:?} => {:?}",
+ p.as_rule(),
+ p,
+ )));
+ }
+ },
+ Rule::create_field => {
+ fields = Some(self.parse_property_assignments(p)?);
+ }
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in AddV: {:?} => {:?}",
+ p.as_rule(),
+ p,
+ )));
+ }
+ }
+ }
+
+ Ok(AddVector {
+ vector_type,
+ data,
+ fields,
+ loc: pair.loc(),
+ })
+ }
+
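+ /// Parses an `AddN` (node creation) step.
+ ///
+ /// #### Example (illustrative HQL)
+ /// ```rs
+ /// AddN<User>({name: "Alice"})
+ /// ```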
+ pub(super) fn parse_add_node(&self, pair: Pair<Rule>) -> Result<AddNode, ParserError> {
+ let mut node_type = None;
+ let mut fields = None;
+
+ for p in pair.clone().into_inner() {
+ match p.as_rule() {
+ Rule::identifier_upper => {
+ node_type = Some(p.as_str().to_string());
+ }
+ Rule::create_field => {
+ fields = Some(self.parse_property_assignments(p)?);
+ }
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in AddV: {:?} => {:?}",
+ p.as_rule(),
+ p,
+ )));
+ }
+ }
+ }
+
+ Ok(AddNode {
+ node_type,
+ fields,
+ loc: pair.loc(),
+ })
+ }
+
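+ /// Parses an `AddE` (edge creation) step, including its `::From`/`::To`
+ /// connection.
+ ///
+ /// #### Example (illustrative HQL)
+ /// ```rs
+ /// AddE<Follows>::From(user1)::To(user2)
+ /// ```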
+ pub(super) fn parse_add_edge(
+ &self,
+ pair: Pair<Rule>,
+ from_identifier: bool,
+ ) -> Result<AddEdge, ParserError> {
+ let mut edge_type = None;
+ let mut fields = None;
+ let mut connection = None;
+
+ for p in pair.clone().into_inner() {
+ match p.as_rule() {
+ Rule::identifier_upper => {
+ edge_type = Some(p.as_str().to_string());
+ }
+ Rule::create_field => {
+ fields = Some(self.parse_property_assignments(p)?);
+ }
+ Rule::to_from => {
+ connection = Some(self.parse_to_from(p)?);
+ }
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in AddE: {:?}",
+ p.as_rule()
+ )));
+ }
+ }
+ }
+ if edge_type.is_none() {
+ return Err(ParserError::from("Missing edge type"));
+ }
+ Ok(AddEdge {
+ edge_type,
+ fields,
+ connection: connection.ok_or_else(|| ParserError::from("Missing edge connection"))?,
+ from_identifier,
+ loc: pair.loc(),
+ })
+ }
+}
diff --git a/helix-db/src/helixc/parser/expression_parse_methods.rs b/helix-db/src/helixc/parser/expression_parse_methods.rs
new file mode 100644
index 00000000..c5cded0a
--- /dev/null
+++ b/helix-db/src/helixc/parser/expression_parse_methods.rs
@@ -0,0 +1,502 @@
+use crate::{
+ helixc::parser::{
+ HelixParser, Rule,
+ location::{HasLoc, Loc},
+ ParserError,
+ types::{
+ Assignment, BM25Search, Embed, EvaluatesToNumber, EvaluatesToNumberType,
+ EvaluatesToString, ExistsExpression, Expression, ExpressionType, ForLoop, ForLoopVars,
+ SearchVector, ValueType, VectorData,
+ },
+ },
+ protocol::value::Value,
+};
+use pest::iterators::{Pair, Pairs};
+
+impl HelixParser {
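+ /// Parses an assignment statement binding an expression to a variable.
+ ///
+ /// #### Example (illustrative HQL)
+ /// ```rs
+ /// users <- N<User>
+ /// ```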
+ pub(super) fn parse_assignment(&self, pair: Pair<Rule>) -> Result<Assignment, ParserError> {
+ let mut pairs = pair.clone().into_inner();
+ let variable = pairs.next().unwrap().as_str().to_string();
+ let value = self.parse_expression(pairs.next().unwrap())?;
+
+ Ok(Assignment {
+ variable,
+ value,
+ loc: pair.loc(),
+ })
+ }
+
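+ /// Parses a single expression into an `Expression`: traversals, literals,
+ /// boolean expressions, `EXISTS`, creation steps, and searches.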
+ pub(super) fn parse_expression(&self, p: Pair<Rule>) -> Result<Expression, ParserError> {
+ let pair = p
+ .into_inner()
+ .next()
+ .ok_or_else(|| ParserError::from("Empty expression"))?;
+
+ match pair.as_rule() {
+ Rule::traversal => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::Traversal(Box::new(self.parse_traversal(pair)?)),
+ }),
+ Rule::id_traversal => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::Traversal(Box::new(self.parse_traversal(pair)?)),
+ }),
+ Rule::anonymous_traversal => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::Traversal(Box::new(self.parse_anon_traversal(pair)?)),
+ }),
+ Rule::identifier => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::Identifier(pair.as_str().to_string()),
+ }),
+ Rule::string_literal => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::StringLiteral(self.parse_string_literal(pair)?),
+ }),
+ Rule::exists => {
+ let loc = pair.loc();
+ let mut inner = pair.into_inner();
+ let negated = match inner.peek() {
+ Some(p) => p.as_rule() == Rule::negate,
+ None => false,
+ };
+ if negated {
+ inner.next();
+ }
+ let traversal = inner
+ .next()
+ .ok_or_else(|| ParserError::from("Missing traversal"))?;
+ let expr = ExpressionType::Exists(ExistsExpression {
+ loc: loc.clone(),
+ expr: Box::new(Expression {
+ loc: loc.clone(),
+ expr: ExpressionType::Traversal(Box::new(match traversal.as_rule() {
+ Rule::anonymous_traversal => self.parse_anon_traversal(traversal)?,
+ Rule::id_traversal => self.parse_traversal(traversal)?,
+ Rule::traversal => self.parse_traversal(traversal)?,
+ _ => unreachable!(),
+ })),
+ }),
+ });
+ Ok(Expression {
+ loc: loc.clone(),
+ expr: match negated {
+ true => ExpressionType::Not(Box::new(Expression {
+ loc: loc.clone(),
+ expr,
+ })),
+ false => expr,
+ },
+ })
+ }
+ Rule::integer => pair
+ .as_str()
+ .parse()
+ .map(|i| Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::IntegerLiteral(i),
+ })
+ .map_err(|_| ParserError::from("Invalid integer literal")),
+ Rule::float => pair
+ .as_str()
+ .parse()
+ .map(|f| Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::FloatLiteral(f),
+ })
+ .map_err(|_| ParserError::from("Invalid float literal")),
+ Rule::boolean => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::BooleanLiteral(pair.as_str() == "true"),
+ }),
+ Rule::array_literal => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::ArrayLiteral(self.parse_array_literal(pair)?),
+ }),
+ Rule::evaluates_to_bool => Ok(self.parse_boolean_expression(pair)?),
+ Rule::AddN => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::AddNode(self.parse_add_node(pair)?),
+ }),
+ Rule::AddV => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::AddVector(self.parse_add_vector(pair)?),
+ }),
+ Rule::AddE => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::AddEdge(self.parse_add_edge(pair, false)?),
+ }),
+ Rule::search_vector => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::SearchVector(self.parse_search_vector(pair)?),
+ }),
+ Rule::none => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::Empty,
+ }),
+ Rule::bm25_search => Ok(Expression {
+ loc: pair.loc(),
+ expr: ExpressionType::BM25Search(self.parse_bm25_search(pair)?),
+ }),
+ _ => Err(ParserError::from(format!(
+ "Unexpected expression type: {:?}",
+ pair.as_rule()
+ ))),
+ }
+ }
+
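+ /// Parses a boolean expression (`AND`, `OR`, `EXISTS`, or a boolean
+ /// literal), handling an optional leading negation.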
+ pub(super) fn parse_boolean_expression(
+ &self,
+ pair: Pair<Rule>,
+ ) -> Result<Expression, ParserError> {
+ let expression = pair.into_inner().next().unwrap();
+ match expression.as_rule() {
+ Rule::and => {
+ let loc: Loc = expression.loc();
+ let mut inner = expression.into_inner();
+ let negated = match inner.peek() {
+ Some(p) => p.as_rule() == Rule::negate,
+ None => false,
+ };
+ if negated {
+ inner.next();
+ }
+ let exprs = self.parse_expression_vec(inner)?;
+ Ok(Expression {
+ loc: loc.clone(),
+ expr: match negated {
+ true => ExpressionType::Not(Box::new(Expression {
+ loc,
+ expr: ExpressionType::And(exprs),
+ })),
+ false => ExpressionType::And(exprs),
+ },
+ })
+ }
+ Rule::or => {
+ let loc: Loc = expression.loc();
+ let mut inner = expression.into_inner();
+ let negated = match inner.peek() {
+ Some(p) => p.as_rule() == Rule::negate,
+ None => false,
+ };
+ if negated {
+ inner.next();
+ }
+ let exprs = self.parse_expression_vec(inner)?;
+ Ok(Expression {
+ loc: loc.clone(),
+ expr: match negated {
+ true => ExpressionType::Not(Box::new(Expression {
+ loc,
+ expr: ExpressionType::Or(exprs),
+ })),
+ false => ExpressionType::Or(exprs),
+ },
+ })
+ }
+ Rule::boolean => Ok(Expression {
+ loc: expression.loc(),
+ expr: ExpressionType::BooleanLiteral(expression.as_str() == "true"),
+ }),
+ Rule::exists => {
+ let loc = expression.loc();
+ let mut inner = expression.into_inner();
+ let negated = match inner.peek() {
+ Some(p) => p.as_rule() == Rule::negate,
+ None => false,
+ };
+ if negated {
+ inner.next();
+ }
+ let traversal = inner
+ .next()
+ .ok_or_else(|| ParserError::from("Missing traversal"))?;
+ let expr = ExpressionType::Exists(ExistsExpression {
+ loc: loc.clone(),
+ expr: Box::new(Expression {
+ loc: loc.clone(),
+ expr: ExpressionType::Traversal(Box::new(
+ self.parse_anon_traversal(traversal)?,
+ )),
+ }),
+ });
+ Ok(Expression {
+ loc: loc.clone(),
+ expr: match negated {
+ true => ExpressionType::Not(Box::new(Expression {
+ loc: loc.clone(),
+ expr,
+ })),
+ false => expr,
+ },
+ })
+ }
+
+ _ => unreachable!(),
+ }
+ }
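+
+ /// Parses a list of sub-expressions (traversals or nested boolean
+ /// expressions), as found inside `AND(...)`/`OR(...)` argument lists.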
+ pub(super) fn parse_expression_vec(
+ &self,
+ pairs: Pairs<Rule>,
+ ) -> Result<Vec<Expression>, ParserError> {
+ let mut expressions = Vec::new();
+ for p in pairs {
+ match p.as_rule() {
+ Rule::anonymous_traversal => {
+ expressions.push(Expression {
+ loc: p.loc(),
+ expr: ExpressionType::Traversal(Box::new(self.parse_anon_traversal(p)?)),
+ });
+ }
+ Rule::traversal => {
+ expressions.push(Expression {
+ loc: p.loc(),
+ expr: ExpressionType::Traversal(Box::new(self.parse_traversal(p)?)),
+ });
+ }
+ Rule::id_traversal => {
+ expressions.push(Expression {
+ loc: p.loc(),
+ expr: ExpressionType::Traversal(Box::new(self.parse_traversal(p)?)),
+ });
+ }
+ Rule::evaluates_to_bool => {
+ expressions.push(self.parse_boolean_expression(p)?);
+ }
+ _ => unreachable!(),
+ }
+ }
+ Ok(expressions)
+ }
+
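+ /// Parses a BM25 full-text search expression.
+ ///
+ /// #### Example (illustrative HQL)
+ /// ```rs
+ /// SearchBM25<Document>("search terms", 10)
+ /// ```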
+ pub(super) fn parse_bm25_search(&self, pair: Pair<Rule>) -> Result<BM25Search, ParserError> {
+ let mut pairs = pair.clone().into_inner();
+ let vector_type = pairs.next().unwrap().as_str().to_string();
+ let query = match pairs.next() {
+ Some(pair) => match pair.as_rule() {
+ Rule::identifier => ValueType::Identifier {
+ value: pair.as_str().to_string(),
+ loc: pair.loc(),
+ },
+ Rule::string_literal => ValueType::Literal {
+ value: Value::String(pair.as_str().to_string()),
+ loc: pair.loc(),
+ },
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in BM25Search: {:?}",
+ pair.as_rule()
+ )));
+ }
+ },
+ None => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in BM25Search: {:?}",
+ pair.as_rule()
+ )));
+ }
+ };
+ let k = Some(match pairs.next() {
+ Some(pair) => match pair.as_rule() {
+ Rule::identifier => EvaluatesToNumber {
+ loc: pair.loc(),
+ value: EvaluatesToNumberType::Identifier(pair.as_str().to_string()),
+ },
+ Rule::integer => EvaluatesToNumber {
+ loc: pair.loc(),
+ value: EvaluatesToNumberType::I32(
+ pair.as_str()
+ .to_string()
+ .parse::<i32>()
+ .map_err(|_| ParserError::from("Invalid integer value"))?,
+ ),
+ },
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in BM25Search: {:?}",
+ pair.as_rule()
+ )));
+ }
+ },
+ None => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in BM25Search: {:?}",
+ pair.as_rule()
+ )));
+ }
+ });
+
+ Ok(BM25Search {
+ loc: pair.loc(),
+ type_arg: Some(vector_type),
+ data: Some(query),
+ k,
+ })
+ }
+
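+ /// Parses a `FOR` loop: its loop variable(s) (identifier, object
+ /// destructuring, or object access), the collection iterated over, and the
+ /// loop body.
+ ///
+ /// #### Example (illustrative HQL)
+ /// ```rs
+ /// FOR user IN users { ... }
+ /// ```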
+ pub(super) fn parse_for_loop(&self, pair: Pair<Rule>) -> Result<ForLoop, ParserError> {
+ let mut pairs = pair.clone().into_inner();
+ // parse the arguments
+ let argument = pairs.next().unwrap().clone().into_inner().next().unwrap();
+ let argument_loc = argument.loc();
+ let variable = match argument.as_rule() {
+ Rule::object_destructuring => {
+ let fields = argument
+ .into_inner()
+ .map(|p| (p.loc(), p.as_str().to_string()))
+ .collect();
+ ForLoopVars::ObjectDestructuring {
+ fields,
+ loc: argument_loc,
+ }
+ }
+ Rule::object_access => {
+ let mut inner = argument.clone().into_inner();
+ let object_name = inner.next().unwrap().as_str().to_string();
+ let field_name = inner.next().unwrap().as_str().to_string();
+ ForLoopVars::ObjectAccess {
+ name: object_name,
+ field: field_name,
+ loc: argument_loc,
+ }
+ }
+ Rule::identifier => ForLoopVars::Identifier {
+ name: argument.as_str().to_string(),
+ loc: argument_loc,
+ },
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in ForLoop: {:?}",
+ argument.as_rule()
+ )));
+ }
+ };
+
+ // parse the in
+ let in_ = pairs.next().unwrap().clone();
+ let in_variable = match in_.as_rule() {
+ Rule::identifier => (in_.loc(), in_.as_str().to_string()),
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in ForLoop: {:?}",
+ in_.as_rule()
+ )));
+ }
+ };
+ // parse the body
+ let statements = self.parse_query_body(pairs.next().unwrap())?;
+
+ Ok(ForLoop {
+ variable,
+ in_variable,
+ statements,
+ loc: pair.loc(),
+ })
+ }
+
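+ /// Parses a vector similarity search expression, with an optional
+ /// pre-filter.
+ ///
+ /// #### Example (illustrative HQL)
+ /// ```rs
+ /// SearchV<Document>(query_vec, 10)
+ /// ```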
+ pub(super) fn parse_search_vector(
+ &self,
+ pair: Pair<Rule>,
+ ) -> Result<SearchVector, ParserError> {
+ let mut vector_type = None;
+ let mut data = None;
+ let mut k: Option<EvaluatesToNumber> = None;
+ let mut pre_filter = None;
+ for p in pair.clone().into_inner() {
+ match p.as_rule() {
+ Rule::identifier_upper => {
+ vector_type = Some(p.as_str().to_string());
+ }
+ Rule::vector_data => match p.clone().into_inner().next() {
+ Some(vector_data) => match vector_data.as_rule() {
+ Rule::identifier => {
+ data = Some(VectorData::Identifier(p.as_str().to_string()));
+ }
+ Rule::vec_literal => {
+ data = Some(VectorData::Vector(self.parse_vec_literal(p)?));
+ }
+ Rule::embed_method => {
+ data = Some(VectorData::Embed(Embed {
+ loc: vector_data.loc(),
+ value: match vector_data.clone().into_inner().next() {
+ Some(inner) => match inner.as_rule() {
+ Rule::identifier => EvaluatesToString::Identifier(
+ inner.as_str().to_string(),
+ ),
+ Rule::string_literal => EvaluatesToString::StringLiteral(
+ inner.as_str().to_string(),
+ ),
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in SearchV: {:?} => {:?}",
+ inner.as_rule(),
+ inner,
+ )));
+ }
+ },
+ None => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in SearchV: {:?} => {:?}",
+ p.as_rule(),
+ p,
+ )));
+ }
+ },
+ }));
+ }
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in SearchV: {:?} => {:?}",
+ vector_data.as_rule(),
+ vector_data,
+ )));
+ }
+ },
+ None => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in SearchV: {:?} => {:?}",
+ p.as_rule(),
+ p,
+ )));
+ }
+ },
+ Rule::integer => {
+ k = Some(EvaluatesToNumber {
+ loc: p.loc(),
+ value: EvaluatesToNumberType::I32(
+ p.as_str()
+ .to_string()
+ .parse::<i32>()
+ .map_err(|_| ParserError::from("Invalid integer value"))?,
+ ),
+ });
+ }
+ Rule::identifier => {
+ k = Some(EvaluatesToNumber {
+ loc: p.loc(),
+ value: EvaluatesToNumberType::Identifier(p.as_str().to_string()),
+ });
+ }
+ Rule::pre_filter => {
+ pre_filter = Some(Box::new(self.parse_expression(p)?));
+ }
+ _ => {
+ return Err(ParserError::from(format!(
+ "Unexpected rule in SearchV: {:?} => {:?}",
+ p.as_rule(),
+ p,
+ )));
+ }
+ }
+ }
+
+ Ok(SearchVector {
+ loc: pair.loc(),
+ vector_type,
+ data,
+ k,
+ pre_filter,
+ })
+ }
+}
diff --git a/helix-db/src/helixc/parser/graph_step_parse_methods.rs b/helix-db/src/helixc/parser/graph_step_parse_methods.rs
new file mode 100644
index 00000000..1a4e71a8
--- /dev/null
+++ b/helix-db/src/helixc/parser/graph_step_parse_methods.rs
@@ -0,0 +1,389 @@
+use crate::helixc::parser::{
+ HelixParser, Rule,
+ location::HasLoc,
+ ParserError,
+ types::{
+ BooleanOp, BooleanOpType, Closure, Exclude, Expression, FieldAddition, FieldValue,
+ FieldValueType, GraphStep, GraphStepType, IdType, Object, OrderBy, OrderByType,
+ ShortestPath, Step, StepType, Update,
+ },
+};
+use pest::iterators::Pair;
+
+impl HelixParser {
+ /// Parses an order by step
+ ///
+ /// #### Example
+ /// ```rs
+ /// ::ORDER(_::{age})
+ /// ```
+ pub(super) fn parse_order_by(&self, pair: Pair<Rule>) -> Result<OrderBy, ParserError> {
+ let mut inner = pair.clone().into_inner();
+ let order_by_type = match inner.next().unwrap().into_inner().next().unwrap().as_rule() {
+ Rule::asc => OrderByType::Asc,
+ Rule::desc => OrderByType::Desc,
+ _ => unreachable!(),
+ };
+ let expression = self.parse_expression(inner.next().unwrap())?;
+ Ok(OrderBy {
+ loc: pair.loc(),
+ order_by_type,
+ expression: Box::new(expression),
+ })
+ }
+
+ /// Parses a range step
+ ///
+ /// #### Example
+ /// ```rs
+ /// ::RANGE(1, 10)
+ /// ```
+ pub(super) fn parse_range(&self, pair: Pair<Rule>) -> Result<(Expression, Expression), ParserError> {
+ let mut inner = pair.into_inner().next().unwrap().into_inner();
+ let start = self.parse_expression(inner.next().unwrap())?;
+ let end = self.parse_expression(inner.next().unwrap())?;
+
+ Ok((start, end))
+ }
+
+ /// Parses a boolean operation
+ ///
+ /// #### Example
+ /// ```rs
+ /// ::GT(1)
+ /// ```
+ pub(super) fn parse_bool_operation(&self, pair: Pair<Rule>) -> Result<BooleanOp, ParserError> {
+ let inner = pair.clone().into_inner().next().unwrap();
+ let expr = match inner.as_rule() {
+ Rule::GT => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::GreaterThan(Box::new(
+ self.parse_expression(inner.into_inner().next().unwrap())?,
+ )),
+ },
+ Rule::GTE => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::GreaterThanOrEqual(Box::new(
+ self.parse_expression(inner.into_inner().next().unwrap())?,
+ )),
+ },
+ Rule::LT => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::LessThan(Box::new(
+ self.parse_expression(inner.into_inner().next().unwrap())?,
+ )),
+ },
+ Rule::LTE => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::LessThanOrEqual(Box::new(
+ self.parse_expression(inner.into_inner().next().unwrap())?,
+ )),
+ },
+ Rule::EQ => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::Equal(Box::new(
+ self.parse_expression(inner.into_inner().next().unwrap())?,
+ )),
+ },
+ Rule::NEQ => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::NotEqual(Box::new(
+ self.parse_expression(inner.into_inner().next().unwrap())?,
+ )),
+ },
+ Rule::CONTAINS => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::Contains(Box::new(
+ self.parse_expression(inner.into_inner().next().unwrap())?,
+ )),
+ },
+ Rule::IS_IN => BooleanOp {
+ loc: pair.loc(),
+ op: BooleanOpType::IsIn(Box::new(self.parse_expression(inner)?)),
+ },
+ _ => return Err(ParserError::from("Invalid boolean operation")),
+ };
+ Ok(expr)
+ }
+
+ /// Parses an update step
+ ///
+ /// #### Example
+ /// ```rs
+ /// ::UPDATE({age: 1})
+ /// ```
+ pub(super) fn parse_update(&self, pair: Pair<Rule>) -> Result<Update, ParserError> {
+ let fields = self.parse_object_fields(pair.clone())?;
+ Ok(Update {
+ fields,
+ loc: pair.loc(),
+ })
+ }
+
+ /// Parses an object step
+ ///
+ /// #### Example
+ /// ```rs
+ /// ::{username: name}
+ /// ```
+ pub(super) fn parse_object_step(&self, pair: Pair<Rule>) -> Result