From cb37d55642bce2d3c8785b6cd3dce0dadab34550 Mon Sep 17 00:00:00 2001 From: joeoc2001 Date: Thu, 13 Mar 2025 14:24:42 +0000 Subject: [PATCH 01/50] Add prec_dynamic --- README.md | 2 +- macro/src/lib.rs | 22 +++++ tool/src/expansion.rs | 16 ++++ tool/src/lib.rs | 83 +++++++++++++++++++ ...l__tests__enum_conflicts_prec_dynamic.snap | 5 ++ 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap diff --git a/README.md b/README.md index 937bb50..11231e3 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ enum SmallDigit { } ``` -### `#[rust_sitter::prec(...)]` / `#[rust_sitter::prec_left(...)]` / `#[rust_sitter::prec_right(...)]` +### `#[rust_sitter::prec(...)]` / `#[rust_sitter::prec_left(...)]` / `#[rust_sitter::prec_right(...)]` / `#[rust_sitter::prec_dynamic(...)]` This annotation can be used to define a non/left/right-associative operator. This annotation takes a single parameter, which is the precedence level of the operator (higher binds more tightly). ### `#[rust_sitter::skip(...)]` diff --git a/macro/src/lib.rs b/macro/src/lib.rs index d65d083..1f403cd 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -164,6 +164,28 @@ pub fn prec_right( item } +#[proc_macro_attribute] +/// This macro is similar to [`prec`], but the given numerical precedence is applied at runtime instead +/// of at parser generation time. This is only necessary when handling a conflict dynamically using +/// [`conflicts`], and when there is a genuine ambiguity: multiple rules correctly +/// match a given piece of code. In that event, Rust-sitter compares the total dynamic precedence +/// associated with each rule, and selects the one with the highest total. +/// +/// This is similar to dynamic precedence directives in Bison grammars. 
+/// +/// ## Example +/// ```ignore +/// #[rust_sitter::prec_dynamic(1)] +/// Cons(Box, Box) +/// ``` +pub fn prec_dynamic( + _attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + item +} + + #[proc_macro_attribute] /// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements. /// The [`rust_sitter::repeat`] annotation must be used on the field as well. diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 8aa9cf8..7cd4a30 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -322,6 +322,12 @@ fn gen_struct_or_variant( let prec_right_param = prec_right_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + let prec_dynamic_attr = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_dynamic)); + + let prec_dynamic_param = prec_dynamic_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + let base_rule = match fields { Fields::Unit => { let dummy_field = Field { @@ -381,6 +387,16 @@ fn gen_struct_or_variant( } else { panic!("Expected integer literal for precedence"); } + } else if let Some(Expr::Lit(lit)) = prec_dynamic_param { + if let Lit::Int(i) = &lit.lit { + json!({ + "type": "PREC_DYNAMIC", + "value": i.base10_parse::().unwrap(), + "content": base_rule + }) + } else { + panic!("Expected integer literal for dynamic precedence"); + } } else { base_rule }; diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 6c74c07..1f7fdde 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -50,6 +50,7 @@ pub fn build_parsers(root_file: &Path) { .map(|s| s.parse().unwrap_or(false)) .unwrap_or(false); generate_grammars(root_file).iter().for_each(|grammar| { + panic!("{}", grammar.to_string()); let (grammar_name, grammar_c) = generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); let tempfile = tempfile::Builder::new() @@ -253,6 +254,88 @@ mod tests { generate_parser_for_grammar(&grammar.to_string(), 
GENERATED_SEMANTIC_VERSION).unwrap(); } + #[test] + fn enum_conflicts_prec_dynamic() { + let m = if let syn::Item::Mod(m) = parse_quote! { + #[rust_sitter::grammar("test")] + mod grammar { + #[rust_sitter::language] + pub struct Program(pub Vec); + + pub enum Statement { + ExpressionStatement(ExpressionStatement), + IfStatement(Box), + } + + pub enum Expression { + Identifier(Identifier), + Number(Number), + BinaryExpression(Box), + } + + #[rust_sitter::prec_left(1)] + pub struct BinaryExpression { + pub expression: Expression, + pub binary_expression_inner: BinaryExpressionInner, + pub expression2: Expression, + } + + pub enum BinaryExpressionInner { + String(#[rust_sitter::leaf(text = "+")] ()), + String2(#[rust_sitter::leaf(text = "-")] ()), + String3(#[rust_sitter::leaf(text = "*")] ()), + String4(#[rust_sitter::leaf(text = "/")] ()), + } + + pub struct ExpressionStatement { + pub expression: Expression, + #[rust_sitter::leaf(text = ";")] + pub _semicolon: (), + } + + #[rust_sitter::prec_dynamic(1)] + pub struct IfStatement { + #[rust_sitter::leaf(text = "if")] + pub _if: (), + #[rust_sitter::leaf(text = "(")] + pub _lparen: (), + pub expression: Expression, + #[rust_sitter::leaf(text = ")")] + pub _rparen: (), + #[rust_sitter::leaf(text = "{")] + pub _lbrace: (), + pub statement: Statement, + #[rust_sitter::leaf(text = "}")] + pub _rbrace: (), + pub if_statement_inner: Option, + } + + pub struct IfStatementElse { + #[rust_sitter::leaf(text = "else")] + pub _else: (), + #[rust_sitter::leaf(text = "{")] + pub _lbrace: (), + pub statement: Statement, + #[rust_sitter::leaf(text = "}")] + pub _rbrace: (), + } + + #[rust_sitter::word] + pub struct Identifier(#[rust_sitter::leaf(pattern = "[a-zA-Z_][a-zA-Z0-9_]*")] ()); + + pub struct Number(#[rust_sitter::leaf(pattern = "\\d+")] ()); + } + } { + m + } else { + panic!() + }; + + let grammar = generate_grammar(&m); + insta::assert_snapshot!(grammar); + generate_parser_for_grammar(&grammar.to_string(), 
GENERATED_SEMANTIC_VERSION).unwrap(); + } + #[test] fn grammar_with_extras() { let m = if let syn::Item::Mod(m) = parse_quote! { diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap new file mode 100644 index 0000000..9d100ed --- /dev/null +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap @@ -0,0 +1,5 @@ +--- +source: tool/src/lib.rs +expression: grammar +--- +{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Program_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_vec_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL
","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"ExpressionStatement__semicolon"}}]},"IfStatement__if":{"type":"STRING","value":"if"},"IfStatement__lparen":{"type":"STRING","value":"("},"IfStatement__rparen":{"type":
"STRING","value":")"},"IfStatement__lbrace":{"type":"STRING","value":"{"},"IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","content":{"type":"SYMBOL","name":"IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"IfStatementElse__else":{"type":"STRING","value":"else"},"IfStatementElse__lbrace":{"type":"STRING","value":"{"},"IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]}},"extras":[]} From 3d0f8f325a2e9b23acab09ab1c41a82185e6d7a8 Mon Sep 17 00:00:00 2001 From: joeoc2001 Date: 
Thu, 13 Mar 2025 15:28:28 +0000 Subject: [PATCH 02/50] Add immediate tokens --- README.md | 3 + macro/src/lib.rs | 21 +- tool/src/expansion.rs | 235 +++++++++++------- tool/src/lib.rs | 31 ++- .../rust_sitter_tool__tests__immediate.snap | 5 + 5 files changed, 209 insertions(+), 86 deletions(-) create mode 100644 tool/src/snapshots/rust_sitter_tool__tests__immediate.snap diff --git a/README.md b/README.md index 11231e3..fc91af4 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,9 @@ enum SmallDigit { ### `#[rust_sitter::prec(...)]` / `#[rust_sitter::prec_left(...)]` / `#[rust_sitter::prec_right(...)]` / `#[rust_sitter::prec_dynamic(...)]` This annotation can be used to define a non/left/right-associative operator. This annotation takes a single parameter, which is the precedence level of the operator (higher binds more tightly). +### `[#rust_sitter::immediate]` +Usually, whitespace is optional before each token. This attribute means that the token will only match if there is no whitespace. + ### `#[rust_sitter::skip(...)]` This annotation can be used to define a field that does not correspond to anything in the input string, such as some metadata. This annotation takes a single parameter, which is the value that should be used to populate that field at runtime. diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 1f403cd..bb54c23 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -170,9 +170,9 @@ pub fn prec_right( /// [`conflicts`], and when there is a genuine ambiguity: multiple rules correctly /// match a given piece of code. In that event, Rust-sitter compares the total dynamic precedence /// associated with each rule, and selects the one with the highest total. -/// +/// /// This is similar to dynamic precedence directives in Bison grammars. -/// +/// /// ## Example /// ```ignore /// #[rust_sitter::prec_dynamic(1)] @@ -185,6 +185,23 @@ pub fn prec_dynamic( item } +#[proc_macro_attribute] +/// Usually, whitespace is optional before each token. 
This attribute means that the token will only match if there is no whitespace. +/// +/// ## Example +/// ```ignore +/// struct StringFragment( +/// #[rust_sitter::immediate] +/// #[rust_sitter::leaf(pattern = r"[^"\\]+")] +/// () +/// ); +/// ``` +pub fn immediate( + _attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + item +} #[proc_macro_attribute] /// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements. diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 7cd4a30..1771633 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -4,6 +4,128 @@ use rust_sitter_common::*; use serde_json::{json, Map, Value}; use syn::{parse::Parse, punctuated::Punctuated, *}; +struct Precs { + prec_param: Option, + prec_left_param: Option, + prec_right_param: Option, + prec_dynamic_param: Option, + immediate: bool, +} + +impl Precs { + fn new(attrs: &[Attribute]) -> Self { + let prec_attr = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec)); + + let prec_param = prec_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + + let prec_left_attr = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_left)); + + let prec_left_param = prec_left_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + + let prec_right_attr = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_right)); + + let prec_right_param = prec_right_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + + let prec_dynamic_attr = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_dynamic)); + + let prec_dynamic_param = + prec_dynamic_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + + let immediate_attr = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::immediate)); + + Self { + prec_param, + prec_left_param, + prec_right_param, + 
prec_dynamic_param, + immediate: immediate_attr.is_some(), + } + } + + fn apply(self, rule: serde_json::Value) -> serde_json::Value { + let Self { + prec_param, + prec_left_param, + prec_right_param, + prec_dynamic_param, + immediate, + } = self; + + let rule = if let Some(Expr::Lit(lit)) = prec_param { + if prec_left_param.is_some() || prec_right_param.is_some() { + panic!("only one of prec, prec_left, and prec_right can be specified"); + } + + if let Lit::Int(i) = &lit.lit { + json!({ + "type": "PREC", + "value": i.base10_parse::().unwrap(), + "content": rule + }) + } else { + panic!("Expected integer literal for precedence"); + } + } else if let Some(Expr::Lit(lit)) = prec_left_param { + if prec_right_param.is_some() { + panic!("only one of prec, prec_left, and prec_right can be specified"); + } + + if let Lit::Int(i) = &lit.lit { + json!({ + "type": "PREC_LEFT", + "value": i.base10_parse::().unwrap(), + "content": rule + }) + } else { + panic!("Expected integer literal for precedence"); + } + } else if let Some(Expr::Lit(lit)) = prec_right_param { + if let Lit::Int(i) = &lit.lit { + json!({ + "type": "PREC_RIGHT", + "value": i.base10_parse::().unwrap(), + "content": rule + }) + } else { + panic!("Expected integer literal for precedence"); + } + } else if let Some(Expr::Lit(lit)) = prec_dynamic_param { + if let Lit::Int(i) = &lit.lit { + json!({ + "type": "PREC_DYNAMIC", + "value": i.base10_parse::().unwrap(), + "content": rule + }) + } else { + panic!("Expected integer literal for dynamic precedence"); + } + } else { + rule + }; + + let rule = if immediate { + json!({ + "type": "IMMEDIATE_TOKEN", + "content": rule + }) + } else { + rule + }; + + rule + } +} + fn gen_field( path: String, leaf_type: Type, @@ -26,6 +148,13 @@ fn gen_field( *word_rule = Some(path.clone()); } + let precs = Precs::new(&leaf_attrs); + if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { + panic!( + "The attributes `prec_left` and `prec_right` cannot be applied to a 
non-struct type" + ); + } + let leaf_params = leaf_attr.and_then(|a| { a.parse_args_with(Punctuated::::parse_terminated) .ok() @@ -50,15 +179,15 @@ fn gen_field( let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); - if !is_vec && !is_option { + let (rule, field_optional) = if !is_vec && !is_option { if let Some(Expr::Lit(lit)) = pattern_param { if let Lit::Str(s) = &lit.lit { out.insert( path.clone(), - json!({ + precs.apply(json!({ "type": "PATTERN", "value": s.value(), - }), + })), ); ( @@ -75,10 +204,10 @@ fn gen_field( if let Lit::Str(s) = &lit.lit { out.insert( path.clone(), - json!({ + precs.apply(json!({ "type": "STRING", "value": s.value(), - }), + })), ); ( @@ -103,10 +232,10 @@ fn gen_field( }; ( - json!({ + precs.apply(json!({ "type": "SYMBOL", "name": symbol_name, - }), + })), false, ) } @@ -216,6 +345,8 @@ fn gen_field( }) }; + let vec_contents = precs.apply(vec_contents); + let contents_ident = format!("{path}_vec_contents"); out.insert(contents_ident.clone(), vec_contents); @@ -235,8 +366,10 @@ fn gen_field( panic!("Option> is not supported"); } - (field_json, true) - } + (precs.apply(field_json), true) + }; + + (rule, field_optional) } fn gen_struct_or_variant( @@ -304,29 +437,7 @@ fn gen_struct_or_variant( }) .collect::>(); - let prec_attr = attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec)); - - let prec_param = prec_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); - - let prec_left_attr = attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_left)); - - let prec_left_param = prec_left_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); - - let prec_right_attr = attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_right)); - - let prec_right_param = prec_right_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); - - 
let prec_dynamic_attr = attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_dynamic)); - - let prec_dynamic_param = prec_dynamic_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + let precs = Precs::new(&attrs); let base_rule = match fields { Fields::Unit => { @@ -349,57 +460,7 @@ fn gen_struct_or_variant( }), }; - let rule = if let Some(Expr::Lit(lit)) = prec_param { - if prec_left_attr.is_some() || prec_right_attr.is_some() { - panic!("only one of prec, prec_left, and prec_right can be specified"); - } - - if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC", - "value": i.base10_parse::().unwrap(), - "content": base_rule - }) - } else { - panic!("Expected integer literal for precedence"); - } - } else if let Some(Expr::Lit(lit)) = prec_left_param { - if prec_right_attr.is_some() { - panic!("only one of prec, prec_left, and prec_right can be specified"); - } - - if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC_LEFT", - "value": i.base10_parse::().unwrap(), - "content": base_rule - }) - } else { - panic!("Expected integer literal for precedence"); - } - } else if let Some(Expr::Lit(lit)) = prec_right_param { - if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC_RIGHT", - "value": i.base10_parse::().unwrap(), - "content": base_rule - }) - } else { - panic!("Expected integer literal for precedence"); - } - } else if let Some(Expr::Lit(lit)) = prec_dynamic_param { - if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC_DYNAMIC", - "value": i.base10_parse::().unwrap(), - "content": base_rule - }) - } else { - panic!("Expected integer literal for dynamic precedence"); - } - } else { - base_rule - }; + let rule = precs.apply(base_rule); out.insert(path, rule); } @@ -482,6 +543,14 @@ pub fn generate_grammar(module: &ItemMod) -> Value { "members": members }); + let precs = Precs::new(&e.attrs); + if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { + panic!( + "The attributes `prec_left` and 
`prec_right` cannot be applied directly to an enum" + ); + } + let rule = precs.apply(rule); + rules_map.insert(e.ident.to_string(), rule); (e.ident.to_string(), e.attrs.clone()) diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 1f7fdde..2d40e46 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -50,7 +50,6 @@ pub fn build_parsers(root_file: &Path) { .map(|s| s.parse().unwrap_or(false)) .unwrap_or(false); generate_grammars(root_file).iter().for_each(|grammar| { - panic!("{}", grammar.to_string()); let (grammar_name, grammar_c) = generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); let tempfile = tempfile::Builder::new() @@ -587,4 +586,34 @@ mod tests { insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } + + #[test] + fn immediate() { + let m = if let syn::Item::Mod(m) = parse_quote! { + #[rust_sitter::grammar("test")] + mod grammar { + #[rust_sitter::language] + pub struct StringFragment( + #[rust_sitter::immediate] + #[rust_sitter::prec(1)] + #[rust_sitter::leaf(pattern = r#"[^"\\]+"#)] + () + ); + + #[rust_sitter::extra] + struct Whitespace { + #[rust_sitter::leaf(pattern = r"\s")] + _whitespace: (), + } + } + } { + m + } else { + panic!() + }; + + let grammar = generate_grammar(&m); + insta::assert_snapshot!(grammar); + generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); + } } diff --git a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap new file mode 100644 index 0000000..66c9d40 --- /dev/null +++ b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap @@ -0,0 +1,5 @@ +--- +source: tool/src/lib.rs +expression: grammar +--- 
+{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} From 2fb9d31878f637db73366e55161de2362f3ccf88 Mon Sep 17 00:00:00 2001 From: joeoc2001 Date: Thu, 13 Mar 2025 17:01:06 +0000 Subject: [PATCH 03/50] Clippy --- tool/src/expansion.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 1771633..afe6c56 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -113,16 +113,14 @@ impl Precs { rule }; - let rule = if immediate { + if immediate { json!({ "type": "IMMEDIATE_TOKEN", "content": rule }) } else { rule - }; - - rule + } } } @@ -179,7 +177,7 @@ fn gen_field( let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); - let (rule, field_optional) = if !is_vec && !is_option { + if !is_vec && !is_option { if let Some(Expr::Lit(lit)) = pattern_param { if let Lit::Str(s) = &lit.lit { out.insert( @@ -367,9 +365,7 @@ fn gen_field( } (precs.apply(field_json), true) - }; - - (rule, field_optional) + } } fn gen_struct_or_variant( From 190933d54fce42f38183dbbfb7147d66d12f0ac1 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Wed, 9 Jul 2025 11:40:13 -0500 Subject: [PATCH 04/50] Add ability to parse from types which implement `FromStr` without specifying a 
`transform` function. --- runtime/src/lib.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 9343cee..6bac0dc 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -41,6 +41,8 @@ impl Extract for WithLeaf { } } +// Common implementations for various types. + impl Extract<()> for () { type LeafFn = (); fn extract( @@ -108,6 +110,38 @@ impl, U> Extract> for Vec { } } +macro_rules! extract_from_str { + ($t:ty) => { + impl Extract<$t> for $t { + type LeafFn = (); + fn extract( + node: Option, + source: &[u8], + _last_idx: usize, + _leaf_fn: Option<&Self::LeafFn>, + ) -> Self { + let node = node.unwrap(); + let text = node.utf8_text(source).unwrap(); + text.parse().unwrap() + } + } + }; +} + +extract_from_str!(u8); +extract_from_str!(i8); +extract_from_str!(u16); +extract_from_str!(i16); +extract_from_str!(u32); +extract_from_str!(i32); +extract_from_str!(u64); +extract_from_str!(i64); +// NOTE: These two may not work as intended due to rounding issues. +extract_from_str!(f32); +extract_from_str!(f64); +// Sort of silly, but keeps it general. +extract_from_str!(String); + #[derive(Clone, Debug)] /// A wrapper around a value that also contains the span of the value in the source. pub struct Spanned { From 551db495c6220ae9550868a1719f7ebaaa135189 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Wed, 9 Jul 2025 11:50:10 -0500 Subject: [PATCH 05/50] add ability to ignore fields to produce a cleaner grammar. 
--- ...xample__words__tests__words_grammar-4.snap | 4 +-- example/src/words.rs | 6 ++--- macro/src/expansion.rs | 27 ++++++++++++++++--- macro/src/lib.rs | 23 ++++++++++++++++ tool/src/expansion.rs | 15 ++++++----- ...er_tool__tests__enum_with_named_field.snap | 4 +-- ...st_sitter_tool__tests__grammar_repeat.snap | 2 +- ...t_sitter_tool__tests__grammar_repeat1.snap | 2 +- ...l__tests__grammar_repeat_no_delimiter.snap | 2 +- ...tter_tool__tests__grammar_with_extras.snap | 2 +- ...st_sitter_tool__tests__spanned_in_vec.snap | 2 +- 11 files changed, 67 insertions(+), 22 deletions(-) diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap index 1e50599..b8210ae 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap @@ -4,7 +4,7 @@ expression: "grammar::parse(\"if hello\")" --- Ok( Words { - _keyword: (), - _word: "hello", + keyword: (), + word: "hello", }, ) diff --git a/example/src/words.rs b/example/src/words.rs index 86ffe4a..fa0f009 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -4,10 +4,10 @@ pub mod grammar { #[derive(Debug)] pub struct Words { #[rust_sitter::leaf(text = r"if")] - _keyword: (), + keyword: (), #[rust_sitter::word] - #[rust_sitter::leaf(pattern = r"[a-z_]+", transform = |v| v.to_string())] - _word: String, + #[rust_sitter::leaf(pattern = r"[a-z_]+")] + word: String, } #[rust_sitter::extra] diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 75c2b80..a498d5d 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::{collections::HashSet, sync::LazyLock}; use crate::errors::IteratorExt as _; use proc_macro2::Span; @@ -6,13 +6,33 @@ use quote::{quote, ToTokens}; use rust_sitter_common::*; use syn::{parse::Parse, 
punctuated::Punctuated, *}; +static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { + [ + "leaf", + "token", + "immediate", + "prec", + "prec_left", + "prec_right", + "prec_dynamic", + "extra", + ] + .into_iter() + .collect() +}); + fn is_sitter_attr(attr: &Attribute) -> bool { - attr.path() + let is_explicit = attr + .path() .segments .iter() .next() .map(|segment| segment.ident == "rust_sitter") - .unwrap_or(false) + .unwrap_or(false); + is_explicit || { + attr.path().segments.len() == 1 + && RUST_SITTER_ATTRS.contains(attr.path().segments[0].ident.to_string().as_str()) + } } pub enum ParamOrField { @@ -73,6 +93,7 @@ fn gen_struct_or_variant( container_attrs: Vec, ) -> Result { let children_parsed = if fields == Fields::Unit { + // TODO (JAB): Handle `` let expr = { let dummy_field = Field { attrs: container_attrs, diff --git a/macro/src/lib.rs b/macro/src/lib.rs index d65d083..23b10ef 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -187,6 +187,27 @@ pub fn delimited( item } +#[proc_macro_attribute] +/// Produce a series of literal, anonymous tokens in the grammar. +/// ## Example +/// ```ignore +/// pub struct Function { +/// #[rust_sitter::lit("function")] // produces a keyword-like "function" +/// // parses name as Ident +/// name: Ident, +/// #[rust_sitter::lit("(", ")", "{")] // simple grammar, no inputs +/// // The body +/// body: Vec, +/// #[rust_sitter::lit("}")] // Closing } +/// } +/// ``` +pub fn lit( + _attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + item +} + #[proc_macro_attribute] /// On `Vec<_>` typed fields, specifies additional config for how the repeated elements should /// be parsed. 
In particular, this annotation takes the following named arguments: @@ -223,6 +244,8 @@ pub fn grammar( proc_macro::TokenStream::from(expanded) } + + #[cfg(test)] mod tests { use std::fs::File; diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 8aa9cf8..5243f58 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -253,13 +253,14 @@ fn gen_struct_or_variant( out: &mut Map, ident_str: String, ) -> Value { - let (field_contents, is_option) = gen_field( - format!("{path}_{ident_str}"), - field.ty.clone(), - field.attrs.clone(), - word_rule, - out, - ); + // Produce a cleaner grammar: fields with `_` are hidden fields. + let path = if ident_str.starts_with("_") { + format!("_{path}_{ident_str}") + } else { + format!("{path}_{ident_str}") + }; + let (field_contents, is_option) = + gen_field(path, field.ty.clone(), field.attrs.clone(), word_rule, out); let core = json!({ "type": "FIELD", diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap index b02e30d..f2865f9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap @@ -1,5 +1,5 @@ --- source: tool/src/lib.rs -expression: generate_grammar(&m) +expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]},"Expr_Number_0":{"type":"PATTERN","value":"\\d+"},"Expr_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Number_0"}}]},"Expr_Neg__bang":{"type":"STRING","value":"!"},"Expr_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"_bang","content":{"type":"SYMBOL","name":"Expr_Neg__bang"}},{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Expr"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]}},"extras":[]} +{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]},"Expr_Number_0":{"type":"PATTERN","value":"\\d+"},"Expr_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Number_0"}}]},"_Expr_Neg__bang":{"type":"STRING","value":"!"},"Expr_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"_bang","content":{"type":"SYMBOL","name":"_Expr_Neg__bang"}},{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Expr"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap index dc3025e..05407a9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_delimiter":{"type":"STRING","value":","},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"NumberList_numbers_vec_delimiter"},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_delimiter":{"type":"STRING","value":","},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"NumberList_numbers_vec_delimiter"},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap index c14068e..1585c93 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"NumberList_numbers_vec_delimiter":{"type":"STRING","value":","},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"NumberList_numbers_vec_delimiter"},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"NumberList_numbers_vec_delimiter":{"type":"STRING","value":","},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"NumberList_numbers_vec_delimiter"},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap index c5f6753..4e4cf04 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git 
a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap index abc38d0..0f094bc 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: generate_grammar(&m) --- -{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap index c6342da..663fa1a 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap +++ 
b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} From de76a8fef6c5d93d40e81b20ae12da00b0f86c40 Mon Sep 17 00:00:00 2001 
From: Jason Boatman Date: Wed, 9 Jul 2025 13:28:20 -0500 Subject: [PATCH 06/50] Merge tag updates, include standard `token` tag as well. --- common/src/lib.rs | 33 ++++++- macro/src/expansion.rs | 32 +------ macro/src/lib.rs | 1 + tool/src/expansion.rs | 85 ++++++++++++------- ...l__tests__enum_conflicts_prec_dynamic.snap | 2 +- .../rust_sitter_tool__tests__immediate.snap | 2 +- 6 files changed, 93 insertions(+), 62 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 8aa06ca..07b9214 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::{collections::HashSet, sync::LazyLock}; use syn::{ parse::{Parse, ParseStream}, @@ -48,6 +48,37 @@ impl Parse for FieldThenParams { } } +static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { + [ + "leaf", + "token", + "immediate", + "prec", + "prec_left", + "prec_right", + "prec_dynamic", + "extra", + ] + .into_iter() + .collect() +}); + +pub fn is_sitter_attr(attr: &Attribute) -> bool { + let is_explicit = attr + .path() + .segments + .iter() + .next() + .map(|segment| segment.ident == "rust_sitter") + .unwrap_or(false); + is_explicit || { + attr.path().segments.len() == 1 + && RUST_SITTER_ATTRS.contains(attr.path().segments[0].ident.to_string().as_str()) + } +} + + + pub fn try_extract_inner_type( ty: &Type, inner_of: &str, diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index a498d5d..636e221 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -1,4 +1,5 @@ -use std::{collections::HashSet, sync::LazyLock}; +use std::{collections::HashSet}; +use rust_sitter_common::is_sitter_attr; use crate::errors::IteratorExt as _; use proc_macro2::Span; @@ -6,35 +7,6 @@ use quote::{quote, ToTokens}; use rust_sitter_common::*; use syn::{parse::Parse, punctuated::Punctuated, *}; -static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { - [ - "leaf", - "token", - "immediate", - "prec", - "prec_left", - "prec_right", - "prec_dynamic", - 
"extra", - ] - .into_iter() - .collect() -}); - -fn is_sitter_attr(attr: &Attribute) -> bool { - let is_explicit = attr - .path() - .segments - .iter() - .next() - .map(|segment| segment.ident == "rust_sitter") - .unwrap_or(false); - is_explicit || { - attr.path().segments.len() == 1 - && RUST_SITTER_ATTRS.contains(attr.path().segments[0].ident.to_string().as_str()) - } -} - pub enum ParamOrField { Param(Expr), Field(FieldValue), diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 1a99d9a..cae574d 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -238,6 +238,7 @@ pub fn delimited( /// // The body /// body: Vec, /// #[rust_sitter::lit("}")] // Closing } +/// _s: (), /// } /// ``` pub fn lit( diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 4301b4b..91d0dfb 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -4,15 +4,16 @@ use rust_sitter_common::*; use serde_json::{json, Map, Value}; use syn::{parse::Parse, punctuated::Punctuated, *}; -struct Precs { +struct Extras { prec_param: Option, prec_left_param: Option, prec_right_param: Option, prec_dynamic_param: Option, immediate: bool, + token: bool, } -impl Precs { +impl Extras { fn new(attrs: &[Attribute]) -> Self { let prec_attr = attrs .iter() @@ -43,12 +44,17 @@ impl Precs { .iter() .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::immediate)); + // let token = attrs. 
+ // iter() + // .find(|attr| attr.path() == &syn:: + Self { prec_param, prec_left_param, prec_right_param, prec_dynamic_param, immediate: immediate_attr.is_some(), + token: false, } } @@ -59,6 +65,7 @@ impl Precs { prec_right_param, prec_dynamic_param, immediate, + token, } = self; let rule = if let Some(Expr::Lit(lit)) = prec_param { @@ -80,25 +87,27 @@ impl Precs { panic!("only one of prec, prec_left, and prec_right can be specified"); } - if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC_LEFT", - "value": i.base10_parse::().unwrap(), - "content": rule - }) + let value = if let Lit::Int(i) = &lit.lit { + i.base10_parse::().unwrap() } else { - panic!("Expected integer literal for precedence"); - } + 0 + }; + json!({ + "type": "PREC_LEFT", + "value": value, + "content": rule + }) } else if let Some(Expr::Lit(lit)) = prec_right_param { - if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC_RIGHT", - "value": i.base10_parse::().unwrap(), - "content": rule - }) + let value = if let Lit::Int(i) = &lit.lit { + i.base10_parse::().unwrap() } else { - panic!("Expected integer literal for precedence"); - } + 0 + }; + json!({ + "type": "PREC_RIGHT", + "value": value, + "content": rule + }) } else if let Some(Expr::Lit(lit)) = prec_dynamic_param { if let Lit::Int(i) = &lit.lit { json!({ @@ -113,11 +122,20 @@ impl Precs { rule }; + if immediate && token { + panic!("Cannot be immediate and token"); + } + if immediate { json!({ "type": "IMMEDIATE_TOKEN", "content": rule }) + } else if token { + json!({ + "type": "TOKEN", + "content": rule, + }) } else { rule } @@ -127,15 +145,16 @@ impl Precs { fn gen_field( path: String, leaf_type: Type, - leaf_attrs: Vec, + attrs: Vec, word_rule: &mut Option, out: &mut Map, ) -> (Value, bool) { - let leaf_attr = leaf_attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::leaf)); + let leaf_attr = attrs.iter().find(|attr| { + attr.path() == &syn::parse_quote!(rust_sitter::leaf) + || attr.path() == 
&syn::parse_quote!(leaf) + }); - if leaf_attrs + if attrs .iter() .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::word)) { @@ -146,12 +165,20 @@ fn gen_field( *word_rule = Some(path.clone()); } - let precs = Precs::new(&leaf_attrs); + let precs = Extras::new(&attrs); if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { panic!( "The attributes `prec_left` and `prec_right` cannot be applied to a non-struct type" ); } + let literals: Vec<_> = attrs + .iter() + .filter(|attr| attr.path() == &syn::parse_quote!(rust_sitter::lit)) + .filter_map(|a| { + a.parse_args_with(Punctuated::::parse_terminated) + .ok() + }) + .collect(); let leaf_params = leaf_attr.and_then(|a| { a.parse_args_with(Punctuated::::parse_terminated) @@ -246,7 +273,7 @@ fn gen_field( out, ); - let delimited_attr = leaf_attrs + let delimited_attr = attrs .iter() .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::delimited)); @@ -263,7 +290,7 @@ fn gen_field( ) }); - let repeat_attr = leaf_attrs + let repeat_attr = attrs .iter() .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::repeat)); @@ -358,7 +385,7 @@ fn gen_field( } else { // is_option let (field_json, field_optional) = - gen_field(path, inner_type_option, leaf_attrs, word_rule, out); + gen_field(path, inner_type_option, attrs, word_rule, out); if field_optional { panic!("Option> is not supported"); @@ -434,7 +461,7 @@ fn gen_struct_or_variant( }) .collect::>(); - let precs = Precs::new(&attrs); + let precs = Extras::new(&attrs); let base_rule = match fields { Fields::Unit => { @@ -540,7 +567,7 @@ pub fn generate_grammar(module: &ItemMod) -> Value { "members": members }); - let precs = Precs::new(&e.attrs); + let precs = Extras::new(&e.attrs); if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { panic!( "The attributes `prec_left` and `prec_right` cannot be applied directly to an enum" diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap 
b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap index 9d100ed..6778575 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Program_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_vec_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":
{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"ExpressionStatement__semicolon"}}]},"IfStatement__if":{"type":"STRING","value":"if"},"IfStatement__lparen":{"type":"STRING","value":"("},"IfStatement__rparen":{"type":"STRING","value":")"},"IfStatement__lbrace":{"type":"STRING","value":"{"},"IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1
,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","content":{"type":"SYMBOL","name":"IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"IfStatementElse__else":{"type":"STRING","value":"else"},"IfStatementElse__lbrace":{"type":"STRING","value":"{"},"IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]}},"extras":[]} 
+{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Program_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_vec_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"type":
"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"_ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"_ExpressionStatement__semicolon"}}]},"_IfStatement__if":{"type":"STRING","value":"if"},"_IfStatement__lparen":{"type":"STRING","value":"("},"_IfStatement__rparen":{"type":"STRING","value":")"},"_IfStatement__lbrace":{"type":"STRING","value":"{"},"_IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"_IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"_IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","c
ontent":{"type":"SYMBOL","name":"_IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"_IfStatementElse__else":{"type":"STRING","value":"else"},"_IfStatementElse__lbrace":{"type":"STRING","value":"{"},"_IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"_IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap index 66c9d40..a7ebcc9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} From 9183e8828c1676e354df76914642d8ca68b7225d Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Wed, 9 Jul 2025 15:08:56 -0500 Subject: [PATCH 07/50] Add ability to specify `seq` of tokens which are part of the grammar but not parsed as explicit rules or extractable. 
--- common/src/lib.rs | 11 +++++- example/src/words.rs | 1 + macro/src/expansion.rs | 20 ++++++++-- macro/src/lib.rs | 40 +++++++++---------- runtime/src/__private.rs | 19 ++++++++- runtime/src/lib.rs | 10 ++--- tool/Cargo.toml | 2 +- tool/src/expansion.rs | 84 ++++++++++++++++++++++++++++++---------- 8 files changed, 133 insertions(+), 54 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 07b9214..5316163 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -77,7 +77,16 @@ pub fn is_sitter_attr(attr: &Attribute) -> bool { } } - +pub fn sitter_attr_matches(attr: &Attribute, name: &str) -> bool { + let path = attr.path(); + if path.segments.len() == 1 { + path.segments[0].ident == name + } else if path.segments.len() == 2 { + path.segments[0].ident == "rust_sitter" && path.segments[1].ident == name + } else { + false + } +} pub fn try_extract_inner_type( ty: &Type, diff --git a/example/src/words.rs b/example/src/words.rs index fa0f009..fd87363 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -2,6 +2,7 @@ pub mod grammar { #[rust_sitter::language] #[derive(Debug)] + #[allow(dead_code)] pub struct Words { #[rust_sitter::leaf(text = r"if")] keyword: (), diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 636e221..5496123 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -1,5 +1,5 @@ -use std::{collections::HashSet}; use rust_sitter_common::is_sitter_attr; +use std::collections::HashSet; use crate::errors::IteratorExt as _; use proc_macro2::Span; @@ -29,6 +29,19 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { .iter() .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::leaf)); + let seq_attr = leaf + .attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::seq)); + if seq_attr.is_some() { + if leaf_attr.is_some() { + panic!("Cannot use leaf and seq at the same time"); + } + return syn::parse_quote!({ + ::rust_sitter::__private::skip_seq(cursor, #ident_str); + 
() + }); + } let leaf_params = leaf_attr.and_then(|a| { a.parse_args_with(Punctuated::::parse_terminated) .ok() @@ -65,7 +78,6 @@ fn gen_struct_or_variant( container_attrs: Vec, ) -> Result { let children_parsed = if fields == Fields::Unit { - // TODO (JAB): Handle `` let expr = { let dummy_field = Field { attrs: container_attrs, @@ -246,7 +258,7 @@ pub fn expand_grammar(input: ItemMod) -> Result { #[allow(non_snake_case)] fn extract(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!(cursor.goto_first_child(), "Could not find a child corresponding to any enum branch"); @@ -286,7 +298,7 @@ pub fn expand_grammar(input: ItemMod) -> Result { #[allow(non_snake_case)] fn extract(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, _leaf_fn: Option<&Self::LeafFn>) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); #extract_expr } } diff --git a/macro/src/lib.rs b/macro/src/lib.rs index cae574d..6819711 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -203,6 +203,24 @@ pub fn immediate( item } +#[proc_macro_attribute] +/// Allows the leaf node sequence to be created as a single token. +/// +/// ## Example +/// ```ignore +/// struct StringFragment( +/// #[rust_sitter::token] +/// #[rust_sitter::leaf(pattern = r"[^"\\]+")] +/// () +/// ); +/// ``` +pub fn token( + _attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + item +} + #[proc_macro_attribute] /// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements. /// The [`rust_sitter::repeat`] annotation must be used on the field as well. @@ -226,28 +244,6 @@ pub fn delimited( item } -#[proc_macro_attribute] -/// Produce a series of literal, anonymous tokens in the grammar. 
-/// ## Example -/// ```ignore -/// pub struct Function { -/// #[rust_sitter::lit("function")] // produces a keyword-like "function" -/// // parses name as Ident -/// name: Ident, -/// #[rust_sitter::lit("(", ")", "{")] // simple grammar, no inputs -/// // The body -/// body: Vec, -/// #[rust_sitter::lit("}")] // Closing } -/// _s: (), -/// } -/// ``` -pub fn lit( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - #[proc_macro_attribute] /// On `Vec<_>` typed fields, specifies additional config for how the repeated elements should /// be parsed. In particular, this annotation takes the following named arguments: diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 5858c52..5f843d3 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -58,13 +58,30 @@ pub fn extract_field, T>( } } +pub fn skip_seq(cursor_opt: &mut Option, field_name: &str) { + if let Some(cursor) = cursor_opt.as_mut() { + loop { + if let Some(name) = cursor.field_name() { + if name == field_name { + if !cursor.goto_next_sibling() { + *cursor_opt = None; + return; + } + } else { + return; + } + } + } + } +} + pub fn parse>( input: &str, language: impl Fn() -> tree_sitter::Language, ) -> core::result::Result> { let mut parser = crate::tree_sitter::Parser::new(); parser.set_language(&language()).unwrap(); - let tree = parser.parse(input, None).unwrap(); + let tree = parser.parse(input, None).expect("Failed to parse"); let root_node = tree.root_node(); if root_node.has_error() { diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 6bac0dc..5d7c2d3 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -36,8 +36,8 @@ impl Extract for WithLeaf { leaf_fn: Option<&Self::LeafFn>, ) -> L { node.and_then(|n| n.utf8_text(source).ok()) - .map(|s| leaf_fn.unwrap()(s)) - .unwrap() + .map(|s| leaf_fn.expect("No leaf function on WithLeaf")(s)) + .expect("Could not extract WithLeaf") } } @@ -120,9 +120,9 @@ 
macro_rules! extract_from_str { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); - let text = node.utf8_text(source).unwrap(); - text.parse().unwrap() + let node = node.expect("No node found"); + let text = node.utf8_text(source).expect("No text found for node"); + text.parse().expect("Failed to parse type") } } }; diff --git a/tool/Cargo.toml b/tool/Cargo.toml index 4335c8f..fdb94b0 100644 --- a/tool/Cargo.toml +++ b/tool/Cargo.toml @@ -6,7 +6,7 @@ repository = "https://github.com/hydro-project/rust-sitter" version = "0.4.5" authors = ["Shadaj Laddad "] license = "MIT" -edition = "2021" +edition = "2024" keywords = ["parsing", "codegen"] categories = ["development-tools"] diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 91d0dfb..93dff2f 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -1,7 +1,7 @@ use std::collections::HashSet; use rust_sitter_common::*; -use serde_json::{json, Map, Value}; +use serde_json::{Map, Value, json}; use syn::{parse::Parse, punctuated::Punctuated, *}; struct Extras { @@ -44,9 +44,9 @@ impl Extras { .iter() .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::immediate)); - // let token = attrs. 
- // iter() - // .find(|attr| attr.path() == &syn:: + let token = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::token)); Self { prec_param, @@ -54,7 +54,7 @@ impl Extras { prec_right_param, prec_dynamic_param, immediate: immediate_attr.is_some(), - token: false, + token: token.is_some(), } } @@ -149,11 +149,67 @@ fn gen_field( word_rule: &mut Option, out: &mut Map, ) -> (Value, bool) { + let precs = Extras::new(&attrs); let leaf_attr = attrs.iter().find(|attr| { attr.path() == &syn::parse_quote!(rust_sitter::leaf) || attr.path() == &syn::parse_quote!(leaf) }); + let seq_attr = attrs + .iter() + .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::seq)); + + if leaf_attr.is_some() && seq_attr.is_some() { + panic!("Cannot specify leaf and seq at the same time"); + } + + let mut skip_over = HashSet::new(); + skip_over.insert("Spanned"); + skip_over.insert("Box"); + + let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); + let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); + + if let Some(seq) = seq_attr { + // Handle the seq separately. + let inputs = seq + .parse_args_with(Punctuated::::parse_terminated) + .unwrap(); + let mut members = vec![]; + for input in inputs { + let typ = if input.path == "text" { + "STRING" + } else { + "PATTERN" + }; + if let Expr::Lit(lit) = input.expr + && let Lit::Str(s) = lit.lit + { + members.push(json!({ + "type": typ, + "value": s.value(), + })); + } else { + panic!("{typ} in seq must be a literal string"); + } + } + + // seq is only used to parse a bunch of tokens which are then not used directly. As such, + // the type is required to be `()` or else it will fail to compile. 
+ match &leaf_type { + Type::Tuple(t) if t.elems.is_empty() => { + } + _ => panic!("Unexpected type `()` is required for rust_sitter::seq"), + } + return ( + precs.apply(json!({ + "type": "SEQ", + "members": members, + })), + is_option, + ); + } + if attrs .iter() .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::word)) @@ -165,20 +221,11 @@ fn gen_field( *word_rule = Some(path.clone()); } - let precs = Extras::new(&attrs); if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { panic!( "The attributes `prec_left` and `prec_right` cannot be applied to a non-struct type" ); } - let literals: Vec<_> = attrs - .iter() - .filter(|attr| attr.path() == &syn::parse_quote!(rust_sitter::lit)) - .filter_map(|a| { - a.parse_args_with(Punctuated::::parse_terminated) - .ok() - }) - .collect(); let leaf_params = leaf_attr.and_then(|a| { a.parse_args_with(Punctuated::::parse_terminated) @@ -197,12 +244,9 @@ fn gen_field( .map(|p| p.expr.clone()) }); - let mut skip_over = HashSet::new(); - skip_over.insert("Spanned"); - skip_over.insert("Box"); - - let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); - let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); + if pattern_param.is_some() && text_param.is_some() { + panic!("cannot specify text and pattern in the same leaf"); + } if !is_vec && !is_option { if let Some(Expr::Lit(lit)) = pattern_param { From 42598ea931e137c3a358c64b14ced13b8d01a178 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Wed, 9 Jul 2025 15:20:26 -0500 Subject: [PATCH 08/50] Allow optional seq and allow not using the full `rust_sitter::` when within the grammar module. 
--- macro/src/expansion.rs | 6 ++-- ...t_sitter_macro__tests__enum_prec_left.snap | 5 ++- ...t_sitter_macro__tests__enum_recursive.snap | 5 ++- ...macro__tests__enum_transformed_fields.snap | 5 ++- ...r_macro__tests__enum_with_named_field.snap | 5 ++- ...macro__tests__enum_with_unamed_vector.snap | 7 ++-- ...r_macro__tests__grammar_unboxed_field.snap | 7 ++-- ...t_sitter_macro__tests__spanned_in_vec.snap | 9 +++--- ...ust_sitter_macro__tests__struct_extra.snap | 7 ++-- ..._sitter_macro__tests__struct_optional.snap | 7 ++-- ...st_sitter_macro__tests__struct_repeat.snap | 9 +++--- tool/src/expansion.rs | 32 ++++++++----------- 12 files changed, 45 insertions(+), 59 deletions(-) diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 5496123..1d24a6e 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -27,12 +27,12 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { let leaf_attr = leaf .attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::leaf)); + .find(|attr| sitter_attr_matches(attr, "leaf")); let seq_attr = leaf .attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::seq)); + .find(|attr| sitter_attr_matches(attr, "seq")); if seq_attr.is_some() { if leaf_attr.is_some() { panic!("Cannot use leaf and seq at the same time"); @@ -99,7 +99,7 @@ fn gen_struct_or_variant( let expr = if let Some(skip_attrs) = field .attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::skip)) + .find(|attr| sitter_attr_matches(attr, "skip")) { skip_attrs.parse_args::()? 
} else { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index e9ae783..b2051b4 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! {\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub enum Expression\n {\n Number(#[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] i32), #[rust_sitter :: prec_left(1)]\n Sub(Box < Expression >, #[rust_sitter :: leaf(text = \"-\")]\n (), Box < Expression >),\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] i32), #[rust_sitter::prec_left(1)]\n Sub(Box, #[rust_sitter::leaf(text = \"-\")] (),\n Box),\n }\n }\n})? .to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -16,7 +16,7 @@ mod grammar { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -88,4 +88,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 3cadc92..682c4b3 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! 
{\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub enum Expression\n {\n Number(#[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] i32),\n Neg(#[rust_sitter :: leaf(text = \"-\")] (), Box < Expression\n >),\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] i32),\n Neg(#[rust_sitter::leaf(text = \"-\")] (), Box),\n }\n }\n})? .to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -16,7 +16,7 @@ mod grammar { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -83,4 +83,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index 3f0cb88..532c8ca 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! 
{\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub enum Expression\n {\n Number(#[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v | v.parse :: < i32 >\n ().unwrap())] i32),\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse::().unwrap())] i32),\n }\n }\n})? .to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -15,7 +15,7 @@ mod grammar { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -67,4 +67,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index b6fc87c..a2ff80a 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! 
{\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub enum Expr\n {\n Number(#[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] u32), Neg\n {\n #[rust_sitter :: leaf(text = \"!\")] _bang : (), value : Box <\n Expr >,\n }\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expr\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] u32), Neg\n { #[rust_sitter::leaf(text = \"!\")] _bang: (), value: Box, }\n }\n }\n})? .to_token_stream().to_string())" --- mod grammar { pub enum Expr { @@ -16,7 +16,7 @@ mod grammar { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -81,4 +81,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 892ee17..58c4b6d 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! 
{\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n pub struct Number\n {\n #[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] value : u32\n } #[rust_sitter :: language] pub enum Expr\n {\n Numbers(#[rust_sitter :: repeat(non_empty = true)] Vec <\n Number >)\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] value: u32\n } #[rust_sitter::language] pub enum Expr\n { Numbers(#[rust_sitter::repeat(non_empty = true)] Vec) }\n }\n})? .to_token_stream().to_string())" --- mod grammar { pub struct Number { @@ -15,7 +15,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { value: { @@ -43,7 +43,7 @@ mod grammar { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -88,4 +88,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index 5e28401..68b183d 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! 
{\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub struct Language\n { e : Expression, } pub enum Expression\n {\n Number(#[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v : & str | v.parse ::\n < i32 > ().unwrap())] i32),\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language { e: Expression, } pub\n enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v: &str|\n v.parse::().unwrap())] i32),\n }\n }\n})? .to_token_stream().to_string())" --- mod grammar { pub struct Language { @@ -15,7 +15,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Language { e: { @@ -39,7 +39,7 @@ mod grammar { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -91,4 +91,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index 9efa021..bd82531 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! 
{\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n use rust_sitter :: Spanned ; #[rust_sitter :: language] pub\n struct NumberList { numbers : Vec < Spanned < Number >>, }\n pub struct Number\n {\n #[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] v : i32\n } #[rust_sitter :: extra] struct Whitespace\n {\n #[rust_sitter :: leaf(pattern = r\"\\s\")] _whitespace : (),\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n use rust_sitter::Spanned; #[rust_sitter::language] pub struct\n NumberList { numbers: Vec>, } pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: i32\n } #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern = r\"\\s\")] _whitespace: (), }\n }\n})? .to_token_stream().to_string())" --- mod grammar { use rust_sitter::Spanned; @@ -16,7 +16,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { NumberList { numbers: { @@ -40,7 +40,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { v: { @@ -68,7 +68,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Whitespace { _whitespace: { @@ -99,4 +99,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap 
b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index 07cea34..cbbd5e2 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! {\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub enum Expression\n {\n Number(#[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] i32,),\n } #[rust_sitter :: extra] struct Whitespace\n {\n #[rust_sitter :: leaf(pattern = r\"\\s\")] _whitespace : (),\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] i32,),\n } #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern = r\"\\s\")] _whitespace: (), }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -15,7 +15,7 @@ mod grammar { _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -60,7 +60,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Whitespace { _whitespace: { @@ -91,4 +91,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 16c8d23..1eaa553 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! {\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub struct Language\n {\n #[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] v : Option < i32 >, t : Option < Number\n >,\n } pub struct Number\n {\n #[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] v : i32\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: Option, t: Option,\n } pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: i32\n }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub struct Language { @@ -16,7 +16,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Language { v: { @@ -48,7 +48,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { v: { @@ -79,4 +79,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index 16728fa..ba93104 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote! {\n #[rust_sitter :: grammar(\"test\")] mod grammar\n {\n #[rust_sitter :: language] pub struct NumberList\n { numbers : Vec < Number >, } pub struct Number\n {\n #[rust_sitter ::\n leaf(pattern = r\"\\d+\", transform = | v |\n v.parse().unwrap())] v : i32\n } #[rust_sitter :: extra] struct Whitespace\n {\n #[rust_sitter :: leaf(pattern = r\"\\s\")] _whitespace : (),\n }\n }\n })?.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct NumberList\n { numbers: Vec, } pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: i32\n } #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern = r\"\\s\")] _whitespace: (), }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub struct NumberList { @@ -15,7 +15,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { NumberList { numbers: { @@ -39,7 +39,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { v: { @@ -67,7 +67,7 @@ mod grammar { last_idx: usize, _leaf_fn: Option<&Self::LeafFn>, ) -> Self { - let node = node.unwrap(); + let node = node.expect("no node found"); ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Whitespace { _whitespace: { @@ -98,4 +98,3 @@ mod grammar { ::rust_sitter::__private::parse::(input, language) } } - diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 93dff2f..0b484a7 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -150,14 +150,9 @@ fn gen_field( out: &mut Map, ) -> (Value, bool) { let precs = Extras::new(&attrs); - let leaf_attr = attrs.iter().find(|attr| { - attr.path() == &syn::parse_quote!(rust_sitter::leaf) - || attr.path() == &syn::parse_quote!(leaf) - }); + let leaf_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "leaf")); - let seq_attr = attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::seq)); + let seq_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "seq")); if leaf_attr.is_some() && seq_attr.is_some() { panic!("Cannot specify leaf and seq at the same time"); @@ -196,9 +191,13 @@ fn gen_field( // seq is only used to parse a bunch of tokens which are then not used directly. As such, // the type is required to be `()` or else it will fail to compile. 
- match &leaf_type { - Type::Tuple(t) if t.elems.is_empty() => { - } + let ty = if is_option { + &inner_type_option + } else { + &leaf_type + }; + match ty { + Type::Tuple(t) if t.elems.is_empty() => {} _ => panic!("Unexpected type `()` is required for rust_sitter::seq"), } return ( @@ -210,10 +209,7 @@ fn gen_field( ); } - if attrs - .iter() - .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::word)) - { + if attrs.iter().any(|attr| sitter_attr_matches(attr, "word")) { if word_rule.is_some() { panic!("Multiple `word` rules specified"); } @@ -319,7 +315,7 @@ fn gen_field( let delimited_attr = attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::delimited)); + .find(|attr| sitter_attr_matches(attr, "delimited")); let delimited_params = delimited_attr.and_then(|a| a.parse_args_with(FieldThenParams::parse).ok()); @@ -336,7 +332,7 @@ fn gen_field( let repeat_attr = attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::repeat)); + .find(|attr| sitter_attr_matches(attr, "repeat")); let repeat_params = repeat_attr.and_then(|a| { a.parse_args_with(Punctuated::::parse_terminated) @@ -490,7 +486,7 @@ fn gen_struct_or_variant( if field .attrs .iter() - .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::skip)) + .any(|attr| sitter_attr_matches(attr, "skip")) { None } else { @@ -641,7 +637,7 @@ pub fn generate_grammar(module: &ItemMod) -> Value { if attrs .iter() - .any(|a| a.path() == &syn::parse_quote!(rust_sitter::extra)) + .any(|a| sitter_attr_matches(a, "extra")) { extras_list.push(json!({ "type": "SYMBOL", From a0cc0fb3f16ffa85a841bf738e2840b1c5adc245 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 10 Jul 2025 10:11:53 -0500 Subject: [PATCH 09/50] Refactor `seq` to allow direct inputs to make it cleaner. 
Follow-up commit incoming to do the same with `leaf` --- common/src/lib.rs | 42 ++++++++++++++++++++++++++++++++++ macro/src/expansion.rs | 1 - macro/src/lib.rs | 25 +++++++++++++++++++-- tool/src/expansion.rs | 51 +++++++++++++++++++++++------------------- tool/src/lib.rs | 12 +++++++--- 5 files changed, 102 insertions(+), 29 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 5316163..5ad6028 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -48,6 +48,43 @@ impl Parse for FieldThenParams { } } +// NOTE: Technically this is unnecessary, because `Expr` can be parsed as a call, but this is more +// straight forward for us since it doesn't make us deal with `path`, etc. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExprOrCall { + Expr(Expr), + Call(Call), +} + +impl Parse for ExprOrCall { + fn parse(input: ParseStream) -> Result { + if let Ok(e) = input.parse::() { + Ok(Self::Call(e)) + } else { + Ok(Self::Expr(input.parse()?)) + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Call { + pub ident: Ident, + pub paren_token: token::Paren, + // If we need multiple inputs here we can do Punctuated + pub expr: Expr, +} + +impl Parse for Call { + fn parse(input: ParseStream) -> Result { + let content; + Ok(Call { + ident: input.parse()?, + paren_token: parenthesized!(content in input), + expr: content.parse()?, + }) + } +} + static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { [ "leaf", @@ -58,6 +95,11 @@ static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { "prec_right", "prec_dynamic", "extra", + "seq", + "repeat", + "delimited", + "text", + "pattern", ] .into_iter() .collect() diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 1d24a6e..baa475e 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -39,7 +39,6 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { } return syn::parse_quote!({ ::rust_sitter::__private::skip_seq(cursor, #ident_str); - () }); } let leaf_params = 
leaf_attr.and_then(|a| { diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 6819711..8c2f2d6 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -83,6 +83,29 @@ pub fn leaf( item } +#[proc_macro_attribute] +/// Defines a sequence of inputs in a grammar that should be parsed but are not explicitly used. +/// +/// ## Example +/// ```ignore +/// struct Function { +/// #[seq(text = "function")] +/// _function: (), +/// name: Ident, +/// #[seq(text = "(")] +/// _lparen: (), +/// // ... +/// } +/// ``` +/// `seq` inputs can be either `text = "..."` or `pattern = "..."`. The type assigned to the field +/// must be `()` or else it will fail to compile. +pub fn seq( + _attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + item +} + #[proc_macro_attribute] /// Defines a field that does not correspond to anything in the input string, /// such as some metadata. Takes a single, unnamed argument, which is the value @@ -280,8 +303,6 @@ pub fn grammar( proc_macro::TokenStream::from(expanded) } - - #[cfg(test)] mod tests { use std::fs::File; diff --git a/tool/src/expansion.rs b/tool/src/expansion.rs index 0b484a7..eae468b 100644 --- a/tool/src/expansion.rs +++ b/tool/src/expansion.rs @@ -4,6 +4,7 @@ use rust_sitter_common::*; use serde_json::{Map, Value, json}; use syn::{parse::Parse, punctuated::Punctuated, *}; +#[derive(Debug)] struct Extras { prec_param: Option, prec_left_param: Option, @@ -15,38 +16,34 @@ struct Extras { impl Extras { fn new(attrs: &[Attribute]) -> Self { - let prec_attr = attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec)); + let prec_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "prec")); let prec_param = prec_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); let prec_left_attr = attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_left)); + .find(|attr| sitter_attr_matches(attr, "prec_left")); let prec_left_param = 
prec_left_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); let prec_right_attr = attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_right)); + .find(|attr| sitter_attr_matches(attr, "prec_right")); let prec_right_param = prec_right_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); let prec_dynamic_attr = attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::prec_dynamic)); + .find(|attr| sitter_attr_matches(attr, "prec_dynamic")); let prec_dynamic_param = prec_dynamic_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); let immediate_attr = attrs .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::immediate)); + .find(|attr| sitter_attr_matches(attr, "immediate")); - let token = attrs - .iter() - .find(|attr| attr.path() == &syn::parse_quote!(rust_sitter::token)); + let token = attrs.iter().find(|attr| sitter_attr_matches(attr, "token")); Self { prec_param, @@ -162,22 +159,36 @@ fn gen_field( skip_over.insert("Spanned"); skip_over.insert("Box"); + if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { + panic!( + "The attributes `prec_left` and `prec_right` cannot be applied to a non-struct type" + ); + } + let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); if let Some(seq) = seq_attr { // Handle the seq separately. 
let inputs = seq - .parse_args_with(Punctuated::::parse_terminated) + .parse_args_with(Punctuated::::parse_terminated) .unwrap(); let mut members = vec![]; for input in inputs { - let typ = if input.path == "text" { - "STRING" - } else { - "PATTERN" + let (typ, expr) = match input { + ExprOrCall::Expr(expr) => ("STRING", expr), + ExprOrCall::Call(call) => { + let typ = if call.ident == "pattern" || call.ident == "re" { + "PATTERN" + } else if call.ident == "text" { + "STRING" + } else { + panic!("Unexpected seq input, expected one of: [pattern, re, text]"); + }; + (typ, call.expr) + } }; - if let Expr::Lit(lit) = input.expr + if let Expr::Lit(lit) = expr && let Lit::Str(s) = lit.lit { members.push(json!({ @@ -185,7 +196,7 @@ fn gen_field( "value": s.value(), })); } else { - panic!("{typ} in seq must be a literal string"); + panic!("expr in seq must be a literal string"); } } @@ -217,12 +228,6 @@ fn gen_field( *word_rule = Some(path.clone()); } - if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { - panic!( - "The attributes `prec_left` and `prec_right` cannot be applied to a non-struct type" - ); - } - let leaf_params = leaf_attr.and_then(|a| { a.parse_args_with(Punctuated::::parse_terminated) .ok() diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 711b164..c8da895 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -1,5 +1,5 @@ use serde_json::Value; -use syn::{parse_quote, Item}; +use syn::{Item, parse_quote}; mod expansion; use expansion::*; @@ -51,7 +51,13 @@ pub fn build_parsers(root_file: &Path) { .unwrap_or(false); generate_grammars(root_file).iter().for_each(|grammar| { let (grammar_name, grammar_c) = - generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); + match generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION) { + Ok(o) => o, + Err(e) => { + // Doing it this way produces a clean error from tree-sitter on failure. 
+ panic!("generation error: {e}"); + } + }; let tempfile = tempfile::Builder::new() .prefix("grammar") .tempdir() @@ -139,7 +145,7 @@ pub fn build_parsers(root_file: &Path) { mod tests { use syn::parse_quote; - use super::{generate_grammar, GENERATED_SEMANTIC_VERSION}; + use super::{GENERATED_SEMANTIC_VERSION, generate_grammar}; use tree_sitter_generate::generate_parser_for_grammar; #[test] From 82d4804f0c6db0aa1e4a687fcb95cb64083e9afb Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 10 Jul 2025 12:53:51 -0500 Subject: [PATCH 10/50] Refactoring to allow expansion at the macro generation level --- Cargo.lock | 1 + common/Cargo.toml | 4 +- {tool => common}/src/expansion.rs | 85 ++++++++++--- common/src/lib.rs | 2 + macro/src/expansion.rs | 77 ++++++++---- tool/src/lib.rs | 203 +++++++++++++----------------- 6 files changed, 213 insertions(+), 159 deletions(-) rename {tool => common}/src/expansion.rs (90%) diff --git a/Cargo.lock b/Cargo.lock index 3adeccf..5fce647 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,6 +489,7 @@ name = "rust-sitter-common" version = "0.4.5" dependencies = [ "quote", + "serde_json", "syn 2.0.98", ] diff --git a/common/Cargo.toml b/common/Cargo.toml index 41e825f..84b3d47 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -5,7 +5,7 @@ readme = "../README.md" repository = "https://github.com/hydro-project/rust-sitter" version = "0.4.5" authors = ["Shadaj Laddad "] -edition = "2021" +edition = "2024" license = "MIT" keywords = ["parsing", "codegen"] categories = ["development-tools"] @@ -16,3 +16,5 @@ path = "src/lib.rs" [dependencies] syn = { version = "2", features = [ "full", "extra-traits" ] } quote = "1" + +serde_json = "1" diff --git a/tool/src/expansion.rs b/common/src/expansion.rs similarity index 90% rename from tool/src/expansion.rs rename to common/src/expansion.rs index eae468b..3187e5d 100644 --- a/tool/src/expansion.rs +++ b/common/src/expansion.rs @@ -1,8 +1,37 @@ use std::collections::HashSet; -use 
rust_sitter_common::*; +use super::*; use serde_json::{Map, Value, json}; -use syn::{parse::Parse, punctuated::Punctuated, *}; +use syn::{parse::Parse, punctuated::Punctuated}; + +/// Generates JSON strings defining Tree Sitter grammars for every Rust Sitter +/// grammar found in the given module and recursive submodules. +pub fn generate_grammars(root_file: Vec) -> Vec { + let mut out = vec![]; + root_file + .iter() + .for_each(|i| generate_all_grammars(i, &mut out)); + out +} + +pub fn generate_grammars_string(root_file: Vec) -> String { + serde_json::to_string(&generate_grammars(root_file)).unwrap() +} + +fn generate_all_grammars(item: &Item, out: &mut Vec) { + if let Item::Mod(m) = item { + m.content + .iter() + .for_each(|(_, items)| items.iter().for_each(|i| generate_all_grammars(i, out))); + + if m.attrs + .iter() + .any(|a| a.path() == &parse_quote!(rust_sitter::grammar)) + { + out.push(generate_grammar(m)) + } + } +} #[derive(Debug)] struct Extras { @@ -540,27 +569,43 @@ pub fn generate_grammar(module: &ItemMod) -> Value { rules_map.insert("source_file".to_string(), json!({})); let mut extras_list = vec![]; - - let grammar_name = module + let attr = module .attrs .iter() - .find_map(|a| { - if a.path() == &syn::parse_quote!(rust_sitter::grammar) { - let grammar_name_expr = a.parse_args_with(Expr::parse).ok(); - if let Some(Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Str(s), - })) = grammar_name_expr - { - Some(s.value()) - } else { - panic!("Expected string literal for grammar name"); - } - } else { - None - } - }) + .find(|a| a.path() == &syn::parse_quote!(rust_sitter::grammar)) .expect("Each grammar must have a name"); + let grammar_name_expr = attr + .parse_args_with(Punctuated::::parse_terminated) + .expect("Inputs should be a comma separated list"); + if grammar_name_expr.is_empty() { + panic!("Expected a string literal for grammar name"); + // return Err(syn::Error::new( + // Span::call_site(), + // "Expected a string literal grammar name", + // )); 
+ } + if grammar_name_expr.len() > 2 { + panic!("Expected at most two inputs"); + } + let grammar_name = if let Expr::Lit(ExprLit { + attrs: _, + lit: Lit::Str(s), + }) = grammar_name_expr.first().unwrap() + { + s.value() + } else { + panic!("Expected a string literal grammar name"); + }; + + let _should_parse = if let Some(Expr::Lit(ExprLit { + attrs: _, + lit: Lit::Bool(b), + })) = grammar_name_expr.last() + { + b.value() + } else { + false + }; let (_, contents) = module.content.as_ref().unwrap(); diff --git a/common/src/lib.rs b/common/src/lib.rs index 5ad6028..35fab97 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -6,6 +6,8 @@ use syn::{ *, }; +pub mod expansion; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct NameValueExpr { pub path: Ident, diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index baa475e..b2d35ce 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -5,7 +5,7 @@ use crate::errors::IteratorExt as _; use proc_macro2::Span; use quote::{quote, ToTokens}; use rust_sitter_common::*; -use syn::{parse::Parse, punctuated::Punctuated, *}; +use syn::{punctuated::Punctuated, *}; pub enum ParamOrField { Param(Expr), @@ -168,32 +168,49 @@ fn gen_struct_or_variant( } pub fn expand_grammar(input: ItemMod) -> Result { - let grammar_name = input + let attr = input .attrs .iter() - .find_map(|a| { - if a.path() == &syn::parse_quote!(rust_sitter::grammar) { - let grammar_name_expr = a.parse_args_with(Expr::parse).ok(); - if let Some(Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Str(s), - })) = grammar_name_expr - { - Some(Ok(s.value())) - } else { - Some(Err(syn::Error::new( - Span::call_site(), - "Expected a string literal grammar name", - ))) - } - } else { - None - } - }) - .transpose()? 
+ .find(|a| a.path() == &syn::parse_quote!(rust_sitter::grammar)) .ok_or_else(|| syn::Error::new(Span::call_site(), "Each grammar must have a name"))?; + let grammar_name_expr = + attr.parse_args_with(Punctuated::::parse_terminated)?; + if grammar_name_expr.is_empty() { + return Err(syn::Error::new( + Span::call_site(), + "Expected a string literal grammar name", + )); + } + if grammar_name_expr.len() > 2 { + return Err(syn::Error::new( + Span::call_site(), + "Expected at most two inputs", + )); + } + let grammar_name = if let Expr::Lit(ExprLit { + attrs: _, + lit: Lit::Str(s), + }) = grammar_name_expr.first().unwrap() + { + s.value() + } else { + return Err(syn::Error::new( + Span::call_site(), + "Expected a string literal grammar name", + )); + }; + + let should_parse = if let Some(Expr::Lit(ExprLit { + attrs: _, + lit: Lit::Bool(b), + })) = grammar_name_expr.last() + { + b.value() + } else { + false + }; - let (brace, new_contents) = input.content.ok_or_else(|| { + let (brace, new_contents) = input.content.as_ref().ok_or_else(|| { syn::Error::new( Span::call_site(), "Expected the module to have inline contents (`mod my_module { .. }` syntax)", @@ -227,6 +244,10 @@ pub fn expand_grammar(input: ItemMod) -> Result { .iter() .cloned() .map(|c| match c { + Item::Macro(m) => { + dbg!(&m); + Ok(vec![Item::Macro(m)]) + } Item::Enum(mut e) => { let match_cases: Vec = e.variants.iter().map(|v| { let variant_path = format!("{}_{}", e.ident, v.ident); @@ -334,6 +355,14 @@ pub fn expand_grammar(input: ItemMod) -> Result { } }); + // Produces the grammar as a JSON constant. + if should_parse { + let grammars = rust_sitter_common::expansion::generate_grammar(&input).to_string(); + transformed.push(syn::parse_quote! 
{ + pub const GRAMMAR: &str = #grammars; + }); + } + let mut filtered_attrs = input.attrs; filtered_attrs.retain(|a| !is_sitter_attr(a)); Ok(ItemMod { @@ -342,7 +371,7 @@ pub fn expand_grammar(input: ItemMod) -> Result { unsafety: None, mod_token: input.mod_token, ident: input.ident, - content: Some((brace, transformed)), + content: Some((*brace, transformed)), semi: input.semi, }) } diff --git a/tool/src/lib.rs b/tool/src/lib.rs index c8da895..7b661e4 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -1,37 +1,5 @@ -use serde_json::Value; -use syn::{Item, parse_quote}; - -mod expansion; -use expansion::*; - const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 25, 2)); -/// Generates JSON strings defining Tree Sitter grammars for every Rust Sitter -/// grammar found in the given module and recursive submodules. -pub fn generate_grammars(root_file: &Path) -> Vec { - let root_file = syn_inline_mod::parse_and_inline_modules(root_file).items; - let mut out = vec![]; - root_file - .iter() - .for_each(|i| generate_all_grammars(i, &mut out)); - out -} - -fn generate_all_grammars(item: &Item, out: &mut Vec) { - if let Item::Mod(m) = item { - m.content - .iter() - .for_each(|(_, items)| items.iter().for_each(|i| generate_all_grammars(i, out))); - - if m.attrs - .iter() - .any(|a| a.path() == &parse_quote!(rust_sitter::grammar)) - { - out.push(generate_grammar(m)) - } - } -} - #[cfg(feature = "build_parsers")] use std::io::Write; use std::path::Path; @@ -44,108 +12,115 @@ use tree_sitter_generate::generate_parser_for_grammar; /// for every Rust Sitter grammar found in the given module and recursive /// submodules. 
pub fn build_parsers(root_file: &Path) { + let root_file = syn_inline_mod::parse_and_inline_modules(root_file); + rust_sitter_common::expansion::generate_grammars(root_file.items) + .iter() + .for_each(generate_parser); +} + +fn generate_parser(grammar: &serde_json::Value) { use std::env; let out_dir = env::var("OUT_DIR").unwrap(); let emit_artifacts: bool = env::var("RUST_SITTER_EMIT_ARTIFACTS") .map(|s| s.parse().unwrap_or(false)) .unwrap_or(false); - generate_grammars(root_file).iter().for_each(|grammar| { - let (grammar_name, grammar_c) = - match generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION) { - Ok(o) => o, - Err(e) => { - // Doing it this way produces a clean error from tree-sitter on failure. - panic!("generation error: {e}"); - } - }; - let tempfile = tempfile::Builder::new() - .prefix("grammar") - .tempdir() - .unwrap(); - let dir = if emit_artifacts { - let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}",)); - if grammar_dir.is_dir() { - std::fs::remove_dir_all(&grammar_dir).expect("Couldn't clear old artifacts"); + let (grammar_name, grammar_c) = + match generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION) { + Ok(o) => o, + Err(e) => { + // Doing it this way produces a clean error from tree-sitter on failure. 
+ panic!("generation error: {e}"); } - std::fs::DirBuilder::new() - .recursive(true) - .create(grammar_dir.clone()) - .expect("Couldn't create grammar JSON directory"); - grammar_dir - } else { - tempfile.path().into() }; + let tempfile = tempfile::Builder::new() + .prefix("grammar") + .tempdir() + .unwrap(); + + let dir = if emit_artifacts { + let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}",)); + if grammar_dir.is_dir() { + std::fs::remove_dir_all(&grammar_dir).expect("Couldn't clear old artifacts"); + } + std::fs::DirBuilder::new() + .recursive(true) + .create(grammar_dir.clone()) + .expect("Couldn't create grammar JSON directory"); + grammar_dir + } else { + tempfile.path().into() + }; + + let grammar_file = dir.join("parser.c"); + let mut f = std::fs::File::create(grammar_file).unwrap(); + + f.write_all(grammar_c.as_bytes()).unwrap(); + drop(f); + + // emit grammar into the build out_dir + let mut grammar_json_file = + std::fs::File::create(dir.join(format!("{grammar_name}.json"))).unwrap(); + grammar_json_file + .write_all(serde_json::to_string_pretty(grammar).unwrap().as_bytes()) + .unwrap(); + drop(grammar_json_file); + + let header_dir = dir.join("tree_sitter"); + std::fs::create_dir(&header_dir).unwrap(); + let mut parser_file = std::fs::File::create(header_dir.join("parser.h")).unwrap(); + parser_file + .write_all(tree_sitter::PARSER_HEADER.as_bytes()) + .unwrap(); + drop(parser_file); + + let sysroot_dir = dir.join("sysroot"); + if env::var("TARGET").unwrap().starts_with("wasm32") { + std::fs::create_dir(&sysroot_dir).unwrap(); + let mut stdint = std::fs::File::create(sysroot_dir.join("stdint.h")).unwrap(); + stdint + .write_all(include_bytes!("wasm-sysroot/stdint.h")) + .unwrap(); + drop(stdint); - let grammar_file = dir.join("parser.c"); - let mut f = std::fs::File::create(grammar_file).unwrap(); - - f.write_all(grammar_c.as_bytes()).unwrap(); - drop(f); + let mut stdlib = 
std::fs::File::create(sysroot_dir.join("stdlib.h")).unwrap(); + stdlib + .write_all(include_bytes!("wasm-sysroot/stdlib.h")) + .unwrap(); + drop(stdlib); - // emit grammar into the build out_dir - let mut grammar_json_file = - std::fs::File::create(dir.join(format!("{grammar_name}.json"))).unwrap(); - grammar_json_file - .write_all(serde_json::to_string_pretty(grammar).unwrap().as_bytes()) + let mut stdio = std::fs::File::create(sysroot_dir.join("stdio.h")).unwrap(); + stdio + .write_all(include_bytes!("wasm-sysroot/stdio.h")) .unwrap(); - drop(grammar_json_file); + drop(stdio); - let header_dir = dir.join("tree_sitter"); - std::fs::create_dir(&header_dir).unwrap(); - let mut parser_file = std::fs::File::create(header_dir.join("parser.h")).unwrap(); - parser_file - .write_all(tree_sitter::PARSER_HEADER.as_bytes()) + let mut stdbool = std::fs::File::create(sysroot_dir.join("stdbool.h")).unwrap(); + stdbool + .write_all(include_bytes!("wasm-sysroot/stdbool.h")) .unwrap(); - drop(parser_file); - - let sysroot_dir = dir.join("sysroot"); - if env::var("TARGET").unwrap().starts_with("wasm32") { - std::fs::create_dir(&sysroot_dir).unwrap(); - let mut stdint = std::fs::File::create(sysroot_dir.join("stdint.h")).unwrap(); - stdint - .write_all(include_bytes!("wasm-sysroot/stdint.h")) - .unwrap(); - drop(stdint); - - let mut stdlib = std::fs::File::create(sysroot_dir.join("stdlib.h")).unwrap(); - stdlib - .write_all(include_bytes!("wasm-sysroot/stdlib.h")) - .unwrap(); - drop(stdlib); - - let mut stdio = std::fs::File::create(sysroot_dir.join("stdio.h")).unwrap(); - stdio - .write_all(include_bytes!("wasm-sysroot/stdio.h")) - .unwrap(); - drop(stdio); - - let mut stdbool = std::fs::File::create(sysroot_dir.join("stdbool.h")).unwrap(); - stdbool - .write_all(include_bytes!("wasm-sysroot/stdbool.h")) - .unwrap(); - drop(stdbool); - } + drop(stdbool); + } - let mut c_config = cc::Build::new(); - c_config.std("c11").include(&dir).include(&sysroot_dir); - c_config - 
.flag_if_supported("-Wno-unused-label") - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-unused-but-set-variable") - .flag_if_supported("-Wno-trigraphs") - .flag_if_supported("-Wno-everything"); - c_config.file(dir.join("parser.c")); - - c_config.compile(&grammar_name); - }); + let mut c_config = cc::Build::new(); + c_config.std("c11").include(&dir).include(&sysroot_dir); + c_config + .flag_if_supported("-Wno-unused-label") + .flag_if_supported("-Wno-unused-parameter") + .flag_if_supported("-Wno-unused-but-set-variable") + .flag_if_supported("-Wno-trigraphs") + .flag_if_supported("-Wno-everything"); + c_config.file(dir.join("parser.c")); + + c_config.compile(&grammar_name); } #[cfg(test)] mod tests { use syn::parse_quote; - use super::{GENERATED_SEMANTIC_VERSION, generate_grammar}; + use super::GENERATED_SEMANTIC_VERSION; + use rust_sitter_common::expansion::generate_grammar; use tree_sitter_generate::generate_parser_for_grammar; #[test] From 09b0a8b6955a33d672b07101520dd13b38d2071e Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 10 Jul 2025 14:56:21 -0500 Subject: [PATCH 11/50] Rewrite `leaf` and `text` (was `seq`) to allow inserting tree-sitter like rules in them: * Can now provide text directly * Can now provide a `re` or `pattern` function to specify a function * Can now specify `choice` directly * Can now specify `seq` directly * Can now specify `optional` directly --- Cargo.lock | 1 + common/Cargo.toml | 1 + common/src/expansion.rs | 128 +++------------- common/src/lib.rs | 142 ++++++++++++++---- example/src/arithmetic.rs | 8 +- example/src/optionals.rs | 11 +- example/src/repetitions.rs | 16 +- example/src/words.rs | 6 +- macro/src/expansion.rs | 35 +++-- macro/src/lib.rs | 93 ++++++++---- ...t_sitter_macro__tests__enum_prec_left.snap | 9 +- ...t_sitter_macro__tests__enum_recursive.snap | 9 +- ...macro__tests__enum_transformed_fields.snap | 13 +- ...r_macro__tests__enum_with_named_field.snap | 9 +- 
...macro__tests__enum_with_unamed_vector.snap | 10 +- ...r_macro__tests__grammar_unboxed_field.snap | 13 +- ...t_sitter_macro__tests__spanned_in_vec.snap | 10 +- ...ust_sitter_macro__tests__struct_extra.snap | 9 +- ..._sitter_macro__tests__struct_optional.snap | 17 +-- ...st_sitter_macro__tests__struct_repeat.snap | 10 +- runtime/src/__private.rs | 2 +- tool/src/lib.rs | 83 +++++----- 22 files changed, 310 insertions(+), 325 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fce647..c0dff8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -488,6 +488,7 @@ dependencies = [ name = "rust-sitter-common" version = "0.4.5" dependencies = [ + "proc-macro2", "quote", "serde_json", "syn 2.0.98", diff --git a/common/Cargo.toml b/common/Cargo.toml index 84b3d47..66a5e87 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -15,6 +15,7 @@ path = "src/lib.rs" [dependencies] syn = { version = "2", features = [ "full", "extra-traits" ] } +proc-macro2 = "1" quote = "1" serde_json = "1" diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 3187e5d..c519c66 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -178,10 +178,10 @@ fn gen_field( let precs = Extras::new(&attrs); let leaf_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "leaf")); - let seq_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "seq")); + let text_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "text")); - if leaf_attr.is_some() && seq_attr.is_some() { - panic!("Cannot specify leaf and seq at the same time"); + if leaf_attr.is_some() && text_attr.is_some() { + panic!("Cannot specify leaf and text at the same time"); } let mut skip_over = HashSet::new(); @@ -197,56 +197,15 @@ fn gen_field( let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); - if let Some(seq) = seq_attr { - // Handle the seq separately. 
- let inputs = seq - .parse_args_with(Punctuated::::parse_terminated) - .unwrap(); - let mut members = vec![]; - for input in inputs { - let (typ, expr) = match input { - ExprOrCall::Expr(expr) => ("STRING", expr), - ExprOrCall::Call(call) => { - let typ = if call.ident == "pattern" || call.ident == "re" { - "PATTERN" - } else if call.ident == "text" { - "STRING" - } else { - panic!("Unexpected seq input, expected one of: [pattern, re, text]"); - }; - (typ, call.expr) - } - }; - if let Expr::Lit(lit) = expr - && let Lit::Str(s) = lit.lit - { - members.push(json!({ - "type": typ, - "value": s.value(), - })); - } else { - panic!("expr in seq must be a literal string"); - } - } - - // seq is only used to parse a bunch of tokens which are then not used directly. As such, + if let Some(text) = text_attr { + let input: TsInput = text.parse_args().unwrap(); + // text is only used to parse a bunch of tokens which are then not used directly. As such, // the type is required to be `()` or else it will fail to compile. 
- let ty = if is_option { - &inner_type_option - } else { - &leaf_type - }; - match ty { + match &leaf_type { Type::Tuple(t) if t.elems.is_empty() => {} - _ => panic!("Unexpected type `()` is required for rust_sitter::seq"), + _ => panic!("Unexpected type `()` is required for rust_sitter::text"), } - return ( - precs.apply(json!({ - "type": "SEQ", - "members": members, - })), - is_option, - ); + return (precs.apply(input.evaluate().unwrap()), false); } if attrs.iter().any(|attr| sitter_attr_matches(attr, "word")) { @@ -257,68 +216,19 @@ fn gen_field( *word_rule = Some(path.clone()); } - let leaf_params = leaf_attr.and_then(|a| { - a.parse_args_with(Punctuated::::parse_terminated) - .ok() - }); - - let pattern_param = leaf_params.as_ref().and_then(|p| { - p.iter() - .find(|param| param.path == "pattern") - .map(|p| p.expr.clone()) - }); - - let text_param = leaf_params.as_ref().and_then(|p| { - p.iter() - .find(|param| param.path == "text") - .map(|p| p.expr.clone()) - }); - - if pattern_param.is_some() && text_param.is_some() { - panic!("cannot specify text and pattern in the same leaf"); - } + let leaf_input = leaf_attr.and_then(|a| a.parse_args::().ok()); if !is_vec && !is_option { - if let Some(Expr::Lit(lit)) = pattern_param { - if let Lit::Str(s) = &lit.lit { - out.insert( - path.clone(), - precs.apply(json!({ - "type": "PATTERN", - "value": s.value(), - })), - ); - - ( - json!({ - "type": "SYMBOL", - "name": path - }), - is_option, - ) - } else { - panic!("Expected string literal for pattern"); - } - } else if let Some(Expr::Lit(lit)) = text_param { - if let Lit::Str(s) = &lit.lit { - out.insert( - path.clone(), - precs.apply(json!({ - "type": "STRING", - "value": s.value(), - })), - ); + if let Some(input) = leaf_input { + out.insert(path.clone(), precs.apply(input.evaluate().unwrap())); - ( - json!({ - "type": "SYMBOL", - "name": path - }), - is_option, - ) - } else { - panic!("Expected string literal for text"); - } + ( + json!({ + "type": "SYMBOL", + 
"name": path + }), + is_option, + ) } else { let symbol_name = if let Type::Path(p) = filter_inner_type(&leaf_type, &skip_over) { if p.path.segments.len() == 1 { diff --git a/common/src/lib.rs b/common/src/lib.rs index 35fab97..10fcd91 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,5 +1,5 @@ +use proc_macro2::Span; use std::{collections::HashSet, sync::LazyLock}; - use syn::{ parse::{Parse, ParseStream}, punctuated::Punctuated, @@ -50,40 +50,123 @@ impl Parse for FieldThenParams { } } -// NOTE: Technically this is unnecessary, because `Expr` can be parsed as a call, but this is more -// straight forward for us since it doesn't make us deal with `path`, etc. +/// tree-sitter input parsing. #[derive(Debug, Clone, PartialEq, Eq)] -pub enum ExprOrCall { - Expr(Expr), - Call(Call), +pub struct TsInput { + expr: Expr, } -impl Parse for ExprOrCall { +impl Parse for TsInput { fn parse(input: ParseStream) -> Result { - if let Ok(e) = input.parse::() { - Ok(Self::Call(e)) - } else { - Ok(Self::Expr(input.parse()?)) - } + Ok(Self { + expr: input.parse()?, + }) } } -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Call { - pub ident: Ident, - pub paren_token: token::Paren, - // If we need multiple inputs here we can do Punctuated - pub expr: Expr, -} - -impl Parse for Call { - fn parse(input: ParseStream) -> Result { - let content; - Ok(Call { - ident: input.parse()?, - paren_token: parenthesized!(content in input), - expr: content.parse()?, - }) +impl TsInput { + fn new(expr: &Expr) -> Self { + Self { expr: expr.clone() } + } + pub fn evaluate(&self) -> Result { + use serde_json::json; + fn get_str(e: &Expr) -> Result { + let s = match e { + Expr::Lit(ExprLit { + attrs: _, + lit: Lit::Str(f), + }) => f, + _ => return Err(syn::Error::new(Span::call_site(), "expected a string")), + }; + Ok(s.value()) + } + fn get_arg(p: &Punctuated, i: usize, expected: usize) -> Result<&Expr> { + assert!(i < expected); + if p.len() != expected { + return 
Err(syn::Error::new(Span::call_site(), "Too many arguments")); + } + Ok(p.get(i).unwrap()) + } + let json = match &self.expr { + Expr::Lit(ExprLit { + attrs: _, + lit: Lit::Str(s), + }) => json!({ + "type": "STRING", + "value": s.value(), + }), + Expr::Call(ExprCall { + attrs: _, + func, + paren_token: _, + args, + }) => { + let func = match &**func { + Expr::Path(ExprPath { + attrs: _, + qself: _, + path, + }) => path.require_ident()?.to_string(), + _ => return Err(syn::Error::new(Span::call_site(), "Expected path")), + }; + match func.as_str() { + "optional" => { + let inner = Self::new(get_arg(args, 0, 1)?); + let mut members = vec![]; + members.push(inner.evaluate()?); + members.push(json!({ + "type": "BLANK", + })); + json!({ + "type": "CHOICE", + "members": members, + }) + } + "seq" => { + let mut members = vec![]; + for arg in args { + let ts = Self::new(arg); + members.push(ts.evaluate()?); + } + json!({ + "type": "SEQ", + "members": members, + }) + } + "choice" => { + let mut members = vec![]; + for arg in args { + let ts = Self::new(arg); + members.push(ts.evaluate()?); + } + json!({ + "type": "CHOICE", + "members": members, + }) + } + "re" | "pattern" => { + json!({ + "type": "PATTERN", + "value": get_str(get_arg(args, 0, 1)?)?, + }) + } + "text" => { + json!({ + "type": "STRING", + "value": get_str(get_arg(args, 0, 1)?)?, + }) + } + k => { + return Err(syn::Error::new( + Span::call_site(), + format!("Unexpected function call {k}"), + )); + } + } + } + _ => return Err(syn::Error::new(Span::call_site(), "Unexpected input type")), + }; + Ok(json) } } @@ -97,11 +180,12 @@ static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { "prec_right", "prec_dynamic", "extra", - "seq", "repeat", "delimited", "text", "pattern", + "with", + "transform", ] .into_iter() .collect() diff --git a/example/src/arithmetic.rs b/example/src/arithmetic.rs index 1e8f3f7..33813d0 100644 --- a/example/src/arithmetic.rs +++ b/example/src/arithmetic.rs @@ -3,24 +3,24 @@ pub mod grammar 
{ #[rust_sitter::language] #[derive(PartialEq, Eq, Debug)] pub enum Expression { - Number(#[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32), + Number(#[rust_sitter::leaf(pattern(r"\d+"))] i32), #[rust_sitter::prec_left(1)] Sub( Box, - #[rust_sitter::leaf(text = "-")] (), + #[rust_sitter::leaf("-")] (), Box, ), #[rust_sitter::prec_left(2)] Mul( Box, - #[rust_sitter::leaf(text = "*")] (), + #[rust_sitter::leaf("*")] (), Box, ), } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } diff --git a/example/src/optionals.rs b/example/src/optionals.rs index 365b749..aeb9ec0 100644 --- a/example/src/optionals.rs +++ b/example/src/optionals.rs @@ -6,18 +6,21 @@ mod grammar { #[rust_sitter::language] #[derive(Debug)] pub struct Language { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] + // Not necessary, done automatically. 
+ // #[rust_sitter::with(|v| v.parse().unwrap())] v: Option, - #[rust_sitter::leaf(text = "_")] + #[rust_sitter::leaf("_")] _s: (), t: Spanned>, - #[rust_sitter::leaf(text = ".")] + #[rust_sitter::leaf(".")] _d: Option<()>, } #[derive(Debug)] pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] + #[rust_sitter::with(|v| v.parse().unwrap())] v: i32, } } diff --git a/example/src/repetitions.rs b/example/src/repetitions.rs index df407fa..e8af5d9 100644 --- a/example/src/repetitions.rs +++ b/example/src/repetitions.rs @@ -8,16 +8,16 @@ pub mod grammar { pub struct NumberList { #[rust_sitter::repeat(non_empty = true)] #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] + #[rust_sitter::leaf(",")] () )] - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] numbers: Spanned>>, } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -30,13 +30,13 @@ pub mod grammar2 { #[derive(Debug)] #[allow(dead_code)] pub struct NumberList { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] numbers: Spanned>>, } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -50,10 +50,10 @@ pub mod grammar3 { #[allow(dead_code)] pub struct NumberList { #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] + #[rust_sitter::leaf(",")] () )] - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] numbers: Spanned>>>, #[rust_sitter::skip(123)] metadata: u32, @@ -61,7 +61,7 @@ pub mod grammar3 { #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } diff 
--git a/example/src/words.rs b/example/src/words.rs index fd87363..0b34773 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -4,16 +4,16 @@ pub mod grammar { #[derive(Debug)] #[allow(dead_code)] pub struct Words { - #[rust_sitter::leaf(text = r"if")] + #[rust_sitter::leaf("if")] keyword: (), #[rust_sitter::word] - #[rust_sitter::leaf(pattern = r"[a-z_]+")] + #[rust_sitter::leaf(pattern(r"[a-z_]+"))] word: String, } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index b2d35ce..129c19f 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -29,28 +29,31 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { .iter() .find(|attr| sitter_attr_matches(attr, "leaf")); - let seq_attr = leaf + let transform_attr = leaf .attrs .iter() - .find(|attr| sitter_attr_matches(attr, "seq")); - if seq_attr.is_some() { + .find(|attr| sitter_attr_matches(attr, "transform") || sitter_attr_matches(attr, "with")); + + if transform_attr.is_some() && leaf_attr.is_none() { + panic!("Cannot transform non-leaf nodes"); + } + + let text_attr = leaf + .attrs + .iter() + .find(|attr| sitter_attr_matches(attr, "text")); + if text_attr.is_some() { if leaf_attr.is_some() { - panic!("Cannot use leaf and seq at the same time"); + panic!("Cannot use leaf and text at the same time"); } return syn::parse_quote!({ - ::rust_sitter::__private::skip_seq(cursor, #ident_str); + ::rust_sitter::__private::skip_text(cursor, #ident_str); }); } - let leaf_params = leaf_attr.and_then(|a| { - a.parse_args_with(Punctuated::::parse_terminated) - .ok() - }); - let transform_param = leaf_params.as_ref().and_then(|p| { - p.iter() - .find(|param| param.path == "transform") - .map(|p| p.expr.clone()) - }); + let transform_param = transform_attr + .as_ref() + .map(|attr| attr.parse_args::().unwrap()); let (leaf_type, closure_expr): (Type, 
Expr) = match transform_param { Some(closure) => { @@ -244,10 +247,6 @@ pub fn expand_grammar(input: ItemMod) -> Result { .iter() .cloned() .map(|c| match c { - Item::Macro(m) => { - dbg!(&m); - Ok(vec![Item::Macro(m)]) - } Item::Enum(mut e) => { let match_cases: Vec = e.variants.iter().map(|v| { let variant_path = format!("{}_{}", e.ident, v.ident); diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 8c2f2d6..dc9f56d 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -30,7 +30,7 @@ pub fn language( /// ```ignore /// #[rust_sitter::extra] /// struct Whitespace { -/// #[rust_sitter::leaf(pattern = r"\s")] +/// #[rust_sitter::leaf(re(r"\s"))] /// _whitespace: (), /// } /// ``` @@ -58,20 +58,20 @@ pub fn extra( /// Using the `leaf` attribute on a field: /// ```ignore /// Number( -/// #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] +/// #[rust_sitter::leaf(re(r"\d+"))] /// u32 /// ) /// ``` /// /// Using the attribute on a unit struct or unit enum variant: /// ```ignore -/// #[rust_sitter::leaf(text = "9")] +/// #[rust_sitter::leaf("9")] /// struct BigDigit; /// /// enum SmallDigit { -/// #[rust_sitter::leaf(text = "0")] +/// #[rust_sitter::leaf("0")] /// Zero, -/// #[rust_sitter::leaf(text = "1")] +/// #[rust_sitter::leaf("1")] /// One, /// } /// ``` @@ -84,22 +84,21 @@ pub fn leaf( } #[proc_macro_attribute] -/// Defines a sequence of inputs in a grammar that should be parsed but are not explicitly used. +/// Defines text in the grammar that should be parsed but not explicitly used. No explicit rule is +/// created and these segments are inlined. /// /// ## Example /// ```ignore /// struct Function { -/// #[seq(text = "function")] +/// #[text("function")] /// _function: (), /// name: Ident, -/// #[seq(text = "(")] +/// #[text("(")] /// _lparen: (), /// // ... /// } /// ``` -/// `seq` inputs can be either `text = "..."` or `pattern = "..."`. The type assigned to the field -/// must be `()` or else it will fail to compile. 
-pub fn seq( +pub fn text( _attr: proc_macro::TokenStream, item: proc_macro::TokenStream, ) -> proc_macro::TokenStream { @@ -126,6 +125,38 @@ pub fn skip( item } +/// Applies a custom transformation for parsing the input text of a `leaf` node. +/// Without using `with` the default extractor is applied. +/// +/// ## Example +/// ```ignore +/// struct CustomInt( +/// #[leaf(re(r"\d+"))] +/// #[with(plus_one)] +/// i32 +/// ); +/// +/// fn plus_one(s: &str) -> i32 { +/// s.parse::().unwrap() + 1 +/// } +/// ``` +#[proc_macro_attribute] +pub fn with( + _attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + item +} + +/// Alias for `with`. +#[proc_macro_attribute] +pub fn transform( + _attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + item +} + #[proc_macro_attribute] /// Defines a precedence level for a non-terminal that has no associativity. /// @@ -215,7 +246,7 @@ pub fn prec_dynamic( /// ```ignore /// struct StringFragment( /// #[rust_sitter::immediate] -/// #[rust_sitter::leaf(pattern = r"[^"\\]+")] +/// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] /// () /// ); /// ``` @@ -233,7 +264,7 @@ pub fn immediate( /// ```ignore /// struct StringFragment( /// #[rust_sitter::token] -/// #[rust_sitter::leaf(pattern = r"[^"\\]+")] +/// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] /// () /// ); /// ``` @@ -255,7 +286,7 @@ pub fn token( /// ## Example /// ```ignore /// #[rust_sitter::delimited( -/// #[rust_sitter::leaf(text = ",")] +/// #[rust_sitter::leaf(",")] /// () /// )] /// numbers: Vec @@ -347,7 +378,7 @@ mod tests { #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse::().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] i32 ), } @@ -369,11 +400,11 @@ mod tests { #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + 
#[rust_sitter::leaf(re(r"\d+"))] i32 ), Neg( - #[rust_sitter::leaf(text = "-")] + #[rust_sitter::leaf("-")] (), Box ), @@ -396,13 +427,13 @@ mod tests { #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] i32 ), #[rust_sitter::prec_left(1)] Sub( Box, - #[rust_sitter::leaf(text = "-")] + #[rust_sitter::leaf("-")] (), Box ), @@ -425,13 +456,13 @@ mod tests { #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] i32, + #[rust_sitter::leaf(re(r"\d+"))] i32, ), } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -456,7 +487,7 @@ mod tests { pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] i32 ), } @@ -481,13 +512,13 @@ mod tests { } pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: i32 } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -507,13 +538,13 @@ mod tests { mod grammar { #[rust_sitter::language] pub struct Language { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: Option, t: Option, } pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: i32 } } @@ -532,7 +563,7 @@ mod tests { #[rust_sitter::grammar("test")] mod grammar { pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] value: u32 } @@ -561,11 +592,11 @@ mod tests { #[rust_sitter::language] pub enum Expr { Number( - 
#[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] u32 ), Neg { - #[rust_sitter::leaf(text = "!")] + #[rust_sitter::leaf("!")] _bang: (), value: Box, } @@ -593,13 +624,13 @@ mod tests { } pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: i32 } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index b2051b4..e4f86ee 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] i32), #[rust_sitter::prec_left(1)]\n Sub(Box, #[rust_sitter::leaf(text = \"-\")] (),\n Box),\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern(r\"\\d+\"))] i32),\n #[rust_sitter::prec_left(1)]\n Sub(Box, #[rust_sitter::leaf(\"-\")] (),\n Box),\n }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -30,11 +30,8 @@ mod grammar { node, move |cursor, last_idx| { Expression::Number({ - ::rust_sitter::__private::extract_field::< - rust_sitter::WithLeaf, - _, - >( - cursor, source, last_idx, "0", Some(&|v| v.parse().unwrap()) + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 682c4b3..ee24e84 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] i32),\n Neg(#[rust_sitter::leaf(text = \"-\")] (), Box),\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32),\n Neg(#[rust_sitter::leaf(\"-\")] (), Box),\n }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -30,11 +30,8 @@ mod grammar { node, move |cursor, last_idx| { Expression::Number({ - ::rust_sitter::__private::extract_field::< - rust_sitter::WithLeaf, - _, - >( - cursor, source, last_idx, "0", Some(&|v| v.parse().unwrap()) + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index 532c8ca..89f8ada 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse::().unwrap())] i32),\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n { Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32), }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -29,15 +29,8 @@ mod grammar { node, move |cursor, last_idx| { Expression::Number({ - ::rust_sitter::__private::extract_field::< - rust_sitter::WithLeaf, - _, - >( - cursor, - source, - last_idx, - "0", - Some(&|v| v.parse::().unwrap()), + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index a2ff80a..5fc0562 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expr\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] u32), Neg\n { #[rust_sitter::leaf(text = \"!\")] _bang: (), value: Box, }\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expr\n {\n Number(#[rust_sitter::leaf(pattern(r\"\\d+\"))] u32), Neg\n { #[rust_sitter::leaf(\"!\")] _bang: (), value: Box, }\n }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub enum Expr { @@ -30,11 +30,8 @@ mod grammar { node, move |cursor, last_idx| { Expr::Number({ - ::rust_sitter::__private::extract_field::< - rust_sitter::WithLeaf, - _, - >( - cursor, source, last_idx, "0", Some(&|v| v.parse().unwrap()) + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 58c4b6d..6ffdfbc 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] value: u32\n } #[rust_sitter::language] pub enum Expr\n { Numbers(#[rust_sitter::repeat(non_empty = true)] Vec) }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n pub struct Number { #[rust_sitter::leaf(re(r\"\\d+\"))] value: u32 }\n #[rust_sitter::language] pub enum Expr\n { Numbers(#[rust_sitter::repeat(non_empty = true)] Vec) }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub struct Number { @@ -19,12 +19,8 @@ mod grammar { ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { value: { - ::rust_sitter::__private::extract_field::, _>( - cursor, - source, - last_idx, - "value", - Some(&|v| v.parse().unwrap()), + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "value", None, ) }, } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index 68b183d..a399403 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language { e: Expression, } pub\n enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v: &str|\n v.parse::().unwrap())] i32),\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language { e: Expression, } pub\n enum Expression { Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32), }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub struct Language { @@ -53,15 +53,8 @@ mod grammar { node, move |cursor, last_idx| { Expression::Number({ - ::rust_sitter::__private::extract_field::< - rust_sitter::WithLeaf, - _, - >( - cursor, - source, - last_idx, - "0", - Some(&|v: &str| v.parse::().unwrap()), + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index bd82531..3010be6 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n use rust_sitter::Spanned; #[rust_sitter::language] pub struct\n NumberList { numbers: Vec>, } pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: i32\n } #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern = r\"\\s\")] _whitespace: (), }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n use rust_sitter::Spanned; #[rust_sitter::language] pub struct\n NumberList { numbers: Vec>, } pub struct Number\n { #[rust_sitter::leaf(re(r\"\\d+\"))] v: i32 } #[rust_sitter::extra]\n struct Whitespace\n { #[rust_sitter::leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { use rust_sitter::Spanned; @@ -44,12 +44,8 @@ mod grammar { ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { v: { - ::rust_sitter::__private::extract_field::, _>( - cursor, - source, - last_idx, - "v", - Some(&|v| v.parse().unwrap()), + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "v", None, ) }, } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index cbbd5e2..829feb9 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] i32,),\n } #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern = r\"\\s\")] _whitespace: (), }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n { Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32,), }\n #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub enum Expression { @@ -29,11 +29,8 @@ mod grammar { node, move |cursor, last_idx| { Expression::Number({ - ::rust_sitter::__private::extract_field::< - rust_sitter::WithLeaf, - _, - >( - cursor, source, last_idx, "0", Some(&|v| v.parse().unwrap()) + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 1eaa553..335cfd2 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: Option, t: Option,\n } pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: i32\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language\n {\n #[rust_sitter::leaf(re(r\"\\d+\"))] v: Option, t:\n Option,\n } pub struct Number { #[rust_sitter::leaf(re(r\"\\d+\"))] v: i32 }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub struct Language { @@ -20,11 +20,8 @@ mod grammar { ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Language { v: { - ::rust_sitter::__private::extract_field::< - Option>, - _, - >( - cursor, source, last_idx, "v", Some(&|v| v.parse().unwrap()) + ::rust_sitter::__private::extract_field::, _>( + cursor, source, last_idx, "v", None, ) }, t: { @@ -52,12 +49,8 @@ mod grammar { ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { v: { - ::rust_sitter::__private::extract_field::, _>( - cursor, - source, - last_idx, - "v", - Some(&|v| v.parse().unwrap()), + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "v", None, ) }, } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index ba93104..0651c0e 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct NumberList\n { numbers: Vec, } pub struct Number\n {\n #[rust_sitter::leaf(pattern = r\"\\d+\", transform = |v|\n v.parse().unwrap())] v: i32\n } #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern = r\"\\s\")] _whitespace: (), }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct NumberList\n { numbers: Vec, } pub struct Number\n { #[rust_sitter::leaf(re(r\"\\d+\"))] v: i32 } #[rust_sitter::extra]\n struct Whitespace\n { #[rust_sitter::leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n})? 
.to_token_stream().to_string())" --- mod grammar { pub struct NumberList { @@ -43,12 +43,8 @@ mod grammar { ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { Number { v: { - ::rust_sitter::__private::extract_field::, _>( - cursor, - source, - last_idx, - "v", - Some(&|v| v.parse().unwrap()), + ::rust_sitter::__private::extract_field::( + cursor, source, last_idx, "v", None, ) }, } diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 5f843d3..d4ce7c6 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -58,7 +58,7 @@ pub fn extract_field, T>( } } -pub fn skip_seq(cursor_opt: &mut Option, field_name: &str) { +pub fn skip_text(cursor_opt: &mut Option, field_name: &str) { if let Some(cursor) = cursor_opt.as_mut() { loop { if let Some(name) = cursor.field_name() { diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 7b661e4..f98de7e 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -131,11 +131,11 @@ mod tests { #[rust_sitter::language] pub enum Expr { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] u32 ), Neg { - #[rust_sitter::leaf(text = "!")] + #[rust_sitter::leaf("!")] _bang: (), value: Box, } @@ -160,7 +160,8 @@ mod tests { #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] + #[rust_sitter::transform(|v: &str| v.parse::().unwrap())] i32 ), } @@ -184,11 +185,11 @@ mod tests { #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] i32 ), Neg( - #[rust_sitter::leaf(text = "-", transform = |v| ())] + #[rust_sitter::leaf("-")] (), Box ), @@ -213,13 +214,13 @@ mod tests { #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", 
transform = |v: &str| v.parse::().unwrap())] + #[rust_sitter::leaf(pattern(r"\d+"))] i32 ), #[rust_sitter::prec_left(1)] Sub( Box, - #[rust_sitter::leaf(text = "-", transform = |v| ())] + #[rust_sitter::leaf("-")] (), Box ), @@ -263,49 +264,49 @@ mod tests { } pub enum BinaryExpressionInner { - String(#[rust_sitter::leaf(text = "+")] ()), - String2(#[rust_sitter::leaf(text = "-")] ()), - String3(#[rust_sitter::leaf(text = "*")] ()), - String4(#[rust_sitter::leaf(text = "/")] ()), + String(#[rust_sitter::leaf("+")] ()), + String2(#[rust_sitter::leaf("-")] ()), + String3(#[rust_sitter::leaf("*")] ()), + String4(#[rust_sitter::leaf("/")] ()), } pub struct ExpressionStatement { pub expression: Expression, - #[rust_sitter::leaf(text = ";")] + #[rust_sitter::leaf(";")] pub _semicolon: (), } #[rust_sitter::prec_dynamic(1)] pub struct IfStatement { - #[rust_sitter::leaf(text = "if")] + #[rust_sitter::leaf("if")] pub _if: (), - #[rust_sitter::leaf(text = "(")] + #[rust_sitter::leaf("(")] pub _lparen: (), pub expression: Expression, - #[rust_sitter::leaf(text = ")")] + #[rust_sitter::leaf(")")] pub _rparen: (), - #[rust_sitter::leaf(text = "{")] + #[rust_sitter::leaf("{")] pub _lbrace: (), pub statement: Statement, - #[rust_sitter::leaf(text = "}")] + #[rust_sitter::leaf("}")] pub _rbrace: (), pub if_statement_inner: Option, } pub struct IfStatementElse { - #[rust_sitter::leaf(text = "else")] + #[rust_sitter::leaf("else")] pub _else: (), - #[rust_sitter::leaf(text = "{")] + #[rust_sitter::leaf("{")] pub _lbrace: (), pub statement: Statement, - #[rust_sitter::leaf(text = "}")] + #[rust_sitter::leaf("}")] pub _rbrace: (), } #[rust_sitter::word] - pub struct Identifier(#[rust_sitter::leaf(pattern = "[a-zA-Z_][a-zA-Z0-9_]*")] ()); + pub struct Identifier(#[rust_sitter::leaf(pattern("[a-zA-Z_][a-zA-Z0-9_]*"))] ()); - pub struct Number(#[rust_sitter::leaf(pattern = "\\d+")] ()); + pub struct Number(#[rust_sitter::leaf(pattern("\\d+"))] ()); } } { m @@ -326,14 +327,14 @@ mod tests 
{ #[rust_sitter::language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] i32 ), } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s", transform = |_v| ())] + #[rust_sitter::leaf(re(r"\s"))] _whitespace: (), } } @@ -360,7 +361,7 @@ mod tests { pub enum Expression { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v: &str| v.parse::().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] i32 ), } @@ -384,20 +385,20 @@ mod tests { #[rust_sitter::language] pub struct NumberList { #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] + #[rust_sitter::leaf(",")] () )] numbers: Vec, } pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: i32, } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -423,13 +424,13 @@ mod tests { } pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: i32, } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -453,20 +454,20 @@ mod tests { pub struct NumberList { #[rust_sitter::repeat(non_empty = true)] #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] + #[rust_sitter::leaf(",")] () )] numbers: Vec, } pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: i32, } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -488,15 +489,15 @@ mod tests { mod grammar { #[rust_sitter::language] pub struct Language { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] 
+ #[rust_sitter::leaf(re(r"\d+"))] v: Option, - #[rust_sitter::leaf(pattern = r" ", transform = |v| ())] + #[rust_sitter::leaf(re(r" "))] space: (), t: Option, } pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] v: i32 } } @@ -517,7 +518,7 @@ mod tests { #[rust_sitter::grammar("test")] mod grammar { pub struct Number { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] value: u32 } @@ -549,13 +550,13 @@ mod tests { #[rust_sitter::language] pub struct NumberList { - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] numbers: Vec>, } #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } @@ -579,13 +580,13 @@ mod tests { pub struct StringFragment( #[rust_sitter::immediate] #[rust_sitter::prec(1)] - #[rust_sitter::leaf(pattern = r#"[^"\\]+"#)] + #[rust_sitter::leaf(pattern(r#"[^"\\]+"#))] () ); #[rust_sitter::extra] struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] + #[rust_sitter::leaf(pattern(r"\s"))] _whitespace: (), } } From 1822c0d97b88fa4985c3c7265512d236bf21a798 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 10 Jul 2025 16:14:41 -0500 Subject: [PATCH 12/50] Refactor delimited to just take TsInput instead. To reach parity with the original implementation, we will need to allow TsInput to handle references to other rules. 
--- common/src/expansion.rs | 12 +++++++----- common/src/lib.rs | 9 ++++++++- example/src/repetitions.rs | 10 ++-------- macro/src/lib.rs | 7 ++----- tool/src/lib.rs | 10 ++-------- .../rust_sitter_tool__tests__grammar_repeat.snap | 2 +- .../rust_sitter_tool__tests__grammar_repeat1.snap | 2 +- 7 files changed, 23 insertions(+), 29 deletions(-) diff --git a/common/src/expansion.rs b/common/src/expansion.rs index c519c66..f2e01bf 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -261,14 +261,16 @@ fn gen_field( .iter() .find(|attr| sitter_attr_matches(attr, "delimited")); - let delimited_params = - delimited_attr.and_then(|a| a.parse_args_with(FieldThenParams::parse).ok()); + let delimited_param = + delimited_attr.map(|a| a.parse_args::().unwrap()); - let delimiter_json = delimited_params.map(|p| { + // NOTE (JAB): All of this is pretty ugly, I think we can flatten some of these types + // without losing anything. + let delimiter_json = delimited_param.as_ref().map(|_| { gen_field( format!("{path}_vec_delimiter"), - p.field.ty, - p.field.attrs, + parse_quote!(()), + vec![parse_quote!(#[text(#delimited_param)])], word_rule, out, ) diff --git a/common/src/lib.rs b/common/src/lib.rs index 10fcd91..57a8014 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,4 +1,5 @@ use proc_macro2::Span; +use quote::{ToTokens, TokenStreamExt}; use std::{collections::HashSet, sync::LazyLock}; use syn::{ parse::{Parse, ParseStream}, @@ -53,7 +54,7 @@ impl Parse for FieldThenParams { /// tree-sitter input parsing. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct TsInput { - expr: Expr, + pub expr: Expr, } impl Parse for TsInput { @@ -64,6 +65,12 @@ impl Parse for TsInput { } } +impl ToTokens for TsInput { + fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { + self.expr.to_tokens(tokens); + } +} + impl TsInput { fn new(expr: &Expr) -> Self { Self { expr: expr.clone() } diff --git a/example/src/repetitions.rs b/example/src/repetitions.rs index e8af5d9..4f6b784 100644 --- a/example/src/repetitions.rs +++ b/example/src/repetitions.rs @@ -7,10 +7,7 @@ pub mod grammar { #[allow(dead_code)] pub struct NumberList { #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited( - #[rust_sitter::leaf(",")] - () - )] + #[rust_sitter::delimited(",")] #[rust_sitter::leaf(pattern(r"\d+"))] numbers: Spanned>>, } @@ -49,10 +46,7 @@ pub mod grammar3 { #[derive(Debug)] #[allow(dead_code)] pub struct NumberList { - #[rust_sitter::delimited( - #[rust_sitter::leaf(",")] - () - )] + #[rust_sitter::delimited(",")] #[rust_sitter::leaf(pattern(r"\d+"))] numbers: Spanned>>>, #[rust_sitter::skip(123)] diff --git a/macro/src/lib.rs b/macro/src/lib.rs index dc9f56d..06423a1 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -277,7 +277,7 @@ pub fn token( #[proc_macro_attribute] /// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements. -/// The [`rust_sitter::repeat`] annotation must be used on the field as well. +/// The [`rust_sitter::repeat`] annotation can be used on the field as well. /// /// This annotation takes a single, unnamed argument, which specifies a field type to parse. This can /// either be a reference to another type, or can be defined as a `leaf` field. 
Generally, the argument @@ -285,10 +285,7 @@ pub fn token( /// /// ## Example /// ```ignore -/// #[rust_sitter::delimited( -/// #[rust_sitter::leaf(",")] -/// () -/// )] +/// #[rust_sitter::delimited(",")] /// numbers: Vec /// ``` pub fn delimited( diff --git a/tool/src/lib.rs b/tool/src/lib.rs index f98de7e..6e35d33 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -384,10 +384,7 @@ mod tests { pub mod grammar { #[rust_sitter::language] pub struct NumberList { - #[rust_sitter::delimited( - #[rust_sitter::leaf(",")] - () - )] + #[rust_sitter::delimited(",")] numbers: Vec, } @@ -453,10 +450,7 @@ mod tests { #[rust_sitter::language] pub struct NumberList { #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited( - #[rust_sitter::leaf(",")] - () - )] + #[rust_sitter::delimited(",")] numbers: Vec, } diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap index 05407a9..e8adffa 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_delimiter":{"type":"STRING","value":","},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"NumberList_numbers_vec_delimiter"},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap index 1585c93..c20f062 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"NumberList_numbers_vec_delimiter":{"type":"STRING","value":","},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"NumberList_numbers_vec_delimiter"},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} From a055d7cc926cc70ece51aa981ad1a83f6951d8d2 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 10 Jul 2025 16:32:34 -0500 Subject: [PATCH 13/50] Update README --- README.md | 82 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 3d21270..0a0efa0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ -# Rust Sitter -[![Crates.io](https://img.shields.io/crates/v/rust-sitter)](https://crates.io/crates/rust-sitter) +# Rust Sitter - Otonoma fork +**This project is a fork of [rust-sitter](https://github.com/hydro-project/rust-sitter). It has been heavily +modified in many breaking ways.** Rust Sitter makes it easy to create efficient parsers in Rust by leveraging the [Tree Sitter](https://tree-sitter.github.io/tree-sitter/) parser generator. 
With Rust Sitter, you can define your entire grammar with annotations on idiomatic Rust code, and let macros generate the parser and type-safe bindings for you! @@ -7,10 +8,10 @@ Rust Sitter makes it easy to create efficient parsers in Rust by leveraging the First, add Rust/Tree Sitter to your `Cargo.toml`: ```toml [dependencies] -rust-sitter = "0.4.5" +rust-sitter = { git = "https://github.com/otonoma/rust-sitter" } [build-dependencies] -rust-sitter-tool = "0.4.5" +rust-sitter-tool = { git = "https://github.com/otonoma/rust-sitter" } ``` _Note: By default, Rust Sitter uses a fork of Tree Sitter with a pure-Rust runtime to support `wasm32-unknown-unknown`. To use the standard C runtime instead, disable default features and enable the `tree-sitter-standard` feature_ @@ -22,6 +23,7 @@ use std::path::PathBuf; fn main() { println!("cargo:rerun-if-changed=src"); + // Path to the file containing your grammar. rust_sitter_tool::build_parsers(&PathBuf::from("src/main.rs")); } ``` @@ -46,21 +48,23 @@ pub enum Expr { } ``` -Now that we have the type defined, we must annotate the enum variants to describe how to identify them in the text being parsed. First, we can apply `rust_sitter::leaf` to use a regular expression to match digits corresponding to a number, and define a transformation that parses the resulting string into a `u32`. +Now that we have the type defined, we must annotate the enum variants to describe how to identify them in the text being parsed. First, we can apply `rust_sitter::leaf` to use a regular expression to match digits corresponding to a number. +The matched text is then converted into the field's type using the default extraction for that type. For numeric types, this +defaults to `FromStr`. You can specify an alternate function using `#[rust_sitter::with]`. ```rust Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] u32, ) ``` -For the `Add` variant, things are a bit more complicated. 
First, we add an extra field corresponding to the `+` that must sit between the two sub-expressions. This can be achieved with `text` parameter of `rust_sitter::leaf`, which instructs the parser to match a specific string. Because we are parsing to `()`, we do not need to provide a transformation. +For the `Add` variant, things are a bit more complicated. First, we add an extra field corresponding to the `+` that must sit between the two sub-expressions. This can be achieved with `rust_sitter::text` or `rust_sitter::leaf`, which instructs the parser to match a specific string. Because we are parsing to `()`, we do not need to provide a transformation. ```rust Add( Box, - #[rust_sitter::leaf(text = "+")] (), + #[rust_sitter::leaf("+")] (), Box, ) ``` @@ -71,7 +75,7 @@ If we try to compile this grammar, however, we will see ane error due to conflic #[rust_sitter::prec_left(1)] Add( Box, - #[rust_sitter::leaf(text = "+")] (), + #[rust_sitter::leaf("+")] (), Box, ) ``` @@ -84,13 +88,13 @@ mod grammar { #[rust_sitter::language] pub enum Expr { Number( - #[rust_sitter::leaf(pattern = r"\d+", transform = |v| v.parse().unwrap())] + #[rust_sitter::leaf(re(r"\d+"))] u32, ), #[rust_sitter::prec_left(1)] Add( Box, - #[rust_sitter::leaf(text = "+")] (), + #[rust_sitter::leaf("+")] (), Box, ) } @@ -138,29 +142,38 @@ This annotation marks a node as extra and can safely be skipped while parsing. T ```rust #[rust_sitter::extra] -struct Whitespace { - #[rust_sitter::leaf(pattern = r"\s")] - _whitespace: (), -} +#[rust_sitter::leaf(re(r"\s"))] +// Structs and fields that start with `_` are hidden from the output grammar. +struct _Whitespace; ``` ## Field Annotations -### `#[rust_sitter::leaf(...)]` -The `#[rust_sitter::leaf(...)]` annotation can be used to define a leaf node in the AST. This annotation takes a number of parameters that control how the parser behaves: -- the `pattern` parameter takes a regular expression that is used to match the text of the leaf node. 
This parameter is required. -- the `text` parameter takes a string that is used to match the text of the leaf node. This parameter is mutually exclusive with `pattern`. -- the `transform` parameter takes a function that is used to transform the matched text (an `&str`) into the desired type. This parameter is optional if the target type is `()`. +### `#[rust_sitter::leaf(...)]` and `#[rust_sitter::text(...)]` +The `#[rust_sitter::leaf(...)]` annotation can be used to define a leaf node in the AST. +`#[rust_sitter::text(...)]` is similar, but it does not create a named node in the grammar and cannot be +extracted. It must always be assigned to `()`. + +`leaf` and `text` take an input that looks like the [tree sitter +DSL](https://tree-sitter.github.io/tree-sitter/creating-parsers/2-the-grammar-dsl.html). The supported rules +currently are: +* `choice` +* `optional` +* `seq` +* `re` or `pattern` to specify a regular expression +* literal text + +Others can be added in the future as needed. `leaf` can either be applied to a field in a struct / enum variant (as seen above), or directly on a type with no fields: ```rust -#[rust_sitter::leaf(text = "9")] +#[rust_sitter::leaf("9")] struct BigDigit; enum SmallDigit { - #[rust_sitter::leaf(text = "0")] + #[rust_sitter::leaf("0")] Zero, - #[rust_sitter::leaf(text = "1")] + #[rust_sitter::leaf("1")] One, } ``` @@ -183,14 +196,13 @@ Rust Sitter has a few special types that can be used to define more complex gram ### `Vec` To parse repeating structures, you can use a `Vec` to parse a list of `T`s. Note that the `Vec` type **cannot** be wrapped in another `Vec` (create additional structs if this is necessary). There are two special attributes that can be applied to a `Vec` field to control the parsing behavior. -The `#[rust_sitter::delimited(...)]` attribute can be used to specify a separator between elements of the list, and takes a parameter of the same format as an unnamed field. 
For example, we can define a grammar that parses a comma-separated list of expressions: +The `#[rust_sitter::delimited(...)]` attribute can be used to specify a separator between elements of the +list. Its argument is parsed in the same way as the input to `text` and `leaf`, and therefore supports all of the tree-sitter +rules listed above. ```rust pub struct CommaSeparatedExprs { - #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] - () - )] + #[rust_sitter::delimited(",")] numbers: Vec, } ``` @@ -200,9 +212,7 @@ The `#[rust_sitter::repeat(...)]` attribute can be used to specify additional co ```rust pub struct CommaSeparatedExprs { #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] - () + #[rust_sitter::delimited(",")] )] numbers: Vec, } @@ -214,10 +224,7 @@ To parse optional structures, you can use an `Option` to parse a single `T` o ```rust pub struct CommaSeparatedExprs { #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] - () - )] + #[rust_sitter::delimited(",")] numbers: Vec>, } ``` @@ -228,10 +235,7 @@ When using Rust Sitter to power diagnostic tools, it can be helpful to access sp ```rust pub struct CommaSeparatedExprs { #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited( - #[rust_sitter::leaf(text = ",")] - () - )] + #[rust_sitter::delimited(",")] numbers: Vec>>, } ``` From d1cf019024417587ba055a714f0f94ebba92331c Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 10 Jul 2025 19:21:03 -0500 Subject: [PATCH 14/50] Fix lint --- common/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 57a8014..ff6d17a 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,5 +1,5 @@ use proc_macro2::Span; -use quote::{ToTokens, TokenStreamExt}; +use quote::ToTokens; use std::{collections::HashSet, sync::LazyLock}; use syn::{ parse::{Parse, ParseStream}, From 
9f87ffdf91a4d58fec461f8b7ac304c51c80afe6 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 10 Jul 2025 19:52:24 -0500 Subject: [PATCH 15/50] Add ability to reference other rules by name in seq and add ability to parse tuples. --- common/src/lib.rs | 9 ++++++++- runtime/src/lib.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index ff6d17a..147c1a4 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -171,7 +171,14 @@ impl TsInput { } } } - _ => return Err(syn::Error::new(Span::call_site(), "Unexpected input type")), + Expr::Path(ExprPath { attrs: _, qself: _, path }) => { + let ident = path.require_ident()?; + json!({ + "type": "SYMBOL", + "name": ident.to_string(), + }) + } + k => return Err(syn::Error::new(Span::call_site(), format!("Unexpected input type: {k:?}"))), }; Ok(json) } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 5d7c2d3..b17358c 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -142,6 +142,52 @@ extract_from_str!(f64); // Sort of silly, but keeps it general. extract_from_str!(String); +macro_rules! extract_for_tuple { + ($($t:ident),*) => { + impl<$($t: Extract<$t>),*> Extract<($($t),*)> for ($($t),*) { + type LeafFn = (); + fn extract( + node: Option, + source: &[u8], + last_idx: usize, + _leaf_fn: Option<&Self::LeafFn>, + ) -> Self { + let node = node.expect("No node found"); + let mut c = node.walk(); + let mut it = node.children(&mut c); + ( + $( + $t::extract(it.next(), source, last_idx, None) + ),* + ) + } + } + + }; +} + +extract_for_tuple!(T1, T2); +extract_for_tuple!(T1, T2, T3); +extract_for_tuple!(T1, T2, T3, T4); +extract_for_tuple!(T1, T2, T3, T4, T5); +extract_for_tuple!(T1, T2, T3, T4, T5, T6); +extract_for_tuple!(T1, T2, T3, T4, T5, T6, T7); +extract_for_tuple!(T1, T2, T3, T4, T5, T6, T7, T8); +// Good enough, can maybe generate all of these with a macro if we are clever enough. 
+ +// Would like this to extract optionals specifically if they exist - probably means if a node is +// present then it is true. Might be too magic though. +// impl Extract for bool { +// type LeafFn = (); +// fn extract( +// node: Option, +// source: &[u8], +// last_idx: usize, +// leaf_fn: Option<&Self::LeafFn>, +// ) -> bool { +// } +// } + #[derive(Clone, Debug)] /// A wrapper around a value that also contains the span of the value in the source. pub struct Spanned { From d8679e33f9ab28b8447b42102d66804df1e54ffc Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Fri, 11 Jul 2025 12:33:11 -0500 Subject: [PATCH 16/50] Fix infinite loop on text parsing --- runtime/src/__private.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index d4ce7c6..acd82d4 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -31,6 +31,10 @@ pub fn extract_field, T>( if let Some(cursor) = cursor_opt.as_mut() { loop { let n = cursor.node(); + if n.is_extra() { + dbg!("Extra"); + dbg!(n); + } if let Some(name) = cursor.field_name() { if name == field_name { let out = LT::extract(Some(n), source, *last_idx, closure_ref); @@ -70,6 +74,8 @@ pub fn skip_text(cursor_opt: &mut Option, field_name: & } else { return; } + } else { + return; } } } From d95834835f87ae659c7ca41fc515f059575a8490 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Fri, 11 Jul 2025 13:01:57 -0500 Subject: [PATCH 17/50] `prec` _can_ in fact be applied to fields. 
Remove `dbg!` calls --- common/src/expansion.rs | 6 ------ runtime/src/__private.rs | 4 ---- 2 files changed, 10 deletions(-) diff --git a/common/src/expansion.rs b/common/src/expansion.rs index f2e01bf..5fb88a8 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -188,12 +188,6 @@ fn gen_field( skip_over.insert("Spanned"); skip_over.insert("Box"); - if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { - panic!( - "The attributes `prec_left` and `prec_right` cannot be applied to a non-struct type" - ); - } - let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index acd82d4..8e4eb21 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -31,10 +31,6 @@ pub fn extract_field, T>( if let Some(cursor) = cursor_opt.as_mut() { loop { let n = cursor.node(); - if n.is_extra() { - dbg!("Extra"); - dbg!(n); - } if let Some(name) = cursor.field_name() { if name == field_name { let out = LT::extract(Some(n), source, *last_idx, closure_ref); From 1b05e2031215af27a8847d53cfe0270a1c3625a6 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Fri, 11 Jul 2025 13:10:07 -0500 Subject: [PATCH 18/50] Refactor a bit to put version/author at the top, bump version to 0.5.0 --- Cargo.lock | 10 +++++----- Cargo.toml | 3 +++ common/Cargo.toml | 4 ++-- example/Cargo.toml | 4 ++-- macro/Cargo.toml | 8 ++++---- macro/src/expansion.rs | 2 +- .../rust_sitter_macro__tests__enum_prec_left.snap | 2 +- .../rust_sitter_macro__tests__enum_recursive.snap | 2 +- ...t_sitter_macro__tests__enum_transformed_fields.snap | 2 +- ...ust_sitter_macro__tests__enum_with_named_field.snap | 2 +- ...t_sitter_macro__tests__enum_with_unamed_vector.snap | 2 +- ...ust_sitter_macro__tests__grammar_unboxed_field.snap | 2 +- .../rust_sitter_macro__tests__spanned_in_vec.snap | 2 +- 
.../rust_sitter_macro__tests__struct_extra.snap | 2 +- .../rust_sitter_macro__tests__struct_optional.snap | 2 +- .../rust_sitter_macro__tests__struct_repeat.snap | 2 +- runtime/Cargo.toml | 6 +++--- tool/Cargo.toml | 6 +++--- 18 files changed, 33 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0dff8c..38f3e7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -475,7 +475,7 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rust-sitter" -version = "0.4.5" +version = "0.5.0" dependencies = [ "insta", "rust-sitter-macro", @@ -486,7 +486,7 @@ dependencies = [ [[package]] name = "rust-sitter-common" -version = "0.4.5" +version = "0.5.0" dependencies = [ "proc-macro2", "quote", @@ -496,7 +496,7 @@ dependencies = [ [[package]] name = "rust-sitter-example" -version = "0.4.5" +version = "0.5.0" dependencies = [ "codemap", "codemap-diagnostic", @@ -508,7 +508,7 @@ dependencies = [ [[package]] name = "rust-sitter-macro" -version = "0.4.5" +version = "0.5.0" dependencies = [ "insta", "proc-macro2", @@ -520,7 +520,7 @@ dependencies = [ [[package]] name = "rust-sitter-tool" -version = "0.4.5" +version = "0.5.0" dependencies = [ "cc", "insta", diff --git a/Cargo.toml b/Cargo.toml index 20ca59c..f1e8c54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,6 @@ members = [ "example", "common", ] +[workspace.package] +version = "0.5.0" +authors = ["Shadaj Laddad "] diff --git a/common/Cargo.toml b/common/Cargo.toml index 66a5e87..c2bc801 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -3,8 +3,8 @@ name = "rust-sitter-common" description = "Shared logic for the Rust Sitter macro and tool" readme = "../README.md" repository = "https://github.com/hydro-project/rust-sitter" -version = "0.4.5" -authors = ["Shadaj Laddad "] +version.workspace = true +authors.workspace = true edition = "2024" license = "MIT" keywords = ["parsing", "codegen"] diff --git a/example/Cargo.toml b/example/Cargo.toml index 
898df92..72f6e17 100644 --- a/example/Cargo.toml +++ b/example/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rust-sitter-example" -version = "0.4.5" -authors = ["Shadaj Laddad "] +version.workspace = true +authors.workspace = true edition = "2021" publish = false diff --git a/macro/Cargo.toml b/macro/Cargo.toml index 57d0ac9..c65bbce 100644 --- a/macro/Cargo.toml +++ b/macro/Cargo.toml @@ -3,9 +3,9 @@ name = "rust-sitter-macro" description = "Procedural macros for Rust Sitter" readme = "../README.md" repository = "https://github.com/hydro-project/rust-sitter" -version = "0.4.5" -authors = ["Shadaj Laddad "] -edition = "2021" +version.workspace = true +authors.workspace = true +edition = "2024" license = "MIT" keywords = ["parsing", "codegen"] categories = ["development-tools"] @@ -18,7 +18,7 @@ path = "src/lib.rs" syn = { version = "2", features = [ "full", "extra-traits" ] } quote = "1" proc-macro2 = "1" -rust-sitter-common = { version= "0.4.5", path = "../common" } +rust-sitter-common = { path = "../common" } [dev-dependencies] insta = "1.39" diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 129c19f..ee5dd52 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -333,7 +333,7 @@ pub fn expand_grammar(input: ItemMod) -> Result { let tree_sitter_ident = Ident::new(&format!("tree_sitter_{grammar_name}"), Span::call_site()); transformed.push(syn::parse_quote! 
{ - extern "C" { + unsafe extern "C" { fn #tree_sitter_ident() -> ::rust_sitter::tree_sitter::Language; } }); diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index e4f86ee..3138137 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -70,7 +70,7 @@ mod grammar { } } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index ee24e84..7ee3e41 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -65,7 +65,7 @@ mod grammar { } } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index 89f8ada..bff8bb6 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -45,7 +45,7 @@ mod grammar { } } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 5fc0562..3f7d745 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ 
b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -63,7 +63,7 @@ mod grammar { } } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 6ffdfbc..7e1ed78 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -69,7 +69,7 @@ mod grammar { } } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index a399403..9efae38 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -69,7 +69,7 @@ mod grammar { } } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index 3010be6..6cd7cd6 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -80,7 +80,7 @@ mod grammar { }) } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap 
b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index 829feb9..57d8c89 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -73,7 +73,7 @@ mod grammar { }) } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 335cfd2..74e73ea 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -57,7 +57,7 @@ mod grammar { }) } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index 0651c0e..528ad7d 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -79,7 +79,7 @@ mod grammar { }) } } - extern "C" { + unsafe extern "C" { fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; } pub fn language() -> ::rust_sitter::tree_sitter::Language { diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 718e312..b8980ff 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -3,8 +3,8 @@ name = "rust-sitter" description = "A package for defining tree-sitter grammars alongside Rust logic" readme = "../README.md" repository = "https://github.com/hydro-project/rust-sitter" -version = "0.4.5" -authors = ["Shadaj Laddad "] +version.workspace = true +authors.workspace = true edition = "2021" license = "MIT" keywords = ["parsing", "codegen"] @@ -21,7 +21,7 
@@ tree-sitter-standard = ["tree-sitter-runtime-standard"] [dependencies] tree-sitter-runtime-c2rust = { package = "tree-sitter-c2rust", version = "0.25.2", optional = true } tree-sitter-runtime-standard = { package = "tree-sitter", version = "0.25.2", optional = true } -rust-sitter-macro = { version = "0.4.5", path = "../macro" } +rust-sitter-macro = { path = "../macro" } [dev-dependencies] insta = "1.39" diff --git a/tool/Cargo.toml b/tool/Cargo.toml index fdb94b0..772d538 100644 --- a/tool/Cargo.toml +++ b/tool/Cargo.toml @@ -3,8 +3,8 @@ name = "rust-sitter-tool" description = "The external tool for Rust Sitter that extracts grammars from Rust definitions" readme = "../README.md" repository = "https://github.com/hydro-project/rust-sitter" -version = "0.4.5" -authors = ["Shadaj Laddad "] +version.workspace = true +authors.workspace = true license = "MIT" edition = "2024" keywords = ["parsing", "codegen"] @@ -24,7 +24,7 @@ syn = { version = "2", features = ["full", "extra-traits"] } syn-inline-mod = "0.6" serde = { version = "1", features = ["derive"] } serde_json = { version = "1", features = ["preserve_order"] } -rust-sitter-common = { version = "0.4.5", path = "../common" } +rust-sitter-common = { path = "../common" } tempfile = { version = "3", optional = true } tree-sitter = { version = "0.25.2", optional = true } From 3732860fc835729cf2521247ffff0cd70122583c Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 14 Jul 2025 10:23:52 -0500 Subject: [PATCH 19/50] Add point to span --- common/src/lib.rs | 8 +++++ ..._optionals__tests__optional_grammar-2.snap | 12 ++++++- ..._optionals__tests__optional_grammar-3.snap | 12 ++++++- ..._optionals__tests__optional_grammar-4.snap | 12 ++++++- ..._optionals__tests__optional_grammar-5.snap | 12 ++++++- ..._optionals__tests__optional_grammar-6.snap | 12 ++++++- ..._optionals__tests__optional_grammar-7.snap | 12 ++++++- ..._optionals__tests__optional_grammar-8.snap | 12 ++++++- 
...e__optionals__tests__optional_grammar.snap | 12 ++++++- ...titions__tests__repetitions_grammar-2.snap | 24 +++++++++++-- ...titions__tests__repetitions_grammar-3.snap | 36 +++++++++++++++++-- ...itions__tests__repetitions_grammar2-2.snap | 24 +++++++++++-- ...itions__tests__repetitions_grammar2-3.snap | 36 +++++++++++++++++-- ...etitions__tests__repetitions_grammar2.snap | 12 ++++++- ...itions__tests__repetitions_grammar3-2.snap | 24 +++++++++++-- ...itions__tests__repetitions_grammar3-3.snap | 36 +++++++++++++++++-- ...itions__tests__repetitions_grammar3-4.snap | 36 +++++++++++++++++-- ...itions__tests__repetitions_grammar3-5.snap | 36 +++++++++++++++++-- ...etitions__tests__repetitions_grammar3.snap | 12 ++++++- macro/src/lib.rs | 20 +++++++++++ runtime/src/lib.rs | 35 ++++++++++++++++-- 21 files changed, 402 insertions(+), 33 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 147c1a4..5dc997a 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -163,6 +163,14 @@ impl TsInput { "value": get_str(get_arg(args, 0, 1)?)?, }) } + // nodes can be double wrapped in fields, although I'm not sure what happens + // when you ask the cursor for the field name? May not be possible to handle + // that in this case. 
+ "field" => { + let _field_name = get_str(get_arg(args, 0, 2)?)?; + let _inner = get_arg(args, 1, 2)?; + todo!() + } k => { return Err(syn::Error::new( Span::call_site(), diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap index 3524791..6d80c40 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap @@ -8,10 +8,20 @@ Ok( _s: (), t: Spanned { value: None, - span: ( + byte_span: ( 1, 1, ), + line_span: ( + Point { + line: 0, + column: 0, + }, + Point { + line: 0, + column: 0, + }, + ), }, _d: Some( (), diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap index 944ec51..ecfbfc6 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap @@ -10,10 +10,20 @@ Ok( _s: (), t: Spanned { value: None, - span: ( + byte_span: ( 2, 2, ), + line_span: ( + Point { + line: 0, + column: 0, + }, + Point { + line: 0, + column: 0, + }, + ), }, _d: None, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap index e1ce440..db1c9b4 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap @@ -10,10 +10,20 @@ Ok( _s: (), t: Spanned { value: None, - span: ( + byte_span: ( 2, 2, ), + line_span: ( + Point { + line: 0, + column: 0, + }, + Point { + line: 0, + column: 0, + }, + ), }, _d: Some( (), diff --git 
a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap index 8615253..7454502 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap @@ -14,10 +14,20 @@ Ok( v: 2, }, ), - span: ( + byte_span: ( 2, 3, ), + line_span: ( + Point { + line: 1, + column: 3, + }, + Point { + line: 1, + column: 4, + }, + ), }, _d: None, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap index fc88456..0244844 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap @@ -14,10 +14,20 @@ Ok( v: 2, }, ), - span: ( + byte_span: ( 2, 3, ), + line_span: ( + Point { + line: 1, + column: 3, + }, + Point { + line: 1, + column: 4, + }, + ), }, _d: Some( (), diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap index ff209e0..6cf3be4 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap @@ -12,10 +12,20 @@ Ok( v: 2, }, ), - span: ( + byte_span: ( 1, 2, ), + line_span: ( + Point { + line: 1, + column: 2, + }, + Point { + line: 1, + column: 3, + }, + ), }, _d: None, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap index 70321bb..772f452 100644 --- 
a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap @@ -12,10 +12,20 @@ Ok( v: 2, }, ), - span: ( + byte_span: ( 1, 2, ), + line_span: ( + Point { + line: 1, + column: 2, + }, + Point { + line: 1, + column: 3, + }, + ), }, _d: Some( (), diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap index da6a7b4..06b230e 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap @@ -8,10 +8,20 @@ Ok( _s: (), t: Spanned { value: None, - span: ( + byte_span: ( 1, 1, ), + line_span: ( + Point { + line: 0, + column: 0, + }, + Point { + line: 0, + column: 0, + }, + ), }, _d: None, }, diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap index a2c1782..f46d9f8 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap @@ -8,16 +8,36 @@ Ok( value: [ Spanned { value: 1, - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, ], - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, }, ) diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap index 911fce0..caef295 100644 --- 
a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap @@ -8,23 +8,53 @@ Ok( value: [ Spanned { value: 1, - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, Spanned { value: 2, - span: ( + byte_span: ( 3, 4, ), + line_span: ( + Point { + line: 1, + column: 4, + }, + Point { + line: 1, + column: 5, + }, + ), }, ], - span: ( + byte_span: ( 0, 4, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 5, + }, + ), }, }, ) diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-2.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-2.snap index c152824..27f29c4 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-2.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-2.snap @@ -8,16 +8,36 @@ Ok( value: [ Spanned { value: 1, - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, ], - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, }, ) diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-3.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-3.snap index 1541591..a0be55d 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-3.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2-3.snap @@ -8,23 +8,53 @@ Ok( value: [ Spanned { value: 1, - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 
1, + column: 2, + }, + ), }, Spanned { value: 2, - span: ( + byte_span: ( 2, 3, ), + line_span: ( + Point { + line: 1, + column: 3, + }, + Point { + line: 1, + column: 4, + }, + ), }, ], - span: ( + byte_span: ( 0, 3, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 4, + }, + ), }, }, ) diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap index 9b45326..990568f 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap @@ -6,10 +6,20 @@ Ok( NumberList { numbers: Spanned { value: [], - span: ( + byte_span: ( 0, 0, ), + line_span: ( + Point { + line: 0, + column: 0, + }, + Point { + line: 0, + column: 0, + }, + ), }, }, ) diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-2.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-2.snap index 78611b1..58d5a1c 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-2.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-2.snap @@ -10,16 +10,36 @@ Ok( value: Some( 1, ), - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, ], - span: ( + byte_span: ( 0, 2, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 3, + }, + ), }, metadata: 123, }, diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-3.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-3.snap index ad53176..4834d86 100644 --- 
a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-3.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-3.snap @@ -10,25 +10,55 @@ Ok( value: Some( 1, ), - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, Spanned { value: Some( 2, ), - span: ( + byte_span: ( 3, 4, ), + line_span: ( + Point { + line: 1, + column: 4, + }, + Point { + line: 1, + column: 5, + }, + ), }, ], - span: ( + byte_span: ( 0, 4, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 5, + }, + ), }, metadata: 123, }, diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-4.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-4.snap index e172dff..3afd20f 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-4.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-4.snap @@ -10,25 +10,55 @@ Ok( value: Some( 1, ), - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, Spanned { value: Some( 2, ), - span: ( + byte_span: ( 4, 5, ), + line_span: ( + Point { + line: 1, + column: 5, + }, + Point { + line: 1, + column: 6, + }, + ), }, ], - span: ( + byte_span: ( 0, 5, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 6, + }, + ), }, metadata: 123, }, diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-5.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-5.snap index f9c38ef..a9d5d67 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-5.snap +++ 
b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3-5.snap @@ -10,25 +10,55 @@ Ok( value: Some( 1, ), - span: ( + byte_span: ( 0, 1, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 2, + }, + ), }, Spanned { value: Some( 2, ), - span: ( + byte_span: ( 4, 5, ), + line_span: ( + Point { + line: 1, + column: 5, + }, + Point { + line: 1, + column: 6, + }, + ), }, ], - span: ( + byte_span: ( 0, 6, ), + line_span: ( + Point { + line: 1, + column: 1, + }, + Point { + line: 1, + column: 7, + }, + ), }, metadata: 123, }, diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap index 6ab2e90..9dcf8fb 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap @@ -6,10 +6,20 @@ Ok( NumberList { numbers: Spanned { value: [], - span: ( + byte_span: ( 0, 0, ), + line_span: ( + Point { + line: 0, + column: 0, + }, + Point { + line: 0, + column: 0, + }, + ), }, metadata: 123, }, diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 06423a1..4156245 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -3,8 +3,27 @@ use syn::{parse_macro_input, ItemMod}; mod errors; mod expansion; +// mod grammar; use expansion::*; +// // TODO: Make a direct grammar function... +// This would allow us to write something like: +// struct Function { +// name: String, +// inputs: Vec, +// grammar! 
{ +// rule: seq("function", $.ident, "(", repeat($.input), ")") -> |id, inputs| Function { name, +// inputs: inputs.into() }; +// +// ident: /re/; +// input: seq($.ident, ":", $.ident); +// +// } +// #[proc_macro] +// pub fn grammar2(input: proc_macro::TokenStream) -> proc_macro::TokenStream { +// grammar::parse_grammar_macro(input) +// } + #[proc_macro_attribute] /// Marks the top level AST node where parsing should start. /// @@ -22,6 +41,7 @@ pub fn language( item } + #[proc_macro_attribute] /// This annotation marks a node as extra, which can safely be skipped while parsing. /// This is useful for handling whitespace/newlines/comments. diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index b17358c..03b5eb0 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -195,7 +195,8 @@ pub struct Spanned { pub value: T, /// The span of the node in the source. The first value is the inclusive start /// of the span, and the second value is the exclusive end of the span. - pub span: (usize, usize), + pub byte_span: (usize, usize), + pub line_span: (Point, Point), } impl Deref for Spanned { @@ -206,6 +207,27 @@ impl Deref for Spanned { } } +/// A line and column point in a source parse. These are 1 based to correspond with a text editor +/// line and column. Note, this is a divergence from tree-sitter, which uses a zero-based `Point`. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Point { + pub line: usize, + pub column: usize, +} + +impl Point { + fn from_tree_sitter(p: tree_sitter::Point) -> Self { + Self { + line: p.row + 1, + column: p.column + 1, + } + } + const EMPTY: Self = Self { line: 0, column: 0 }; + const fn empty() -> Self { + Self::EMPTY + } +} + impl, U> Extract> for Spanned { type LeafFn = T::LeafFn; fn extract( @@ -216,9 +238,18 @@ impl, U> Extract> for Spanned { ) -> Spanned { Spanned { value: T::extract(node, source, last_idx, leaf_fn), - span: node + byte_span: node .map(|n| (n.start_byte(), n.end_byte())) .unwrap_or((last_idx, last_idx)), + line_span: node + .map(|n| { + ( + Point::from_tree_sitter(n.start_position()), + Point::from_tree_sitter(n.end_position()), + ) + }) + // TODO: We can track points as well instead of just `last_idx` as needed here. + .unwrap_or((Point::empty(), Point::empty())), } } } From d2c91badecefc72a3ac0dcb2d5a052f041acff9b Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 14 Jul 2025 19:30:08 -0500 Subject: [PATCH 20/50] Added various experimental, some finished, some unfinished features: * Can transform on a node directly for more complex parsing * Pass along `Point` for better span handling * Begin experimenting with mapping functions... 
--- common/src/lib.rs | 5 +- ..._optionals__tests__optional_grammar-2.snap | 8 +- ..._optionals__tests__optional_grammar-3.snap | 8 +- ..._optionals__tests__optional_grammar-4.snap | 8 +- ...e__optionals__tests__optional_grammar.snap | 8 +- ...etitions__tests__repetitions_grammar2.snap | 8 +- ...etitions__tests__repetitions_grammar3.snap | 8 +- macro/src/expansion.rs | 58 +++-- ...t_sitter_macro__tests__enum_prec_left.snap | 19 +- ...t_sitter_macro__tests__enum_recursive.snap | 17 +- ...macro__tests__enum_transformed_fields.snap | 11 +- ...r_macro__tests__enum_with_named_field.snap | 17 +- ...macro__tests__enum_with_unamed_vector.snap | 29 +-- ...r_macro__tests__grammar_unboxed_field.snap | 29 +-- ...t_sitter_macro__tests__spanned_in_vec.snap | 53 +++-- ...ust_sitter_macro__tests__struct_extra.snap | 28 ++- ..._sitter_macro__tests__struct_optional.snap | 38 +-- ...st_sitter_macro__tests__struct_repeat.snap | 53 +++-- runtime/src/__private.rs | 17 +- runtime/src/lib.rs | 219 ++++++++++++++---- 20 files changed, 407 insertions(+), 234 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 5dc997a..455346a 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -207,6 +207,7 @@ static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { "text", "pattern", "with", + "with_node", "transform", ] .into_iter() @@ -313,9 +314,9 @@ pub fn wrap_leaf_type(ty: &Type, skip_over: &HashSet<&str>) -> Type { panic!("Expected angle bracketed path"); } } else { - parse_quote!(rust_sitter::WithLeaf<#ty>) + parse_quote!(rust_sitter::WithLeaf<#ty, _>) } } else { - parse_quote!(rust_sitter::WithLeaf<#ty>) + parse_quote!(rust_sitter::WithLeaf<#ty, _>) } } diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap index 6d80c40..c3c1a4f 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap +++ 
b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap @@ -14,12 +14,12 @@ Ok( ), line_span: ( Point { - line: 0, - column: 0, + line: 1, + column: 2, }, Point { - line: 0, - column: 0, + line: 1, + column: 2, }, ), }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap index ecfbfc6..77b42e3 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap @@ -16,12 +16,12 @@ Ok( ), line_span: ( Point { - line: 0, - column: 0, + line: 1, + column: 3, }, Point { - line: 0, - column: 0, + line: 1, + column: 3, }, ), }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap index db1c9b4..26be7d6 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap @@ -16,12 +16,12 @@ Ok( ), line_span: ( Point { - line: 0, - column: 0, + line: 1, + column: 3, }, Point { - line: 0, - column: 0, + line: 1, + column: 3, }, ), }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap index 06b230e..b4a5402 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap @@ -14,12 +14,12 @@ Ok( ), line_span: ( Point { - line: 0, - column: 0, + line: 1, + column: 2, }, Point { - line: 0, - column: 0, + line: 1, + column: 2, }, ), }, diff --git 
a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap index 990568f..54fa677 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar2.snap @@ -12,12 +12,12 @@ Ok( ), line_span: ( Point { - line: 0, - column: 0, + line: 1, + column: 1, }, Point { - line: 0, - column: 0, + line: 1, + column: 1, }, ), }, diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap index 9dcf8fb..19d7204 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar3.snap @@ -12,12 +12,12 @@ Ok( ), line_span: ( Point { - line: 0, - column: 0, + line: 1, + column: 1, }, Point { - line: 0, - column: 0, + line: 1, + column: 1, }, ), }, diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index ee5dd52..a162a3a 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -3,7 +3,7 @@ use std::collections::HashSet; use crate::errors::IteratorExt as _; use proc_macro2::Span; -use quote::{quote, ToTokens}; +use quote::{ToTokens, quote}; use rust_sitter_common::*; use syn::{punctuated::Punctuated, *}; @@ -29,12 +29,17 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { .iter() .find(|attr| sitter_attr_matches(attr, "leaf")); - let transform_attr = leaf - .attrs - .iter() - .find(|attr| sitter_attr_matches(attr, "transform") || sitter_attr_matches(attr, "with")); + let transform = leaf.attrs.iter().find_map(|attr| { + if sitter_attr_matches(attr, "transform") || sitter_attr_matches(attr, "with") { + Some((false, attr.parse_args::().unwrap())) + } else if 
sitter_attr_matches(attr, "with_node") { + Some((true, attr.parse_args::().unwrap())) + } else { + None + } + }); - if transform_attr.is_some() && leaf_attr.is_none() { + if transform.is_some() && leaf_attr.is_none() { panic!("Cannot transform non-leaf nodes"); } @@ -51,25 +56,32 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { }); } - let transform_param = transform_attr - .as_ref() - .map(|attr| attr.parse_args::().unwrap()); - - let (leaf_type, closure_expr): (Type, Expr) = match transform_param { - Some(closure) => { + let (leaf_type, closure_expr): (Type, Expr) = match transform { + Some((is_node, closure)) => { let mut non_leaf = HashSet::new(); - non_leaf.insert("Spanned"); - non_leaf.insert("Box"); - non_leaf.insert("Option"); - non_leaf.insert("Vec"); + // Major hackery... + if !is_node { + non_leaf.insert("Spanned"); + non_leaf.insert("Box"); + non_leaf.insert("Option"); + non_leaf.insert("Vec"); + } let wrapped_leaf_type = wrap_leaf_type(&leaf_type, &non_leaf); - (wrapped_leaf_type, syn::parse_quote!(Some(&#closure))) + let input_type: syn::Type = if is_node { + syn::parse_quote!(&::rust_sitter::NodeExt<'_>) + } else { + syn::parse_quote!(&str) + }; + ( + wrapped_leaf_type, + syn::parse_quote!(Some((#closure) as fn(#input_type) -> #leaf_type)), + ) } None => (leaf_type, syn::parse_quote!(None)), }; syn::parse_quote!({ - ::rust_sitter::__private::extract_field::<#leaf_type,_>(cursor, source, last_idx, #ident_str, #closure_expr) + ::rust_sitter::__private::extract_field::<#leaf_type,_>(cursor, source, last_idx, last_pt, #ident_str, #closure_expr) }) } @@ -166,7 +178,7 @@ fn gen_struct_or_variant( }; Ok( - syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| #construct_expr)), + syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx, last_pt| #construct_expr)), ) } @@ -273,10 +285,10 @@ pub fn expand_grammar(input: ItemMod) -> Result { let enum_name = 
&e.ident; let extract_impl: Item = syn::parse_quote! { impl ::rust_sitter::Extract<#enum_name> for #enum_name { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _leaf_fn: Option<&Self::LeafFn>) -> Self { + fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -313,10 +325,10 @@ pub fn expand_grammar(input: ItemMod) -> Result { let extract_impl: Item = syn::parse_quote! { impl ::rust_sitter::Extract<#struct_name> for #struct_name { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, _leaf_fn: Option<&Self::LeafFn>) -> Self { + fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { let node = node.expect("no node found"); #extract_expr } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index 3138137..16bb656 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -8,13 +8,14 @@ mod grammar { Sub(Box, (), Box), } impl ::rust_sitter::Extract for Expression { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -28,10 +29,10 @@ mod grammar { 
"Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }) }, @@ -40,21 +41,21 @@ mod grammar { "Expression_Sub" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expression::Sub( { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }, { ::rust_sitter::__private::extract_field::<(), _>( - cursor, source, last_idx, "1", None, + cursor, source, last_idx, last_pt, "1", None, ) }, { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "2", None, + cursor, source, last_idx, last_pt, "2", None, ) }, ) diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 7ee3e41..0839595 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -8,13 +8,14 @@ mod grammar { Neg((), Box), } impl ::rust_sitter::Extract for Expression { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -28,10 +29,10 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, 
source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }) }, @@ -40,16 +41,16 @@ mod grammar { "Expression_Neg" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expression::Neg( { ::rust_sitter::__private::extract_field::<(), _>( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }, { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "1", None, + cursor, source, last_idx, last_pt, "1", None, ) }, ) diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index bff8bb6..a22e040 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -7,13 +7,14 @@ mod grammar { Number(i32), } impl ::rust_sitter::Extract for Expression { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -27,10 +28,10 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 3f7d745..6b9fcd9 100644 --- 
a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -8,13 +8,14 @@ mod grammar { Neg { _bang: (), value: Box }, } impl ::rust_sitter::Extract for Expr { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -28,10 +29,10 @@ mod grammar { "Expr_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expr::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }) }, @@ -40,15 +41,15 @@ mod grammar { "Expr_Neg" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| Expr::Neg { + move |cursor, last_idx, last_pt| Expr::Neg { _bang: { ::rust_sitter::__private::extract_field::<(), _>( - cursor, source, last_idx, "_bang", None, + cursor, source, last_idx, last_pt, "_bang", None, ) }, value: { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "value", None, + cursor, source, last_idx, last_pt, "value", None, ) }, }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 7e1ed78..9485cd1 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -7,37 +7,40 @@ mod grammar { value: u32, } impl ::rust_sitter::Extract for Number { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn 
extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Number { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Number { value: { ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "value", None, + cursor, source, last_idx, last_pt, "value", None, ) }, - } - }) + }, + ) } } pub enum Expr { Numbers(Vec), } impl ::rust_sitter::Extract for Expr { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -51,10 +54,10 @@ mod grammar { "Expr_Numbers" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expr::Numbers({ ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index 9efae38..855ee85 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -7,37 +7,40 @@ mod grammar { e: Expression, } impl ::rust_sitter::Extract for Language { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: 
Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Language { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Language { e: { ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "e", None, + cursor, source, last_idx, last_pt, "e", None, ) }, - } - }) + }, + ) } } pub enum Expression { Number(i32), } impl ::rust_sitter::Extract for Expression { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -51,10 +54,10 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }) }, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index 6cd7cd6..0ffe1cf 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -8,76 +8,83 @@ mod grammar { numbers: Vec>, } impl ::rust_sitter::Extract for NumberList { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - 
_leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - NumberList { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| NumberList { numbers: { ::rust_sitter::__private::extract_field::>, _>( - cursor, source, last_idx, "numbers", None, + cursor, source, last_idx, last_pt, "numbers", None, ) }, - } - }) + }, + ) } } pub struct Number { v: i32, } impl ::rust_sitter::Extract for Number { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Number { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Number { v: { ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "v", None, + cursor, source, last_idx, last_pt, "v", None, ) }, - } - }) + }, + ) } } struct Whitespace { _whitespace: (), } impl ::rust_sitter::Extract for Whitespace { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Whitespace { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Whitespace { _whitespace: { 
::rust_sitter::__private::extract_field::<(), _>( cursor, source, last_idx, + last_pt, "_whitespace", None, ) }, - } - }) + }, + ) } } unsafe extern "C" { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index 57d8c89..0c51d33 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -7,13 +7,14 @@ mod grammar { Number(i32), } impl ::rust_sitter::Extract for Expression { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -27,10 +28,10 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx| { + move |cursor, last_idx, last_pt| { Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "0", None, + cursor, source, last_idx, last_pt, "0", None, ) }) }, @@ -49,28 +50,31 @@ mod grammar { _whitespace: (), } impl ::rust_sitter::Extract for Whitespace { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Whitespace { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Whitespace { _whitespace: { ::rust_sitter::__private::extract_field::<(), _>( cursor, 
source, last_idx, + last_pt, "_whitespace", None, ) }, - } - }) + }, + ) } } unsafe extern "C" { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 74e73ea..ac87ad5 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -8,53 +8,57 @@ mod grammar { t: Option, } impl ::rust_sitter::Extract for Language { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Language { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Language { v: { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "v", None, + cursor, source, last_idx, last_pt, "v", None, ) }, t: { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "t", None, + cursor, source, last_idx, last_pt, "t", None, ) }, - } - }) + }, + ) } } pub struct Number { v: i32, } impl ::rust_sitter::Extract for Number { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Number { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Number { v: { 
::rust_sitter::__private::extract_field::( - cursor, source, last_idx, "v", None, + cursor, source, last_idx, last_pt, "v", None, ) }, - } - }) + }, + ) } } unsafe extern "C" { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index 528ad7d..1d6a4f4 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -7,76 +7,83 @@ mod grammar { numbers: Vec, } impl ::rust_sitter::Extract for NumberList { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - NumberList { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| NumberList { numbers: { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, "numbers", None, + cursor, source, last_idx, last_pt, "numbers", None, ) }, - } - }) + }, + ) } } pub struct Number { v: i32, } impl ::rust_sitter::Extract for Number { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Number { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Number { v: { ::rust_sitter::__private::extract_field::( - cursor, source, 
last_idx, "v", None, + cursor, source, last_idx, last_pt, "v", None, ) }, - } - }) + }, + ) } } struct Whitespace { _whitespace: (), } impl ::rust_sitter::Extract for Whitespace { - type LeafFn = (); + type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract( + fn extract<'a>( node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx| { - Whitespace { + ::rust_sitter::__private::extract_struct_or_variant( + node, + move |cursor, last_idx, last_pt| Whitespace { _whitespace: { ::rust_sitter::__private::extract_field::<(), _>( cursor, source, last_idx, + last_pt, "_whitespace", None, ) }, - } - }) + }, + ) } } unsafe extern "C" { diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 8e4eb21..35dc34b 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -8,7 +8,7 @@ use crate::{tree_sitter, Extract}; pub fn extract_struct_or_variant( node: tree_sitter::Node, - construct_expr: impl Fn(&mut Option, &mut usize) -> T, + construct_expr: impl Fn(&mut Option, &mut usize, &mut tree_sitter::Point) -> T, ) -> T { let mut parent_cursor = node.walk(); construct_expr( @@ -18,6 +18,7 @@ pub fn extract_struct_or_variant( None }, &mut node.start_byte(), + &mut node.start_position(), ) } @@ -25,36 +26,39 @@ pub fn extract_field, T>( cursor_opt: &mut Option, source: &[u8], last_idx: &mut usize, + last_pt: &mut tree_sitter::Point, field_name: &str, - closure_ref: Option<<::LeafFn>, + closure_ref: Option>, ) -> T { if let Some(cursor) = cursor_opt.as_mut() { loop { let n = cursor.node(); if let Some(name) = cursor.field_name() { if name == field_name { - let out = LT::extract(Some(n), source, *last_idx, closure_ref); + let out = LT::extract(Some(n), source, *last_idx, *last_pt, closure_ref); if 
!cursor.goto_next_sibling() { *cursor_opt = None; }; *last_idx = n.end_byte(); + *last_pt = n.end_position(); return out; } else { - return LT::extract(None, source, *last_idx, closure_ref); + return LT::extract(None, source, *last_idx, *last_pt, closure_ref); } } else { *last_idx = n.end_byte(); + *last_pt = n.end_position(); } if !cursor.goto_next_sibling() { - return LT::extract(None, source, *last_idx, closure_ref); + return LT::extract(None, source, *last_idx, *last_pt, closure_ref); } } } else { - LT::extract(None, source, *last_idx, closure_ref) + LT::extract(None, source, *last_idx, *last_pt, closure_ref) } } @@ -96,6 +100,7 @@ pub fn parse>( Some(root_node), input.as_bytes(), 0, + Default::default(), None, )) } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 03b5eb0..e534a72 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -10,81 +10,198 @@ pub use tree_sitter_runtime_standard as tree_sitter; #[cfg(feature = "tree-sitter-c2rust")] pub use tree_sitter_runtime_c2rust as tree_sitter; +use tree_sitter::Node; + /// Defines the logic used to convert a node in a Tree Sitter tree to /// the corresponding Rust type. 
pub trait Extract { - type LeafFn: ?Sized; + type LeafFn<'a>: Clone; + fn extract<'a>( + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + leaf_fn: Option>, + ) -> Output; +} + +#[derive(Debug, Clone, Copy)] +pub struct NodeExt<'a> { + pub node: Node<'a>, + pub source: &'a [u8], + pub last_idx: usize, + pub last_pt: tree_sitter::Point, +} + +pub trait StrOrNode { + type Output; + fn apply(self, source: &[u8], node: Node<'_>, last_idx: usize, last_pt: tree_sitter::Point) -> Self::Output; +} + +impl StrOrNode for fn(&str) -> L { + type Output = L; + fn apply(self, source: &[u8], node: Node<'_>, _last_idx: usize, _last_pt: tree_sitter::Point) -> L { + let text = node.utf8_text(source).expect("Could not get text"); + self(text) + } +} + +impl StrOrNode for fn(&NodeExt<'_>) -> L { + type Output = L; + fn apply(self, source: &[u8], node: Node<'_>, last_idx: usize, last_pt: tree_sitter::Point) -> L { + let node = NodeExt { + node, + source, + last_idx, + last_pt, + }; + self(&node) + } +} + +pub trait Handler { fn extract( - node: Option, + self, + node: Option, source: &[u8], last_idx: usize, - leaf_fn: Option<&Self::LeafFn>, + last_pt: tree_sitter::Point, ) -> Output; } -pub struct WithLeaf { +macro_rules! 
handler_fn { + ($($t:ident),*) => { + impl),*> Handler<($($t),*), O> for F + where F: FnOnce($($t),*) -> O, + { + fn extract( + self, + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + ) -> O { + let node = node.expect("No node found"); + let mut c = node.walk(); + let mut it = node.children(&mut c); + self( + $( + $t::extract(it.next(), source, last_idx, last_pt, None) + ),* + ) + } + } + + }; +} + +handler_fn!(T1, T2); + +/// Map for `#[with(...)]` +pub struct WithLeaf { _phantom: std::marker::PhantomData, + _f: std::marker::PhantomData, } -impl Extract for WithLeaf { - type LeafFn = dyn Fn(&str) -> L; +impl Extract for WithLeaf +where + F: StrOrNode + Clone, +{ + type LeafFn<'a> = F; - fn extract( - node: Option, + fn extract<'a>( + node: Option, source: &[u8], - _last_idx: usize, - leaf_fn: Option<&Self::LeafFn>, + last_idx: usize, + last_pt: tree_sitter::Point, + leaf_fn: Option>, ) -> L { - node.and_then(|n| n.utf8_text(source).ok()) - .map(|s| leaf_fn.expect("No leaf function on WithLeaf")(s)) - .expect("Could not extract WithLeaf") + let node = node.expect("Expected a node"); + leaf_fn + .expect("No leaf function on WithLeaf") + .apply(source, node, last_idx, last_pt) + } +} + +#[derive(Clone)] +pub struct MappedExtract { + _type: std::marker::PhantomData, + _prev: std::marker::PhantomData, + _curr: std::marker::PhantomData, +} + +#[derive(Clone)] +pub struct MappedLeaf { + prev: Option

, + curr: F, +} + +impl Extract for MappedExtract +where + F: Extract, +{ + type LeafFn<'a> = MappedLeaf, &'a dyn Fn(L0) -> L1>; + fn extract<'a>( + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + leaf_fn: Option>, + ) -> L1 { + let mapped = leaf_fn.unwrap(); + let prev = F::extract(node, source, last_idx, last_pt, mapped.prev); + (mapped.curr)(prev) } } // Common implementations for various types. impl Extract<()> for () { - type LeafFn = (); - fn extract( - _node: Option, + type LeafFn<'a> = (); + fn extract<'a>( + _node: Option, _source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: tree_sitter::Point, + _leaf_fn: Option>, ) { } } impl, U> Extract> for Option { - type LeafFn = T::LeafFn; - fn extract( - node: Option, + type LeafFn<'a> = T::LeafFn<'a>; + fn extract<'a>( + node: Option, source: &[u8], last_idx: usize, - leaf_fn: Option<&Self::LeafFn>, + last_pt: tree_sitter::Point, + leaf_fn: Option>, ) -> Option { - node.map(|n| T::extract(Some(n), source, last_idx, leaf_fn)) + node.map(|n| T::extract(Some(n), source, last_idx, last_pt, leaf_fn)) } } impl, U> Extract> for Box { - type LeafFn = T::LeafFn; - fn extract( - node: Option, + type LeafFn<'a> = T::LeafFn<'a>; + fn extract<'a>( + node: Option, source: &[u8], last_idx: usize, - leaf_fn: Option<&Self::LeafFn>, + last_pt: tree_sitter::Point, + leaf_fn: Option>, ) -> Box { - Box::new(T::extract(node, source, last_idx, leaf_fn)) + Box::new(T::extract(node, source, last_idx, last_pt, leaf_fn)) } } impl, U> Extract> for Vec { - type LeafFn = T::LeafFn; - fn extract( - node: Option, + type LeafFn<'a> = T::LeafFn<'a>; + fn extract<'a>( + node: Option, source: &[u8], mut last_idx: usize, - leaf_fn: Option<&Self::LeafFn>, + mut last_pt: tree_sitter::Point, + leaf_fn: Option>, ) -> Vec { node.map(|node| { let mut cursor = node.walk(); @@ -93,10 +210,11 @@ impl, U> Extract> for Vec { loop { let n = cursor.node(); if cursor.field_name().is_some() { - 
out.push(T::extract(Some(n), source, last_idx, leaf_fn)); + out.push(T::extract(Some(n), source, last_idx, last_pt, leaf_fn.clone())); } last_idx = n.end_byte(); + last_pt = n.end_position(); if !cursor.goto_next_sibling() { break; @@ -113,12 +231,13 @@ impl, U> Extract> for Vec { macro_rules! extract_from_str { ($t:ty) => { impl Extract<$t> for $t { - type LeafFn = (); - fn extract( - node: Option, + type LeafFn<'a> = (); + fn extract<'a>( + node: Option, source: &[u8], _last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + _last_pt: tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let text = node.utf8_text(source).expect("No text found for node"); @@ -145,19 +264,20 @@ extract_from_str!(String); macro_rules! extract_for_tuple { ($($t:ident),*) => { impl<$($t: Extract<$t>),*> Extract<($($t),*)> for ($($t),*) { - type LeafFn = (); - fn extract( - node: Option, + type LeafFn<'a> = (); + fn extract<'a>( + node: Option, source: &[u8], last_idx: usize, - _leaf_fn: Option<&Self::LeafFn>, + last_pt: tree_sitter::Point, + _leaf_fn: Option>, ) -> Self { let node = node.expect("No node found"); let mut c = node.walk(); let mut it = node.children(&mut c); ( $( - $t::extract(it.next(), source, last_idx, None) + $t::extract(it.next(), source, last_idx, last_pt, None) ),* ) } @@ -229,15 +349,16 @@ impl Point { } impl, U> Extract> for Spanned { - type LeafFn = T::LeafFn; - fn extract( - node: Option, + type LeafFn<'a> = T::LeafFn<'a>; + fn extract<'a>( + node: Option, source: &[u8], last_idx: usize, - leaf_fn: Option<&Self::LeafFn>, + last_pt: tree_sitter::Point, + leaf_fn: Option>, ) -> Spanned { Spanned { - value: T::extract(node, source, last_idx, leaf_fn), + value: T::extract(node, source, last_idx, last_pt, leaf_fn), byte_span: node .map(|n| (n.start_byte(), n.end_byte())) .unwrap_or((last_idx, last_idx)), @@ -248,8 +369,10 @@ impl, U> Extract> for Spanned { Point::from_tree_sitter(n.end_position()), ) }) - // TODO: We can track 
points as well instead of just `last_idx` as needed here. - .unwrap_or((Point::empty(), Point::empty())), + .unwrap_or(( + Point::from_tree_sitter(last_pt), + Point::from_tree_sitter(last_pt), + )), } } } From e430115a440fbfb91db856ef36db2cd85edb2811 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Tue, 15 Jul 2025 11:37:18 -0500 Subject: [PATCH 21/50] Better error messages --- runtime/src/lib.rs | 128 ++++++++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 35 deletions(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index e534a72..1a75942 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -35,12 +35,24 @@ pub struct NodeExt<'a> { pub trait StrOrNode { type Output; - fn apply(self, source: &[u8], node: Node<'_>, last_idx: usize, last_pt: tree_sitter::Point) -> Self::Output; + fn apply( + self, + source: &[u8], + node: Node<'_>, + last_idx: usize, + last_pt: tree_sitter::Point, + ) -> Self::Output; } impl StrOrNode for fn(&str) -> L { type Output = L; - fn apply(self, source: &[u8], node: Node<'_>, _last_idx: usize, _last_pt: tree_sitter::Point) -> L { + fn apply( + self, + source: &[u8], + node: Node<'_>, + _last_idx: usize, + _last_pt: tree_sitter::Point, + ) -> L { let text = node.utf8_text(source).expect("Could not get text"); self(text) } @@ -48,7 +60,13 @@ impl StrOrNode for fn(&str) -> L { impl StrOrNode for fn(&NodeExt<'_>) -> L { type Output = L; - fn apply(self, source: &[u8], node: Node<'_>, last_idx: usize, last_pt: tree_sitter::Point) -> L { + fn apply( + self, + source: &[u8], + node: Node<'_>, + last_idx: usize, + last_pt: tree_sitter::Point, + ) -> L { let node = NodeExt { node, source, @@ -210,7 +228,13 @@ impl, U> Extract> for Vec { loop { let n = cursor.node(); if cursor.field_name().is_some() { - out.push(T::extract(Some(n), source, last_idx, last_pt, leaf_fn.clone())); + out.push(T::extract( + Some(n), + source, + last_idx, + last_pt, + leaf_fn.clone(), + )); } last_idx = n.end_byte(); @@ -239,7 +263,7 
@@ macro_rules! extract_from_str { _last_pt: tree_sitter::Point, _leaf_fn: Option>, ) -> Self { - let node = node.expect("No node found"); + let node = node.expect(concat!("No node found in parsing extract: ", stringify!($t))); let text = node.utf8_text(source).expect("No text found for node"); text.parse().expect("Failed to parse type") } @@ -272,7 +296,7 @@ macro_rules! extract_for_tuple { last_pt: tree_sitter::Point, _leaf_fn: Option>, ) -> Self { - let node = node.expect("No node found"); + let node = node.expect("No node found in tuple extract"); let mut c = node.walk(); let mut it = node.children(&mut c); ( @@ -336,16 +360,12 @@ pub struct Point { } impl Point { - fn from_tree_sitter(p: tree_sitter::Point) -> Self { + pub(crate) fn from_tree_sitter(p: tree_sitter::Point) -> Self { Self { line: p.row + 1, column: p.column + 1, } } - const EMPTY: Self = Self { line: 0, column: 0 }; - const fn empty() -> Self { - Self::EMPTY - } } impl, U> Extract> for Spanned { @@ -384,6 +404,8 @@ pub mod errors { #[cfg(feature = "tree-sitter-c2rust")] use tree_sitter_runtime_c2rust as tree_sitter; + use crate::Point; + #[derive(Debug)] /// An explanation for an error that occurred during parsing. pub enum ParseErrorReason { @@ -401,9 +423,34 @@ pub mod errors { pub struct ParseError { pub reason: ParseErrorReason, /// Inclusive start of the error. - pub start: usize, + pub start_byte: usize, /// Exclusive end of the error. 
- pub end: usize, + pub end_byte: usize, + pub start_point: Point, + pub end_point: Point, + pub text: String, + pub kind: &'static str, + pub parent_context: Option, + } + + impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Failure to parse node:")?; + write!(f, "\t{}:{} - {}:{}", self.start_point.line, self.start_point.column, self.end_point.line, self.end_point.column)?; + write!(f, " {}", self.text)?; + if let Some(parent) = &self.parent_context { + writeln!(f)?; + write!(f, "\t(parent node: {})", parent.kind)?; + } + Ok(()) + } + } + + #[derive(Debug)] + pub struct ParentContext { + pub kind: &'static str, + pub content: String, + pub sexpr: String, } /// Given the root node of a Tree Sitter parsing result, accumulates all @@ -413,7 +460,21 @@ pub mod errors { source: &[u8], errors: &mut Vec, ) { - if node.is_error() { + let start_byte = node.start_byte(); + let end_byte = node.end_byte(); + let start_point = Point::from_tree_sitter(node.start_position()); + let end_point = Point::from_tree_sitter(node.end_position()); + let kind = node.kind(); + let text = node.utf8_text(source).unwrap().to_owned(); + let mut parent_context = None; + let reason = if node.is_error() { + if let Some(p) = node.parent() { + parent_context = Some(ParentContext { + kind: p.kind(), + content: p.utf8_text(source).unwrap().to_owned(), + sexpr: p.to_sexp(), + }); + } if node.child(0).is_some() { // we managed to parse some children, so collect underlying errors for this node let mut inner_errors = vec![]; @@ -421,37 +482,34 @@ pub mod errors { node.children(&mut cursor) .for_each(|c| collect_parsing_errors(&c, source, &mut inner_errors)); - errors.push(ParseError { - reason: ParseErrorReason::FailedNode(inner_errors), - start: node.start_byte(), - end: node.end_byte(), - }) + ParseErrorReason::FailedNode(inner_errors) } else { let contents = node.utf8_text(source).unwrap(); if !contents.is_empty() { - 
errors.push(ParseError { - reason: ParseErrorReason::UnexpectedToken(contents.to_string()), - start: node.start_byte(), - end: node.end_byte(), - }) + ParseErrorReason::UnexpectedToken(contents.to_string()) } else { - errors.push(ParseError { - reason: ParseErrorReason::FailedNode(vec![]), - start: node.start_byte(), - end: node.end_byte(), - }) + ParseErrorReason::FailedNode(vec![]) } } } else if node.is_missing() { - errors.push(ParseError { - reason: ParseErrorReason::MissingToken(node.kind().to_string()), - start: node.start_byte(), - end: node.end_byte(), - }) + ParseErrorReason::MissingToken(node.kind().to_string()) } else if node.has_error() { let mut cursor = node.walk(); node.children(&mut cursor) .for_each(|c| collect_parsing_errors(&c, source, errors)); - } + return; + } else { + return; + }; + errors.push(ParseError { + reason, + start_byte, + end_byte, + start_point, + end_point, + text, + kind, + parent_context, + }); } } From 49e985df9dc433c1dd3148c643f565d4c9938e23 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 17 Jul 2025 11:01:30 -0500 Subject: [PATCH 22/50] Major rewrite: use `derive(Rule)`, allow expanding multiple sub modules into a common grammar. 
--- README.md | 1 + common/src/expansion.rs | 475 +++++----- common/src/lib.rs | 3 +- example/build.rs | 13 +- example/src/arithmetic.rs | 43 +- example/src/main.rs | 8 +- example/src/optionals.rs | 34 +- example/src/repetitions.rs | 132 +-- ...e__arithmetic__tests__failed_parses-2.snap | 15 +- ...e__arithmetic__tests__failed_parses-3.snap | 42 +- ...e__arithmetic__tests__failed_parses-4.snap | 42 +- ...ple__arithmetic__tests__failed_parses.snap | 42 +- ...petitions__tests__repetitions_grammar.snap | 15 +- ...xample__words__tests__words_grammar-2.snap | 15 +- ...xample__words__tests__words_grammar-3.snap | 15 +- ..._example__words__tests__words_grammar.snap | 15 +- example/src/words.rs | 26 +- macro/src/expansion.rs | 542 +++++++----- macro/src/lib.rs | 823 ++++++++++-------- ...t_sitter_macro__tests__enum_prec_left.snap | 40 +- ...t_sitter_macro__tests__enum_recursive.snap | 40 +- ...macro__tests__enum_transformed_fields.snap | 40 +- ...r_macro__tests__enum_with_named_field.snap | 40 +- ...macro__tests__enum_with_unamed_vector.snap | 50 +- ...r_macro__tests__grammar_unboxed_field.snap | 48 +- ...t_sitter_macro__tests__spanned_in_vec.snap | 59 +- ...ust_sitter_macro__tests__struct_extra.snap | 48 +- ..._sitter_macro__tests__struct_optional.snap | 49 +- ...st_sitter_macro__tests__struct_repeat.snap | 57 +- runtime/src/lib.rs | 1 + runtime/src/rule.rs | 14 + tool/src/lib.rs | 236 ++--- ...l__tests__enum_conflicts_prec_dynamic.snap | 2 +- ...st_sitter_tool__tests__enum_prec_left.snap | 4 +- ...st_sitter_tool__tests__enum_recursive.snap | 4 +- ..._tool__tests__enum_transformed_fields.snap | 4 +- ...er_tool__tests__enum_with_named_field.snap | 2 +- ..._tool__tests__enum_with_unamed_vector.snap | 2 +- ...st_sitter_tool__tests__grammar_repeat.snap | 2 +- ...t_sitter_tool__tests__grammar_repeat1.snap | 2 +- ...l__tests__grammar_repeat_no_delimiter.snap | 2 +- ...er_tool__tests__grammar_unboxed_field.snap | 4 +- ...tter_tool__tests__grammar_with_extras.snap | 4 +- 
.../rust_sitter_tool__tests__immediate.snap | 2 +- ...st_sitter_tool__tests__spanned_in_vec.snap | 2 +- ...t_sitter_tool__tests__struct_optional.snap | 2 +- 46 files changed, 1787 insertions(+), 1274 deletions(-) create mode 100644 runtime/src/rule.rs diff --git a/README.md b/README.md index 0a0efa0..cab5039 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +# TODO - OUT OF DATE, needs an update for latest refactor # Rust Sitter - Otonoma fork **This project is a fork of [rust-sitter](https://github.com/hydro-project/rust-sitter). It has been heavily modified in many breaking ways.** diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 5fb88a8..e955989 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -4,43 +4,200 @@ use super::*; use serde_json::{Map, Value, json}; use syn::{parse::Parse, punctuated::Punctuated}; -/// Generates JSON strings defining Tree Sitter grammars for every Rust Sitter -/// grammar found in the given module and recursive submodules. -pub fn generate_grammars(root_file: Vec) -> Vec { - let mut out = vec![]; - root_file - .iter() - .for_each(|i| generate_all_grammars(i, &mut out)); - out +#[derive(Debug)] +pub struct RuleDerive { + pub ident: syn::Ident, + pub attrs: Vec, + pub extras: Extras, + pub data: syn::Data, } -pub fn generate_grammars_string(root_file: Vec) -> String { - serde_json::to_string(&generate_grammars(root_file)).unwrap() +impl RuleDerive { + pub fn from_derive_input(d: DeriveInput) -> Option { + if d.attrs.iter().any(|a| { + let Ok(list) = a.meta.require_list() else { + return false; + }; + let derives = list + .parse_args_with(Punctuated::::parse_terminated) + .unwrap(); + derives + .iter() + .any(|p| p == &parse_quote!(rust_sitter::Rule) || p == &parse_quote!(Rule)) + }) { + Some(Self::from_derive_input_known(d)) + } else { + None + } + } + + // Used by the proc macro directly. 
+ pub fn from_derive_input_known(d: DeriveInput) -> Self { + let extras = Extras::new(&d.attrs); + Self { + ident: d.ident, + attrs: d.attrs, + extras, + data: d.data, + } + } } -fn generate_all_grammars(item: &Item, out: &mut Vec) { - if let Item::Mod(m) = item { - m.content - .iter() - .for_each(|(_, items)| items.iter().for_each(|i| generate_all_grammars(i, out))); +/// Generate a single grammar per module. +pub fn generate_grammar(root_file: Vec) -> Value { + let mut state = ExpansionState::default(); + // for some reason, source_file must be the first key for things to work + state.rules_map.insert("source_file".to_string(), json!({})); - if m.attrs - .iter() - .any(|a| a.path() == &parse_quote!(rust_sitter::grammar)) - { - out.push(generate_grammar(m)) + for item in root_file { + process_item(item, &mut state); + } + + let language = state + .language_rule + .expect("Must specify exactly one root with #[language]") + .to_string(); + state.rules_map.insert( + "source_file".to_string(), + state.rules_map.get(&language).unwrap().clone(), + ); + let word_rule = state.word_rule; + let rules_map = state.rules_map; + let extras_list = state.extras; + json!({ + "name": language, + "word": word_rule, + "rules": rules_map, + "extras": extras_list + }) +} + +#[derive(Default)] +struct ExpansionState { + rules_map: Map, + word_rule: Option, + language_rule: Option, + extras: Vec, +} + +impl ExpansionState { + fn set_language(&mut self, ident: &Ident) { + if let Some(existing) = &self.language_rule { + panic!( + "Language rule already defined as {}:{:?}, found duplicate with {}:{:?}", + existing, + existing.span(), + ident, + ident.span(), + ); } + self.language_rule = Some(ident.clone()); + } + fn set_word(&mut self, ident: String) { + if let Some(existing) = &self.word_rule { + panic!("Word rule already defined as {existing}, found duplicate with {ident}",); + } + self.word_rule = Some(ident); + } + fn push_extra(&mut self, ident: &Ident) { + self.extras.push(json!({ + 
"type": "SYMBOL", + "name": ident.to_string(), + })); + } +} + +fn process_item(item: Item, ctx: &mut ExpansionState) { + match item { + Item::Struct(_) | Item::Enum(_) => { + // Try and convert it to a derive. + let stream = item.to_token_stream(); + // stream.into_iter + let input = syn::parse2::(stream) + .map(RuleDerive::from_derive_input) + .expect("Failed to parse as DeriveInput"); + if let Some(input) = input { + // Parse the structure now. + process_rule(input, ctx); + } + } + Item::Mod(m) => { + // Recursively process this now. + let (_, items) = m.content.expect("Module must be inlined"); + for item in items { + process_item(item, ctx); + } + } + _ => {} + } +} + +fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) { + if input.extras.language { + ctx.set_language(&input.ident); + } + // if input.extras.word { + // ctx.set_word(&input.ident); + // } + if input.extras.extra { + ctx.push_extra(&input.ident); + } + + let ident = input.ident; + + match input.data { + Data::Struct(DataStruct { fields, .. }) => { + gen_struct_or_variant(ident.to_string(), &input.attrs, fields.clone(), ctx); + } + Data::Enum(DataEnum { variants, .. 
}) => { + variants.iter().for_each(|v| { + gen_struct_or_variant( + format!("{}_{}", ident, v.ident), + &v.attrs, + v.fields.clone(), + ctx, + ) + }); + + let mut members: Vec = vec![]; + variants.iter().for_each(|v| { + let variant_path = format!("{}_{}", ident.clone(), v.ident); + members.push(json!({ + "type": "SYMBOL", + "name": variant_path + })) + }); + + let rule = json!({ + "type": "CHOICE", + "members": members + }); + + let precs = input.extras; + if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { + panic!( + "The attributes `prec_left` and `prec_right` cannot be applied directly to an enum" + ); + } + let rule = precs.apply(rule); + + ctx.rules_map.insert(ident.to_string(), rule); + } + Data::Union(_) => panic!("Union not supported"), } } #[derive(Debug)] -struct Extras { - prec_param: Option, - prec_left_param: Option, - prec_right_param: Option, - prec_dynamic_param: Option, - immediate: bool, - token: bool, +pub struct Extras { + pub prec_param: Option, + pub prec_left_param: Option, + pub prec_right_param: Option, + pub prec_dynamic_param: Option, + pub immediate: bool, + pub token: bool, + pub language: bool, + pub extra: bool, + pub word: bool, } impl Extras { @@ -68,23 +225,29 @@ impl Extras { let prec_dynamic_param = prec_dynamic_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); - let immediate_attr = attrs + let immediate = attrs .iter() - .find(|attr| sitter_attr_matches(attr, "immediate")); + .any(|attr| sitter_attr_matches(attr, "immediate")); - let token = attrs.iter().find(|attr| sitter_attr_matches(attr, "token")); + let token = attrs.iter().any(|attr| sitter_attr_matches(attr, "token")); + let extra = attrs.iter().any(|attr| sitter_attr_matches(attr, "extra")); + let language = attrs.iter().any(|a| sitter_attr_matches(a, "language")); + let word = attrs.iter().any(|a| sitter_attr_matches(a, "word")); Self { prec_param, prec_left_param, prec_right_param, prec_dynamic_param, - immediate: immediate_attr.is_some(), 
- token: token.is_some(), + immediate, + token, + extra, + word, + language, } } - fn apply(self, rule: serde_json::Value) -> serde_json::Value { + fn apply(&self, rule: serde_json::Value) -> serde_json::Value { let Self { prec_param, prec_left_param, @@ -92,6 +255,7 @@ impl Extras { prec_dynamic_param, immediate, token, + .. } = self; let rule = if let Some(Expr::Lit(lit)) = prec_param { @@ -148,16 +312,16 @@ impl Extras { rule }; - if immediate && token { + if *immediate && *token { panic!("Cannot be immediate and token"); } - if immediate { + if *immediate { json!({ "type": "IMMEDIATE_TOKEN", "content": rule }) - } else if token { + } else if *token { json!({ "type": "TOKEN", "content": rule, @@ -172,10 +336,19 @@ fn gen_field( path: String, leaf_type: Type, attrs: Vec, - word_rule: &mut Option, - out: &mut Map, + ctx: &mut ExpansionState, ) -> (Value, bool) { let precs = Extras::new(&attrs); + + if precs.word { + // TODO: We don't want to allow this, but because we generate a dummy `_unit` field + // currently, we have to. Super dumb, but we can fix it later. 
+ ctx.set_word(path.clone()); + // panic!("Cannot specify word on a field"); + } + if precs.language { + panic!("Cannot specify language on a field"); + } let leaf_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "leaf")); let text_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "text")); @@ -202,19 +375,12 @@ fn gen_field( return (precs.apply(input.evaluate().unwrap()), false); } - if attrs.iter().any(|attr| sitter_attr_matches(attr, "word")) { - if word_rule.is_some() { - panic!("Multiple `word` rules specified"); - } - - *word_rule = Some(path.clone()); - } - let leaf_input = leaf_attr.and_then(|a| a.parse_args::().ok()); if !is_vec && !is_option { if let Some(input) = leaf_input { - out.insert(path.clone(), precs.apply(input.evaluate().unwrap())); + ctx.rules_map + .insert(path.clone(), precs.apply(input.evaluate().unwrap())); ( json!({ @@ -247,16 +413,23 @@ fn gen_field( path.clone(), inner_type_vec, leaf_attr.iter().cloned().cloned().collect(), - word_rule, - out, + ctx, ); - let delimited_attr = attrs + let (delimited_param, repeat_non_empty) = attrs .iter() - .find(|attr| sitter_attr_matches(attr, "delimited")); - - let delimited_param = - delimited_attr.map(|a| a.parse_args::().unwrap()); + .find_map(|attr| { + if sitter_attr_matches(attr, "sep_by") { + Some((Some(attr.parse_args::().unwrap()), false)) + } else if sitter_attr_matches(attr, "sep_by1") { + Some((Some(attr.parse_args::().unwrap()), true)) + } else if sitter_attr_matches(attr, "repeat1") { + Some((None, true)) + } else { + None + } + }) + .unwrap_or_else(|| (None, false)); // NOTE (JAB): All of this is pretty ugly, I think we can flatten some of these types // without losing anything. 
@@ -265,29 +438,10 @@ fn gen_field( format!("{path}_vec_delimiter"), parse_quote!(()), vec![parse_quote!(#[text(#delimited_param)])], - word_rule, - out, + ctx, ) }); - let repeat_attr = attrs - .iter() - .find(|attr| sitter_attr_matches(attr, "repeat")); - - let repeat_params = repeat_attr.and_then(|a| { - a.parse_args_with(Punctuated::::parse_terminated) - .ok() - }); - - let repeat_non_empty = repeat_params - .and_then(|p| { - p.iter() - .find(|param| param.path == "non_empty") - .map(|p| p.expr.clone()) - }) - .map(|e| e == syn::parse_quote!(true)) - .unwrap_or(false); - let field_rule_non_optional = json!({ "type": "FIELD", "name": format!("{path}_vec_element"), @@ -353,7 +507,7 @@ fn gen_field( let vec_contents = precs.apply(vec_contents); let contents_ident = format!("{path}_vec_contents"); - out.insert(contents_ident.clone(), vec_contents); + ctx.rules_map.insert(contents_ident.clone(), vec_contents); ( json!({ @@ -364,8 +518,7 @@ fn gen_field( ) } else { // is_option - let (field_json, field_optional) = - gen_field(path, inner_type_option, attrs, word_rule, out); + let (field_json, field_optional) = gen_field(path, inner_type_option, attrs, ctx); if field_optional { panic!("Option> is not supported"); @@ -377,16 +530,14 @@ fn gen_field( fn gen_struct_or_variant( path: String, - attrs: Vec, + attrs: &[Attribute], fields: Fields, - out: &mut Map, - word_rule: &mut Option, + ctx: &mut ExpansionState, ) { fn gen_field_optional( path: &str, field: &Field, - word_rule: &mut Option, - out: &mut Map, + ctx: &mut ExpansionState, ident_str: String, ) -> Value { // Produce a cleaner grammar: fields with `_` are hidden fields. 
@@ -396,7 +547,7 @@ fn gen_struct_or_variant( format!("{path}_{ident_str}") }; let (field_contents, is_option) = - gen_field(path, field.ty.clone(), field.attrs.clone(), word_rule, out); + gen_field(path, field.ty.clone(), field.attrs.clone(), ctx); let core = json!({ "type": "FIELD", @@ -436,17 +587,15 @@ fn gen_struct_or_variant( .map(|v| v.to_string()) .unwrap_or(format!("{i}")); - Some(gen_field_optional(&path, field, word_rule, out, ident_str)) + Some(gen_field_optional(&path, field, ctx, ident_str)) } }) .collect::>(); - let precs = Extras::new(&attrs); - let base_rule = match fields { Fields::Unit => { let dummy_field = Field { - attrs: attrs.clone(), + attrs: attrs.to_owned(), vis: Visibility::Inherited, mutability: FieldMutability::None, ident: None, @@ -456,7 +605,7 @@ fn gen_struct_or_variant( elems: Punctuated::new(), }), }; - gen_field_optional(&path, &dummy_field, word_rule, out, "unit".to_owned()) + gen_field_optional(&path, &dummy_field, ctx, "unit".to_owned()) } _ => json!({ "type": "SEQ", @@ -464,153 +613,7 @@ fn gen_struct_or_variant( }), }; - let rule = precs.apply(base_rule); - - out.insert(path, rule); -} - -pub fn generate_grammar(module: &ItemMod) -> Value { - let mut rules_map = Map::new(); - // for some reason, source_file must be the first key for things to work - rules_map.insert("source_file".to_string(), json!({})); - - let mut extras_list = vec![]; - let attr = module - .attrs - .iter() - .find(|a| a.path() == &syn::parse_quote!(rust_sitter::grammar)) - .expect("Each grammar must have a name"); - let grammar_name_expr = attr - .parse_args_with(Punctuated::::parse_terminated) - .expect("Inputs should be a comma separated list"); - if grammar_name_expr.is_empty() { - panic!("Expected a string literal for grammar name"); - // return Err(syn::Error::new( - // Span::call_site(), - // "Expected a string literal grammar name", - // )); - } - if grammar_name_expr.len() > 2 { - panic!("Expected at most two inputs"); - } - let grammar_name = if 
let Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Str(s), - }) = grammar_name_expr.first().unwrap() - { - s.value() - } else { - panic!("Expected a string literal grammar name"); - }; - - let _should_parse = if let Some(Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Bool(b), - })) = grammar_name_expr.last() - { - b.value() - } else { - false - }; - - let (_, contents) = module.content.as_ref().unwrap(); - - let root_type = contents - .iter() - .find_map(|item| match item { - Item::Enum(ItemEnum { ident, attrs, .. }) - | Item::Struct(ItemStruct { ident, attrs, .. }) => { - if attrs - .iter() - .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::language)) - { - Some(ident.clone()) - } else { - None - } - } - _ => None, - }) - .expect("Each parser must have the root type annotated with `#[rust_sitter::language]`") - .to_string(); - - // Optionally locate the rule annotated with `#[rust_sitter::word]`. - let mut word_rule = None; - contents.iter().for_each(|c| { - let (symbol, attrs) = match c { - Item::Enum(e) => { - e.variants.iter().for_each(|v| { - gen_struct_or_variant( - format!("{}_{}", e.ident, v.ident), - v.attrs.clone(), - v.fields.clone(), - &mut rules_map, - &mut word_rule, - ) - }); - - let mut members: Vec = vec![]; - e.variants.iter().for_each(|v| { - let variant_path = format!("{}_{}", e.ident.clone(), v.ident); - members.push(json!({ - "type": "SYMBOL", - "name": variant_path - })) - }); - - let rule = json!({ - "type": "CHOICE", - "members": members - }); - - let precs = Extras::new(&e.attrs); - if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { - panic!( - "The attributes `prec_left` and `prec_right` cannot be applied directly to an enum" - ); - } - let rule = precs.apply(rule); - - rules_map.insert(e.ident.to_string(), rule); - - (e.ident.to_string(), e.attrs.clone()) - } - - Item::Struct(s) => { - gen_struct_or_variant( - s.ident.to_string(), - s.attrs.clone(), - s.fields.clone(), - &mut rules_map, - &mut word_rule, - ); + 
let precs = Extras::new(attrs); - (s.ident.to_string(), s.attrs.clone()) - } - - _ => return, - }; - - if attrs - .iter() - .any(|a| sitter_attr_matches(a, "extra")) - { - extras_list.push(json!({ - "type": "SYMBOL", - "name": symbol - })); - } - }); - - rules_map.insert( - "source_file".to_string(), - rules_map.get(&root_type).unwrap().clone(), - ); - - json!({ - "name": grammar_name, - "word": word_rule, - "rules": rules_map, - "extras": extras_list - }) + ctx.rules_map.insert(path, precs.apply(base_rule)); } diff --git a/common/src/lib.rs b/common/src/lib.rs index 455346a..9f7a34b 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -203,7 +203,8 @@ static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { "prec_dynamic", "extra", "repeat", - "delimited", + "sep_by", + "sep_by1", "text", "pattern", "with", diff --git a/example/build.rs b/example/build.rs index 241bfb2..963f1fc 100644 --- a/example/build.rs +++ b/example/build.rs @@ -1,6 +1,13 @@ -use std::path::PathBuf; - fn main() { println!("cargo:rerun-if-changed=src"); - rust_sitter_tool::build_parsers(&PathBuf::from("src/main.rs")); + let examples = std::fs::read_dir("./src/").unwrap(); + for example in examples { + let example = example.unwrap(); + let path = example.path(); + if path.is_file() { + if path.file_stem().unwrap().to_str().unwrap() != "main" { + rust_sitter_tool::build_parser(&path); + } + } + } } diff --git a/example/src/arithmetic.rs b/example/src/arithmetic.rs index 33813d0..fd05d6b 100644 --- a/example/src/arithmetic.rs +++ b/example/src/arithmetic.rs @@ -1,26 +1,27 @@ -#[rust_sitter::grammar("arithmetic")] pub mod grammar { - #[rust_sitter::language] - #[derive(PartialEq, Eq, Debug)] + use rust_sitter::Rule; + #[derive(PartialEq, Eq, Debug, Rule)] + #[language] pub enum Expression { - Number(#[rust_sitter::leaf(pattern(r"\d+"))] i32), - #[rust_sitter::prec_left(1)] + Number(#[leaf(pattern(r"\d+"))] i32), + #[prec_left(1)] Sub( Box, - #[rust_sitter::leaf("-")] (), + #[leaf("-")] (), 
Box, ), - #[rust_sitter::prec_left(2)] + #[prec_left(2)] Mul( Box, - #[rust_sitter::leaf("*")] (), + #[leaf("*")] (), Box, ), } - #[rust_sitter::extra] + #[derive(Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } @@ -33,12 +34,12 @@ mod tests { #[wasm_bindgen_test::wasm_bindgen_test] #[test] fn successful_parses() { - assert_eq!(grammar::parse("1").unwrap(), Expression::Number(1)); + assert_eq!(grammar::Expression::parse("1").unwrap(), Expression::Number(1)); - assert_eq!(grammar::parse(" 1").unwrap(), Expression::Number(1)); + assert_eq!(grammar::Expression::parse(" 1").unwrap(), Expression::Number(1)); assert_eq!( - grammar::parse("1 - 2").unwrap(), + grammar::Expression::parse("1 - 2").unwrap(), Expression::Sub( Box::new(Expression::Number(1)), (), @@ -47,7 +48,7 @@ mod tests { ); assert_eq!( - grammar::parse("1 - 2 - 3").unwrap(), + grammar::Expression::parse("1 - 2 - 3").unwrap(), Expression::Sub( Box::new(Expression::Sub( Box::new(Expression::Number(1)), @@ -60,7 +61,7 @@ mod tests { ); assert_eq!( - grammar::parse("1 - 2 * 3").unwrap(), + grammar::Expression::parse("1 - 2 * 3").unwrap(), Expression::Sub( Box::new(Expression::Number(1)), (), @@ -73,7 +74,7 @@ mod tests { ); assert_eq!( - grammar::parse("1 * 2 * 3").unwrap(), + grammar::Expression::parse("1 * 2 * 3").unwrap(), Expression::Mul( Box::new(Expression::Mul( Box::new(Expression::Number(1)), @@ -86,7 +87,7 @@ mod tests { ); assert_eq!( - grammar::parse("1 * 2 - 3").unwrap(), + grammar::Expression::parse("1 * 2 - 3").unwrap(), Expression::Sub( Box::new(Expression::Mul( Box::new(Expression::Number(1)), @@ -101,9 +102,9 @@ mod tests { #[test] fn failed_parses() { - insta::assert_debug_snapshot!(grammar::parse("1 + 2")); - insta::assert_debug_snapshot!(grammar::parse("1 - 2 -")); - insta::assert_debug_snapshot!(grammar::parse("a1")); - insta::assert_debug_snapshot!(grammar::parse("1a")); + 
insta::assert_debug_snapshot!(grammar::Expression::parse("1 + 2")); + insta::assert_debug_snapshot!(grammar::Expression::parse("1 - 2 -")); + insta::assert_debug_snapshot!(grammar::Expression::parse("a1")); + insta::assert_debug_snapshot!(grammar::Expression::parse("1a")); } } diff --git a/example/src/main.rs b/example/src/main.rs index d9615c0..e892ccc 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -20,7 +20,7 @@ fn convert_parse_error_to_diagnostics( message: format!("Missing token: \"{tok}\""), code: Some("S000".to_string()), spans: vec![SpanLabel { - span: file_span.subspan(error.start as u64, error.end as u64), + span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), style: SpanStyle::Primary, label: Some(format!("missing \"{tok}\"")), }], @@ -31,7 +31,7 @@ fn convert_parse_error_to_diagnostics( message: format!("Unexpected token: \"{tok}\""), code: Some("S000".to_string()), spans: vec![SpanLabel { - span: file_span.subspan(error.start as u64, error.end as u64), + span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), style: SpanStyle::Primary, label: Some(format!("unexpected \"{tok}\"")), }], @@ -44,7 +44,7 @@ fn convert_parse_error_to_diagnostics( message: "Failed to parse node".to_string(), code: Some("S000".to_string()), spans: vec![SpanLabel { - span: file_span.subspan(error.start as u64, error.end as u64), + span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), style: SpanStyle::Primary, label: Some("failed".to_string()), }], @@ -72,7 +72,7 @@ fn main() { break; } - match arithmetic::grammar::parse(input) { + match arithmetic::grammar::Expression::parse(input) { Ok(expr) => println!("{expr:?}"), Err(errs) => { let mut codemap = CodeMap::new(); diff --git a/example/src/optionals.rs b/example/src/optionals.rs index aeb9ec0..4234cff 100644 --- a/example/src/optionals.rs +++ b/example/src/optionals.rs @@ -1,26 +1,26 @@ -#[rust_sitter::grammar("optionals")] #[allow(dead_code)] mod grammar { 
use rust_sitter::Spanned; + use rust_sitter::Rule; - #[rust_sitter::language] - #[derive(Debug)] + #[derive(Debug, Rule)] + #[language] pub struct Language { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] // Not necessary, done automatically. // #[rust_sitter::with(|v| v.parse().unwrap())] v: Option, - #[rust_sitter::leaf("_")] + #[leaf("_")] _s: (), t: Spanned>, - #[rust_sitter::leaf(".")] + #[leaf(".")] _d: Option<()>, } - #[derive(Debug)] + #[derive(Debug, Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] - #[rust_sitter::with(|v| v.parse().unwrap())] + #[leaf(re(r"\d+"))] + #[with(|v| v.parse().unwrap())] v: i32, } } @@ -31,13 +31,13 @@ mod tests { #[test] fn optional_grammar() { - insta::assert_debug_snapshot!(grammar::parse("_")); - insta::assert_debug_snapshot!(grammar::parse("_.")); - insta::assert_debug_snapshot!(grammar::parse("1_")); - insta::assert_debug_snapshot!(grammar::parse("1_.")); - insta::assert_debug_snapshot!(grammar::parse("1_2")); - insta::assert_debug_snapshot!(grammar::parse("1_2.")); - insta::assert_debug_snapshot!(grammar::parse("_2")); - insta::assert_debug_snapshot!(grammar::parse("_2.")); + insta::assert_debug_snapshot!(grammar::Language::parse("_")); + insta::assert_debug_snapshot!(grammar::Language::parse("_.")); + insta::assert_debug_snapshot!(grammar::Language::parse("1_")); + insta::assert_debug_snapshot!(grammar::Language::parse("1_.")); + insta::assert_debug_snapshot!(grammar::Language::parse("1_2")); + insta::assert_debug_snapshot!(grammar::Language::parse("1_2.")); + insta::assert_debug_snapshot!(grammar::Language::parse("_2")); + insta::assert_debug_snapshot!(grammar::Language::parse("_2.")); } } diff --git a/example/src/repetitions.rs b/example/src/repetitions.rs index 4f6b784..0adaeab 100644 --- a/example/src/repetitions.rs +++ b/example/src/repetitions.rs @@ -1,64 +1,64 @@ -#[rust_sitter::grammar("repetitions")] pub mod grammar { - use rust_sitter::Spanned; + use rust_sitter::{Rule, Spanned}; - 
#[rust_sitter::language] - #[derive(Debug)] + #[derive(Debug, Rule)] + #[language] #[allow(dead_code)] pub struct NumberList { - #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited(",")] - #[rust_sitter::leaf(pattern(r"\d+"))] + #[sep_by1(",")] + #[leaf(pattern(r"\d+"))] numbers: Spanned>>, } - #[rust_sitter::extra] + #[derive(Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } -#[rust_sitter::grammar("repetitions_without_delim")] -pub mod grammar2 { - use rust_sitter::Spanned; - - #[rust_sitter::language] - #[derive(Debug)] - #[allow(dead_code)] - pub struct NumberList { - #[rust_sitter::leaf(pattern(r"\d+"))] - numbers: Spanned>>, - } - - #[rust_sitter::extra] - struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] - _whitespace: (), - } -} - -#[rust_sitter::grammar("repetitions_optional_elem")] -pub mod grammar3 { - use rust_sitter::Spanned; - - #[rust_sitter::language] - #[derive(Debug)] - #[allow(dead_code)] - pub struct NumberList { - #[rust_sitter::delimited(",")] - #[rust_sitter::leaf(pattern(r"\d+"))] - numbers: Spanned>>>, - #[rust_sitter::skip(123)] - metadata: u32, - } - - #[rust_sitter::extra] - struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] - _whitespace: (), - } -} +// TODO: Currently not allowed, needs to be fixed. 
+// pub mod grammar2 { +// use rust_sitter::{Rule, Spanned}; +// +// #[derive(Debug, Rule)] +// #[language] +// #[allow(dead_code)] +// pub struct NumberList { +// #[leaf(pattern(r"\d+"))] +// numbers: Spanned>>, +// } +// +// #[derive(Rule)] +// #[extra] +// struct Whitespace { +// #[leaf(pattern(r"\s"))] +// _whitespace: (), +// } +// } +// +// pub mod grammar3 { +// use rust_sitter::{Rule, Spanned}; +// +// #[derive(Debug, Rule)] +// #[language] +// #[allow(dead_code)] +// pub struct NumberList { +// #[sep_by(",")] +// #[leaf(pattern(r"\d+"))] +// numbers: Spanned>>>, +// #[skip(123)] +// metadata: u32, +// } +// +// #[derive(Rule)] +// #[extra] +// struct Whitespace { +// #[leaf(pattern(r"\s"))] +// _whitespace: (), +// } +// } #[cfg(test)] mod tests { @@ -66,24 +66,24 @@ mod tests { #[test] fn repetitions_grammar() { - insta::assert_debug_snapshot!(grammar::parse("")); - insta::assert_debug_snapshot!(grammar::parse("1")); - insta::assert_debug_snapshot!(grammar::parse("1, 2")); + insta::assert_debug_snapshot!(grammar::NumberList::parse("")); + insta::assert_debug_snapshot!(grammar::NumberList::parse("1")); + insta::assert_debug_snapshot!(grammar::NumberList::parse("1, 2")); } - #[test] - fn repetitions_grammar2() { - insta::assert_debug_snapshot!(grammar2::parse("")); - insta::assert_debug_snapshot!(grammar2::parse("1")); - insta::assert_debug_snapshot!(grammar2::parse("1 2")); - } + // #[test] + // fn repetitions_grammar2() { + // insta::assert_debug_snapshot!(grammar2::parse("")); + // insta::assert_debug_snapshot!(grammar2::parse("1")); + // insta::assert_debug_snapshot!(grammar2::parse("1 2")); + // } - #[test] - fn repetitions_grammar3() { - insta::assert_debug_snapshot!(grammar3::parse("")); - insta::assert_debug_snapshot!(grammar3::parse("1,")); - insta::assert_debug_snapshot!(grammar3::parse("1, 2")); - insta::assert_debug_snapshot!(grammar3::parse("1,, 2")); - insta::assert_debug_snapshot!(grammar3::parse("1,, 2,")); - } + // #[test] + // fn 
repetitions_grammar3() { + // insta::assert_debug_snapshot!(grammar3::parse("")); + // insta::assert_debug_snapshot!(grammar3::parse("1,")); + // insta::assert_debug_snapshot!(grammar3::parse("1, 2")); + // insta::assert_debug_snapshot!(grammar3::parse("1,, 2")); + // insta::assert_debug_snapshot!(grammar3::parse("1,, 2,")); + // } } diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap index 86710ae..59e0b4a 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap @@ -8,8 +8,19 @@ Err( reason: MissingToken( "Expression_Number_0", ), - start: 7, - end: 7, + start_byte: 7, + end_byte: 7, + start_point: Point { + line: 1, + column: 8, + }, + end_point: Point { + line: 1, + column: 8, + }, + text: "", + kind: "Expression_Number_0", + parent_context: None, }, ], ) diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap index b0598f3..2b1f70c 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap @@ -11,13 +11,47 @@ Err( reason: UnexpectedToken( "a", ), - start: 0, - end: 1, + start_byte: 0, + end_byte: 1, + start_point: Point { + line: 1, + column: 1, + }, + end_point: Point { + line: 1, + column: 2, + }, + text: "a", + kind: "ERROR", + parent_context: Some( + ParentContext { + kind: "ERROR", + content: "a", + sexpr: "(ERROR (UNEXPECTED 'a'))", + }, + ), }, ], ), - start: 0, - end: 1, + start_byte: 0, + end_byte: 1, + start_point: Point { + line: 1, + column: 1, + }, + end_point: Point { + line: 1, + column: 2, + }, + text: "a", + kind: "ERROR", + parent_context: Some( 
+ ParentContext { + kind: "source_file", + content: "a1", + sexpr: "(source_file (ERROR (UNEXPECTED 'a')) (Expression_Number 0: (Expression_Number_0)))", + }, + ), }, ], ) diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap index a7a3cb4..6bf8f04 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap @@ -11,13 +11,47 @@ Err( reason: UnexpectedToken( "a", ), - start: 1, - end: 2, + start_byte: 1, + end_byte: 2, + start_point: Point { + line: 1, + column: 2, + }, + end_point: Point { + line: 1, + column: 3, + }, + text: "a", + kind: "ERROR", + parent_context: Some( + ParentContext { + kind: "ERROR", + content: "a", + sexpr: "(ERROR (UNEXPECTED 'a'))", + }, + ), }, ], ), - start: 1, - end: 2, + start_byte: 1, + end_byte: 2, + start_point: Point { + line: 1, + column: 2, + }, + end_point: Point { + line: 1, + column: 3, + }, + text: "a", + kind: "ERROR", + parent_context: Some( + ParentContext { + kind: "source_file", + content: "1a", + sexpr: "(source_file (Expression_Number 0: (Expression_Number_0)) (ERROR (UNEXPECTED 'a')))", + }, + ), }, ], ) diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap index 110e5e2..c1e920d 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap @@ -11,13 +11,47 @@ Err( reason: UnexpectedToken( "+", ), - start: 2, - end: 3, + start_byte: 2, + end_byte: 3, + start_point: Point { + line: 1, + column: 3, + }, + end_point: Point { + line: 1, + column: 4, + }, + text: "+", + kind: "ERROR", + parent_context: Some( + ParentContext { + kind: 
"ERROR", + content: "1 +", + sexpr: "(ERROR (Expression_Number_0) (Whitespace) (UNEXPECTED '+'))", + }, + ), }, ], ), - start: 0, - end: 3, + start_byte: 0, + end_byte: 3, + start_point: Point { + line: 1, + column: 1, + }, + end_point: Point { + line: 1, + column: 4, + }, + text: "1 +", + kind: "ERROR", + parent_context: Some( + ParentContext { + kind: "source_file", + content: "1 + 2", + sexpr: "(source_file (ERROR (Expression_Number_0) (Whitespace) (UNEXPECTED '+')) (Whitespace) (Expression_Number 0: (Expression_Number_0)))", + }, + ), }, ], ) diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap index 33db0bc..e6bb5a4 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap @@ -8,8 +8,19 @@ Err( reason: FailedNode( [], ), - start: 0, - end: 0, + start_byte: 0, + end_byte: 0, + start_point: Point { + line: 1, + column: 1, + }, + end_point: Point { + line: 1, + column: 1, + }, + text: "", + kind: "ERROR", + parent_context: None, }, ], ) diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap index f396fdb..dd8b715 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap @@ -8,8 +8,19 @@ Err( reason: FailedNode( [], ), - start: 0, - end: 5, + start_byte: 0, + end_byte: 5, + start_point: Point { + line: 1, + column: 1, + }, + end_point: Point { + line: 1, + column: 6, + }, + text: "hello", + kind: "ERROR", + parent_context: None, }, ], ) diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap 
b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap index d9a0e68..e5acc74 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap @@ -8,8 +8,19 @@ Err( reason: FailedNode( [], ), - start: 0, - end: 7, + start_byte: 0, + end_byte: 7, + start_point: Point { + line: 1, + column: 1, + }, + end_point: Point { + line: 1, + column: 8, + }, + text: "ifhello", + kind: "ERROR", + parent_context: None, }, ], ) diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap index 104301e..c88d31b 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap @@ -8,8 +8,19 @@ Err( reason: FailedNode( [], ), - start: 0, - end: 2, + start_byte: 0, + end_byte: 2, + start_point: Point { + line: 1, + column: 1, + }, + end_point: Point { + line: 1, + column: 3, + }, + text: "if", + kind: "ERROR", + parent_context: None, }, ], ) diff --git a/example/src/words.rs b/example/src/words.rs index 0b34773..799831e 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -1,19 +1,21 @@ -#[rust_sitter::grammar("words")] pub mod grammar { - #[rust_sitter::language] - #[derive(Debug)] + use rust_sitter::Rule; + + #[derive(Debug, Rule)] + #[language] #[allow(dead_code)] pub struct Words { - #[rust_sitter::leaf("if")] + #[leaf("if")] keyword: (), - #[rust_sitter::word] - #[rust_sitter::leaf(pattern(r"[a-z_]+"))] + #[word] + #[leaf(pattern(r"[a-z_]+"))] word: String, } - #[rust_sitter::extra] + #[derive(Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } @@ -24,9 +26,9 @@ mod tests { #[test] fn words_grammar() { - insta::assert_debug_snapshot!(grammar::parse("if")); - 
insta::assert_debug_snapshot!(grammar::parse("hello")); - insta::assert_debug_snapshot!(grammar::parse("ifhello")); - insta::assert_debug_snapshot!(grammar::parse("if hello")); + insta::assert_debug_snapshot!(grammar::Words::parse("if")); + insta::assert_debug_snapshot!(grammar::Words::parse("hello")); + insta::assert_debug_snapshot!(grammar::Words::parse("ifhello")); + insta::assert_debug_snapshot!(grammar::Words::parse("if hello")); } } diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index a162a3a..8a6fb0b 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -1,11 +1,10 @@ -use rust_sitter_common::is_sitter_attr; use std::collections::HashSet; use crate::errors::IteratorExt as _; use proc_macro2::Span; use quote::{ToTokens, quote}; use rust_sitter_common::*; -use syn::{punctuated::Punctuated, *}; +use syn::*; pub enum ParamOrField { Param(Expr), @@ -21,6 +20,146 @@ impl ToTokens for ParamOrField { } } +/// Expands a `#[derive(Rule)]` input into its generated `Extract` and `Rule` impls. +/// +/// Types carrying the `language` helper attribute also get inherent `language()` and +/// `parse()` functions bound to the external `tree_sitter_<TypeName>` symbol. +/// +/// # Errors +/// +/// Returns a [`syn::Error`] if the input is a union or if expanding a field or variant fails. +pub fn expand_rule(input: DeriveInput) -> Result { + // TODO: Allow renaming it. + let is_language = input + .attrs + .iter() + .any(|a| sitter_attr_matches(a, "language")); + let ident = input.ident; + let attrs = input.attrs; + let (extract, rule) = match input.data { + Data::Struct(DataStruct { fields, .. }) => { + let extract_expr = + gen_struct_or_variant(fields.clone(), None, ident.clone(), attrs.clone())?; + + let extract_impl: Item = syn::parse_quote! { + impl ::rust_sitter::Extract<#ident> for #ident { + type LeafFn<'a> = (); + + #[allow(non_snake_case)] + fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { + let node = node.expect("no node found"); + #extract_expr + } + } + }; + let ident_str = ident.to_string(); + let rule_impl: Item = syn::parse_quote!
{ + impl ::rust_sitter::rule::Rule<#ident> for #ident { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + #ident_str + } + } + }; + + (extract_impl, rule_impl) + } + Data::Enum(DataEnum { variants, .. }) => { + let match_cases: Vec = variants + .iter() + .map(|v| { + let variant_path = format!("{}_{}", ident, v.ident); + + let extract_expr = gen_struct_or_variant( + v.fields.clone(), + Some(v.ident.clone()), + ident.clone(), + v.attrs.clone(), + )?; + Ok(syn::parse_quote! { + #variant_path => return #extract_expr + }) + }) + .sift::>()?; + + let enum_name = &ident; + let ident_str = enum_name.to_string(); + let extract_impl: Item = syn::parse_quote! { + impl ::rust_sitter::Extract<#enum_name> for #enum_name { + type LeafFn<'a> = (); + + #[allow(non_snake_case)] + fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { + let node = node.expect("No node found"); + + let mut cursor = node.walk(); + assert!(cursor.goto_first_child(), "Could not find a child corresponding to any enum branch"); + loop { + let node = cursor.node(); + match node.kind() { + #(#match_cases),*, + _ => if !cursor.goto_next_sibling() { + panic!("Could not find a child corresponding to any enum branch") + } + } + } + } + } + }; + + let rule_impl: Item = syn::parse_quote! { + impl ::rust_sitter::rule::Rule<#enum_name> for #enum_name { + fn produce_ast() -> String { + String::new() + } + + fn rule_name() -> &'static str { + #ident_str + } + } + }; + (extract_impl, rule_impl) + } + // Return a spanned error rather than panicking so the compiler points at the offending type. + Data::Union(_) => return Err(syn::Error::new(ident.span(), "Union types not supported")), + }; + + // If it is language, then we need to generate the corresponding functions. + let lang = if is_language { + let tree_sitter_ident = Ident::new(&format!("tree_sitter_{ident}"), Span::call_site()); + + let root_type_docstr = format!("[`{ident}`]"); + // TODO: We can maybe make a trait for `language`.
It should also have a `parse` function. + quote! { + + impl #ident { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn #tree_sitter_ident() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { #tree_sitter_ident() } + } + /// Parse an input string according to the grammar. Returns either any parsing errors that happened, or a + #[doc = #root_type_docstr] + /// instance containing the parsed structured data. + pub fn parse(input: &str) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } + } + } + } else { + quote! {} + }; + + Ok(quote! { + #lang + #extract + #rule + }) +} + fn gen_field(ident_str: String, leaf: Field) -> Expr { let leaf_type = leaf.ty; @@ -182,207 +321,207 @@ fn gen_struct_or_variant( ) } -pub fn expand_grammar(input: ItemMod) -> Result { - let attr = input - .attrs - .iter() - .find(|a| a.path() == &syn::parse_quote!(rust_sitter::grammar)) - .ok_or_else(|| syn::Error::new(Span::call_site(), "Each grammar must have a name"))?; - let grammar_name_expr = - attr.parse_args_with(Punctuated::::parse_terminated)?; - if grammar_name_expr.is_empty() { - return Err(syn::Error::new( - Span::call_site(), - "Expected a string literal grammar name", - )); - } - if grammar_name_expr.len() > 2 { - return Err(syn::Error::new( - Span::call_site(), - "Expected at most two inputs", - )); - } - let grammar_name = if let Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Str(s), - }) = grammar_name_expr.first().unwrap() - { - s.value() - } else { - return Err(syn::Error::new( - Span::call_site(), - "Expected a string literal grammar name", - )); - }; - - let should_parse = if let Some(Expr::Lit(ExprLit { - attrs: _, - lit: Lit::Bool(b), - })) = grammar_name_expr.last() - { - b.value() - } else { - false - }; - - let (brace, new_contents) = input.content.as_ref().ok_or_else(|| { - syn::Error::new( - Span::call_site(), - "Expected the module to have inline contents (`mod my_module { ..
}` syntax)", - ) - })?; - - let root_type = new_contents - .iter() - .find_map(|item| match item { - Item::Enum(ItemEnum { ident, attrs, .. }) - | Item::Struct(ItemStruct { ident, attrs, .. }) => { - if attrs - .iter() - .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::language)) - { - Some(ident.clone()) - } else { - None - } - } - _ => None, - }) - .ok_or_else(|| { - syn::Error::new( - Span::call_site(), - "Each parser must have the root type annotated with `#[rust_sitter::language]`", - ) - })?; - - let mut transformed: Vec = new_contents - .iter() - .cloned() - .map(|c| match c { - Item::Enum(mut e) => { - let match_cases: Vec = e.variants.iter().map(|v| { - let variant_path = format!("{}_{}", e.ident, v.ident); - - let extract_expr = gen_struct_or_variant( - v.fields.clone(), - Some(v.ident.clone()), - e.ident.clone(), - v.attrs.clone(), - )?; - Ok(syn::parse_quote! { - #variant_path => return #extract_expr - }) - }).sift::>()?; - - e.attrs.retain(|a| !is_sitter_attr(a)); - e.variants.iter_mut().for_each(|v| { - v.attrs.retain(|a| !is_sitter_attr(a)); - v.fields.iter_mut().for_each(|f| { - f.attrs.retain(|a| !is_sitter_attr(a)); - }); - }); - - let enum_name = &e.ident; - let extract_impl: Item = syn::parse_quote! 
{ - impl ::rust_sitter::Extract<#enum_name> for #enum_name { - type LeafFn<'a> = (); - - #[allow(non_snake_case)] - fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { - let node = node.expect("No node found"); - - let mut cursor = node.walk(); - assert!(cursor.goto_first_child(), "Could not find a child corresponding to any enum branch"); - loop { - let node = cursor.node(); - match node.kind() { - #(#match_cases),*, - _ => if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") - } - } - } - } - } - }; - Ok(vec![Item::Enum(e), extract_impl]) - } - - Item::Struct(mut s) => { - let struct_name = &s.ident; - let extract_expr = gen_struct_or_variant( - s.fields.clone(), - None, - s.ident.clone(), - s.attrs.clone(), - )?; - - s.attrs.retain(|a| !is_sitter_attr(a)); - s.fields.iter_mut().for_each(|f| { - f.attrs.retain(|a| !is_sitter_attr(a)); - }); - - - let extract_impl: Item = syn::parse_quote! { - impl ::rust_sitter::Extract<#struct_name> for #struct_name { - type LeafFn<'a> = (); - - #[allow(non_snake_case)] - fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { - let node = node.expect("no node found"); - #extract_expr - } - } - }; - - Ok(vec![Item::Struct(s), extract_impl]) - } - - o => Ok(vec![o]), - }) - .sift::>()?.into_iter().flatten().collect(); - - let tree_sitter_ident = Ident::new(&format!("tree_sitter_{grammar_name}"), Span::call_site()); - - transformed.push(syn::parse_quote! { - unsafe extern "C" { - fn #tree_sitter_ident() -> ::rust_sitter::tree_sitter::Language; - } - }); - - transformed.push(syn::parse_quote! 
{ - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { #tree_sitter_ident() } - } - }); - - let root_type_docstr = format!("[`{root_type}`]"); - transformed.push(syn::parse_quote! { - /// Parse an input string according to the grammar. Returns either any parsing errors that happened, or a - #[doc = #root_type_docstr] - /// instance containing the parsed structured data. - pub fn parse(input: &str) -> core::result::Result<#root_type, Vec<::rust_sitter::errors::ParseError>> { - ::rust_sitter::__private::parse::<#root_type>(input, language) - } - }); - - // Produces the grammar as a JSON constant. - if should_parse { - let grammars = rust_sitter_common::expansion::generate_grammar(&input).to_string(); - transformed.push(syn::parse_quote! { - pub const GRAMMAR: &str = #grammars; - }); - } - - let mut filtered_attrs = input.attrs; - filtered_attrs.retain(|a| !is_sitter_attr(a)); - Ok(ItemMod { - attrs: filtered_attrs, - vis: input.vis, - unsafety: None, - mod_token: input.mod_token, - ident: input.ident, - content: Some((*brace, transformed)), - semi: input.semi, - }) -} +// pub fn expand_grammar(input: ItemMod) -> Result { +// let attr = input +// .attrs +// .iter() +// .find(|a| a.path() == &syn::parse_quote!(rust_sitter::grammar)) +// .ok_or_else(|| syn::Error::new(Span::call_site(), "Each grammar must have a name"))?; +// let grammar_name_expr = +// attr.parse_args_with(Punctuated::::parse_terminated)?; +// if grammar_name_expr.is_empty() { +// return Err(syn::Error::new( +// Span::call_site(), +// "Expected a string literal grammar name", +// )); +// } +// if grammar_name_expr.len() > 2 { +// return Err(syn::Error::new( +// Span::call_site(), +// "Expected at most two inputs", +// )); +// } +// let grammar_name = if let Expr::Lit(ExprLit { +// attrs: _, +// lit: Lit::Str(s), +// }) = grammar_name_expr.first().unwrap() +// { +// s.value() +// } else { +// return Err(syn::Error::new( +// Span::call_site(), +// "Expected a string literal grammar 
name", +// )); +// }; +// +// let should_parse = if let Some(Expr::Lit(ExprLit { +// attrs: _, +// lit: Lit::Bool(b), +// })) = grammar_name_expr.last() +// { +// b.value() +// } else { +// false +// }; +// +// let (brace, new_contents) = input.content.as_ref().ok_or_else(|| { +// syn::Error::new( +// Span::call_site(), +// "Expected the module to have inline contents (`mod my_module { .. }` syntax)", +// ) +// })?; +// +// let root_type = new_contents +// .iter() +// .find_map(|item| match item { +// Item::Enum(ItemEnum { ident, attrs, .. }) +// | Item::Struct(ItemStruct { ident, attrs, .. }) => { +// if attrs +// .iter() +// .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::language)) +// { +// Some(ident.clone()) +// } else { +// None +// } +// } +// _ => None, +// }) +// .ok_or_else(|| { +// syn::Error::new( +// Span::call_site(), +// "Each parser must have the root type annotated with `#[rust_sitter::language]`", +// ) +// })?; +// +// let mut transformed: Vec = new_contents +// .iter() +// .cloned() +// .map(|c| match c { +// Item::Enum(mut e) => { +// let match_cases: Vec = e.variants.iter().map(|v| { +// let variant_path = format!("{}_{}", e.ident, v.ident); +// +// let extract_expr = gen_struct_or_variant( +// v.fields.clone(), +// Some(v.ident.clone()), +// e.ident.clone(), +// v.attrs.clone(), +// )?; +// Ok(syn::parse_quote! { +// #variant_path => return #extract_expr +// }) +// }).sift::>()?; +// +// e.attrs.retain(|a| !is_sitter_attr(a)); +// e.variants.iter_mut().for_each(|v| { +// v.attrs.retain(|a| !is_sitter_attr(a)); +// v.fields.iter_mut().for_each(|f| { +// f.attrs.retain(|a| !is_sitter_attr(a)); +// }); +// }); +// +// let enum_name = &e.ident; +// let extract_impl: Item = syn::parse_quote! 
{ +// impl ::rust_sitter::Extract<#enum_name> for #enum_name { +// type LeafFn<'a> = (); +// +// #[allow(non_snake_case)] +// fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { +// let node = node.expect("No node found"); +// +// let mut cursor = node.walk(); +// assert!(cursor.goto_first_child(), "Could not find a child corresponding to any enum branch"); +// loop { +// let node = cursor.node(); +// match node.kind() { +// #(#match_cases),*, +// _ => if !cursor.goto_next_sibling() { +// panic!("Could not find a child corresponding to any enum branch") +// } +// } +// } +// } +// } +// }; +// Ok(vec![Item::Enum(e), extract_impl]) +// } +// +// Item::Struct(mut s) => { +// let struct_name = &s.ident; +// let extract_expr = gen_struct_or_variant( +// s.fields.clone(), +// None, +// s.ident.clone(), +// s.attrs.clone(), +// )?; +// +// s.attrs.retain(|a| !is_sitter_attr(a)); +// s.fields.iter_mut().for_each(|f| { +// f.attrs.retain(|a| !is_sitter_attr(a)); +// }); +// +// +// let extract_impl: Item = syn::parse_quote! { +// impl ::rust_sitter::Extract<#struct_name> for #struct_name { +// type LeafFn<'a> = (); +// +// #[allow(non_snake_case)] +// fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { +// let node = node.expect("no node found"); +// #extract_expr +// } +// } +// }; +// +// Ok(vec![Item::Struct(s), extract_impl]) +// } +// +// o => Ok(vec![o]), +// }) +// .sift::>()?.into_iter().flatten().collect(); +// +// let tree_sitter_ident = Ident::new(&format!("tree_sitter_{grammar_name}"), Span::call_site()); +// +// transformed.push(syn::parse_quote! { +// unsafe extern "C" { +// fn #tree_sitter_ident() -> ::rust_sitter::tree_sitter::Language; +// } +// }); +// +// transformed.push(syn::parse_quote! 
{ +// pub fn language() -> ::rust_sitter::tree_sitter::Language { +// unsafe { #tree_sitter_ident() } +// } +// }); +// +// let root_type_docstr = format!("[`{root_type}`]"); +// transformed.push(syn::parse_quote! { +// /// Parse an input string according to the grammar. Returns either any parsing errors that happened, or a +// #[doc = #root_type_docstr] +// /// instance containing the parsed structured data. +// pub fn parse(input: &str) -> core::result::Result<#root_type, Vec<::rust_sitter::errors::ParseError>> { +// ::rust_sitter::__private::parse::<#root_type>(input, language) +// } +// }); +// +// // Produces the grammar as a JSON constant. +// if should_parse { +// let grammars = rust_sitter_common::expansion::generate_grammar(&input).to_string(); +// transformed.push(syn::parse_quote! { +// pub const GRAMMAR: &str = #grammars; +// }); +// } +// +// let mut filtered_attrs = input.attrs; +// filtered_attrs.retain(|a| !is_sitter_attr(a)); +// Ok(ItemMod { +// attrs: filtered_attrs, +// vis: input.vis, +// unsafety: None, +// mod_token: input.mod_token, +// ident: input.ident, +// content: Some((*brace, transformed)), +// semi: input.semi, +// }) +// } diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 4156245..ca3d28e 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -1,5 +1,5 @@ use quote::ToTokens; -use syn::{parse_macro_input, ItemMod}; +use syn::{DeriveInput, ItemMod, parse_macro_input}; mod errors; mod expansion; @@ -24,332 +24,363 @@ use expansion::*; // grammar::parse_grammar_macro(input) // } -#[proc_macro_attribute] -/// Marks the top level AST node where parsing should start. -/// -/// ## Example -/// ```ignore -/// #[rust_sitter::language] -/// pub struct Code { -/// ... -/// } -/// ``` -pub fn language( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item +#[proc_macro_derive( + Rule, + // Alternatively, we can instead have one helper like `baum(...)` - generally looks cleaner. 
+ attributes( + // Helper + language, + word, + leaf, + text, + prec, + prec_left, + prec_right, + prec_dynamic, + token, + extra, + with, + with_node, + transform, + sep_by, + // Helper! + sep_by1, + repeat1, + skip, + ) +)] +pub fn derive_rule(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as DeriveInput); + expand_rule(input) + .unwrap_or_else(syn::Error::into_compile_error) + .into() } +// /// Mark a module to be analyzed for a Rust Sitter grammar. Takes a single, unnamed argument, which +// /// specifies the name of the grammar. This name must be unique across all Rust Sitter grammars within +// /// a compilation unit. +// #[proc_macro_attribute] +// pub fn grammar( +// attr: proc_macro::TokenStream, +// input: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// let attr_tokens: proc_macro2::TokenStream = attr.into(); +// let module: ItemMod = parse_macro_input!(input); +// let expanded = derive_rule(syn::parse_quote! { +// #[rust_sitter::grammar[#attr_tokens]] +// #module +// }) +// .map(ToTokens::into_token_stream) +// .unwrap_or_else(syn::Error::into_compile_error); +// proc_macro::TokenStream::from(expanded) +// } -#[proc_macro_attribute] -/// This annotation marks a node as extra, which can safely be skipped while parsing. -/// This is useful for handling whitespace/newlines/comments. -/// -/// ## Example -/// ```ignore -/// #[rust_sitter::extra] -/// struct Whitespace { -/// #[rust_sitter::leaf(re(r"\s"))] -/// _whitespace: (), -/// } -/// ``` -pub fn extra( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Defines a field which matches a specific token in the source string. 
-/// The token can be defined by passing one of two arguments -/// - `text`: a string literal that will be exactly matched -/// - `pattern`: a regular expression that will be matched against the source string -/// -/// If the resulting token needs to be converted into a richer type at runtime, -/// such as a number, then the `transform` argument can be used to specify a function -/// that will be called with the token's text. -/// -/// The attribute can also be applied to a struct or enum variant with no fields. -/// -/// ## Examples -/// -/// Using the `leaf` attribute on a field: -/// ```ignore -/// Number( -/// #[rust_sitter::leaf(re(r"\d+"))] -/// u32 -/// ) -/// ``` -/// -/// Using the attribute on a unit struct or unit enum variant: -/// ```ignore -/// #[rust_sitter::leaf("9")] -/// struct BigDigit; -/// -/// enum SmallDigit { -/// #[rust_sitter::leaf("0")] -/// Zero, -/// #[rust_sitter::leaf("1")] -/// One, -/// } -/// ``` -/// -pub fn leaf( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Defines text in the grammar that should be parsed but not explicitly used. No explicit rule is -/// created and these segments are inlined. -/// -/// ## Example -/// ```ignore -/// struct Function { -/// #[text("function")] -/// _function: (), -/// name: Ident, -/// #[text("(")] -/// _lparen: (), -/// // ... -/// } -/// ``` -pub fn text( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Defines a field that does not correspond to anything in the input string, -/// such as some metadata. Takes a single, unnamed argument, which is the value -/// used to populate the field at runtime. 
-/// -/// ## Example -/// ```ignore -/// struct MyNode { -/// ..., -/// #[rust_sitter::skip(false)] -/// node_visited: bool -/// } -/// ``` -pub fn skip( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -/// Applies a custom transformation for parsing the input text of a `leaf` node. -/// Without using `with` the default extractor is applied. -/// -/// ## Example -/// ```ignore -/// struct CustomInt( -/// #[leaf(re(r"\d+"))] -/// #[with(plus_one)] -/// i32 -/// ); -/// -/// fn plus_one(s: &str) -> i32 { -/// s.parse::().unwrap() + 1 -/// } -/// ``` -#[proc_macro_attribute] -pub fn with( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -/// Alias for `with`. -#[proc_macro_attribute] -pub fn transform( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Defines a precedence level for a non-terminal that has no associativity. -/// -/// This annotation takes a single, unnamed parameter, which specifies the precedence level. -/// This is used to resolve conflicts with other non-terminals, so that the one with the higher -/// precedence will bind more tightly (appear lower in the parse tree). -/// -/// ## Example -/// ```ignore -/// #[rust_sitter::prec(1)] -/// PriorityExpr(Box, Box) -/// ``` -pub fn prec( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Defines a precedence level for a non-terminal that should be left-associative. -/// For example, with subtraction we expect 1 - 2 - 3 to be parsed as (1 - 2) - 3, -/// which corresponds to a left-associativity. -/// -/// This annotation takes a single, unnamed parameter, which specifies the precedence level. 
-/// This is used to resolve conflicts with other non-terminals, so that the one with the higher -/// precedence will bind more tightly (appear lower in the parse tree). -/// -/// ## Example -/// ```ignore -/// #[rust_sitter::prec_left(1)] -/// Subtract(Box, Box) -/// ``` -pub fn prec_left( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Defines a precedence level for a non-terminal that should be right-associative. -/// For example, with cons we could have 1 :: 2 :: 3 to be parsed as 1 :: (2 :: 3), -/// which corresponds to a right-associativity. -/// -/// This annotation takes a single, unnamed parameter, which specifies the precedence level. -/// This is used to resolve conflicts with other non-terminals, so that the one with the higher -/// precedence will bind more tightly (appear lower in the parse tree). -/// -/// ## Example -/// ```ignore -/// #[rust_sitter::prec_right(1)] -/// Cons(Box, Box) -/// ``` -pub fn prec_right( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// This macro is similar to [`prec`], but the given numerical precedence is applied at runtime instead -/// of at parser generation time. This is only necessary when handling a conflict dynamically using -/// [`conflicts`], and when there is a genuine ambiguity: multiple rules correctly -/// match a given piece of code. In that event, Rust-sitter compares the total dynamic precedence -/// associated with each rule, and selects the one with the highest total. -/// -/// This is similar to dynamic precedence directives in Bison grammars. 
-/// -/// ## Example -/// ```ignore -/// #[rust_sitter::prec_dynamic(1)] -/// Cons(Box, Box) -/// ``` -pub fn prec_dynamic( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Usually, whitespace is optional before each token. This attribute means that the token will only match if there is no whitespace. -/// -/// ## Example -/// ```ignore -/// struct StringFragment( -/// #[rust_sitter::immediate] -/// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] -/// () -/// ); -/// ``` -pub fn immediate( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// Allows the leaf node sequence to be created as a single token. -/// -/// ## Example -/// ```ignore -/// struct StringFragment( -/// #[rust_sitter::token] -/// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] -/// () -/// ); -/// ``` -pub fn token( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements. -/// The [`rust_sitter::repeat`] annotation can be used on the field as well. -/// -/// This annotation takes a single, unnamed argument, which specifies a field type to parse. This can -/// either be a reference to another type, or can be defined as a `leaf` field. Generally, the argument -/// is parsed using the same rules as an unnamed field of an enum variant. -/// -/// ## Example -/// ```ignore -/// #[rust_sitter::delimited(",")] -/// numbers: Vec -/// ``` -pub fn delimited( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -#[proc_macro_attribute] -/// On `Vec<_>` typed fields, specifies additional config for how the repeated elements should -/// be parsed. 
In particular, this annotation takes the following named arguments: -/// - `non_empty` - if this argument is `true`, then there must be at least one element parsed -/// -/// ## Example -/// ```ignore -/// #[rust_sitter::repeat(non_empty = true)] -/// numbers: Vec -/// ``` -pub fn repeat( - _attr: proc_macro::TokenStream, - item: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - item -} - -/// Mark a module to be analyzed for a Rust Sitter grammar. Takes a single, unnamed argument, which -/// specifies the name of the grammar. This name must be unique across all Rust Sitter grammars within -/// a compilation unit. -#[proc_macro_attribute] -pub fn grammar( - attr: proc_macro::TokenStream, - input: proc_macro::TokenStream, -) -> proc_macro::TokenStream { - let attr_tokens: proc_macro2::TokenStream = attr.into(); - let module: ItemMod = parse_macro_input!(input); - let expanded = expand_grammar(syn::parse_quote! { - #[rust_sitter::grammar[#attr_tokens]] - #module - }) - .map(ToTokens::into_token_stream) - .unwrap_or_else(syn::Error::into_compile_error); - proc_macro::TokenStream::from(expanded) -} +// #[proc_macro_attribute] +// /// Marks the top level AST node where parsing should start. +// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::language] +// /// pub struct Code { +// /// ... +// /// } +// /// ``` +// pub fn language( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// This annotation marks a node as extra, which can safely be skipped while parsing. +// /// This is useful for handling whitespace/newlines/comments. 
+// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::extra] +// /// struct Whitespace { +// /// #[rust_sitter::leaf(re(r"\s"))] +// /// _whitespace: (), +// /// } +// /// ``` +// pub fn extra( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Defines a field which matches a specific token in the source string. +// /// The token can be defined by passing one of two arguments +// /// - `text`: a string literal that will be exactly matched +// /// - `pattern`: a regular expression that will be matched against the source string +// /// +// /// If the resulting token needs to be converted into a richer type at runtime, +// /// such as a number, then the `transform` argument can be used to specify a function +// /// that will be called with the token's text. +// /// +// /// The attribute can also be applied to a struct or enum variant with no fields. +// /// +// /// ## Examples +// /// +// /// Using the `leaf` attribute on a field: +// /// ```ignore +// /// Number( +// /// #[rust_sitter::leaf(re(r"\d+"))] +// /// u32 +// /// ) +// /// ``` +// /// +// /// Using the attribute on a unit struct or unit enum variant: +// /// ```ignore +// /// #[rust_sitter::leaf("9")] +// /// struct BigDigit; +// /// +// /// enum SmallDigit { +// /// #[rust_sitter::leaf("0")] +// /// Zero, +// /// #[rust_sitter::leaf("1")] +// /// One, +// /// } +// /// ``` +// /// +// pub fn leaf( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Defines text in the grammar that should be parsed but not explicitly used. No explicit rule is +// /// created and these segments are inlined. 
+// /// +// /// ## Example +// /// ```ignore +// /// struct Function { +// /// #[text("function")] +// /// _function: (), +// /// name: Ident, +// /// #[text("(")] +// /// _lparen: (), +// /// // ... +// /// } +// /// ``` +// pub fn text( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Defines a field that does not correspond to anything in the input string, +// /// such as some metadata. Takes a single, unnamed argument, which is the value +// /// used to populate the field at runtime. +// /// +// /// ## Example +// /// ```ignore +// /// struct MyNode { +// /// ..., +// /// #[rust_sitter::skip(false)] +// /// node_visited: bool +// /// } +// /// ``` +// pub fn skip( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// /// Applies a custom transformation for parsing the input text of a `leaf` node. +// /// Without using `with` the default extractor is applied. +// /// +// /// ## Example +// /// ```ignore +// /// struct CustomInt( +// /// #[leaf(re(r"\d+"))] +// /// #[with(plus_one)] +// /// i32 +// /// ); +// /// +// /// fn plus_one(s: &str) -> i32 { +// /// s.parse::().unwrap() + 1 +// /// } +// /// ``` +// #[proc_macro_attribute] +// pub fn with( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// /// Alias for `with`. +// #[proc_macro_attribute] +// pub fn transform( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Defines a precedence level for a non-terminal that has no associativity. +// /// +// /// This annotation takes a single, unnamed parameter, which specifies the precedence level. 
+// /// This is used to resolve conflicts with other non-terminals, so that the one with the higher +// /// precedence will bind more tightly (appear lower in the parse tree). +// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::prec(1)] +// /// PriorityExpr(Box, Box) +// /// ``` +// pub fn prec( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Defines a precedence level for a non-terminal that should be left-associative. +// /// For example, with subtraction we expect 1 - 2 - 3 to be parsed as (1 - 2) - 3, +// /// which corresponds to a left-associativity. +// /// +// /// This annotation takes a single, unnamed parameter, which specifies the precedence level. +// /// This is used to resolve conflicts with other non-terminals, so that the one with the higher +// /// precedence will bind more tightly (appear lower in the parse tree). +// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::prec_left(1)] +// /// Subtract(Box, Box) +// /// ``` +// pub fn prec_left( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Defines a precedence level for a non-terminal that should be right-associative. +// /// For example, with cons we could have 1 :: 2 :: 3 to be parsed as 1 :: (2 :: 3), +// /// which corresponds to a right-associativity. +// /// +// /// This annotation takes a single, unnamed parameter, which specifies the precedence level. +// /// This is used to resolve conflicts with other non-terminals, so that the one with the higher +// /// precedence will bind more tightly (appear lower in the parse tree). 
+// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::prec_right(1)] +// /// Cons(Box, Box) +// /// ``` +// pub fn prec_right( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// This macro is similar to [`prec`], but the given numerical precedence is applied at runtime instead +// /// of at parser generation time. This is only necessary when handling a conflict dynamically using +// /// [`conflicts`], and when there is a genuine ambiguity: multiple rules correctly +// /// match a given piece of code. In that event, Rust-sitter compares the total dynamic precedence +// /// associated with each rule, and selects the one with the highest total. +// /// +// /// This is similar to dynamic precedence directives in Bison grammars. +// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::prec_dynamic(1)] +// /// Cons(Box, Box) +// /// ``` +// pub fn prec_dynamic( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Usually, whitespace is optional before each token. This attribute means that the token will only match if there is no whitespace. +// /// +// /// ## Example +// /// ```ignore +// /// struct StringFragment( +// /// #[rust_sitter::immediate] +// /// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] +// /// () +// /// ); +// /// ``` +// pub fn immediate( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// Allows the leaf node sequence to be created as a single token. 
+// /// +// /// ## Example +// /// ```ignore +// /// struct StringFragment( +// /// #[rust_sitter::token] +// /// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] +// /// () +// /// ); +// /// ``` +// pub fn token( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements. +// /// The [`rust_sitter::repeat`] annotation can be used on the field as well. +// /// +// /// This annotation takes a single, unnamed argument, which specifies a field type to parse. This can +// /// either be a reference to another type, or can be defined as a `leaf` field. Generally, the argument +// /// is parsed using the same rules as an unnamed field of an enum variant. +// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::delimited(",")] +// /// numbers: Vec +// /// ``` +// pub fn delimited( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } +// +// #[proc_macro_attribute] +// /// On `Vec<_>` typed fields, specifies additional config for how the repeated elements should +// /// be parsed. 
In particular, this annotation takes the following named arguments: +// /// - `non_empty` - if this argument is `true`, then there must be at least one element parsed +// /// +// /// ## Example +// /// ```ignore +// /// #[rust_sitter::repeat(non_empty = true)] +// /// numbers: Vec +// /// ``` +// pub fn repeat( +// _attr: proc_macro::TokenStream, +// item: proc_macro::TokenStream, +// ) -> proc_macro::TokenStream { +// item +// } #[cfg(test)] mod tests { @@ -358,11 +389,34 @@ mod tests { use std::process::Command; use quote::ToTokens; - use syn::{parse_quote, Result}; + use quote::quote; + use syn::{ItemMod, Result, parse_quote}; use tempfile::tempdir; - use super::expand_grammar; - + use crate::expand_rule; + + // Allows expanding multiple rules at once. + fn expand_grammar(input: ItemMod) -> ItemMod { + let (_, items) = input.content.unwrap(); + let mut output = vec![]; + for item in items { + let stream = item.to_token_stream(); + // This might not actually work... + if let Ok(parsed) = syn::parse2(stream.clone()) { + let result = expand_rule(parsed).unwrap(); + output.push(proc_macro2::TokenStream::from(result)); + } else { + output.push(stream); + } + } + let mod_name = input.ident; + + parse_quote! { + mod #mod_name { + #(#output)* + } + } + } fn rustfmt_code(code: &str) -> String { let dir = tempdir().unwrap(); let file_path = dir.path().join("temp.rs"); @@ -390,17 +444,18 @@ mod tests { fn enum_transformed_fields() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + use rust_sitter::Rule; + #[derive(Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] i32 ), } } - })? + }) .to_token_stream() .to_string() )); @@ -412,22 +467,22 @@ mod tests { fn enum_recursive() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! 
{ - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] i32 ), Neg( - #[rust_sitter::leaf("-")] + #[leaf("-")] (), Box ), } } - })? + }) .to_token_stream() .to_string() )); @@ -439,24 +494,24 @@ mod tests { fn enum_prec_left() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern(r"\d+"))] + #[leaf(pattern(r"\d+"))] i32 ), - #[rust_sitter::prec_left(1)] + #[prec_left(1)] Sub( Box, - #[rust_sitter::leaf("-")] + #[leaf("-")] (), Box ), } } - })? + }) .to_token_stream() .to_string() )); @@ -468,22 +523,23 @@ mod tests { fn struct_extra() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(re(r"\d+"))] i32, + #[leaf(re(r"\d+"))] i32, ), } - #[rust_sitter::extra] + #[derive(Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } - })? + }) .to_token_stream() .to_string() )); @@ -495,21 +551,22 @@ mod tests { fn grammar_unboxed_field() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct Language { e: Expression, } + #[derive(rust_sitter::Rule)] pub enum Expression { Number( - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] i32 ), } } - })? 
+ }) .to_token_stream() .to_string() )); @@ -521,25 +578,27 @@ mod tests { fn struct_repeat() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct NumberList { numbers: Vec, } + #[derive(rust_sitter::Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: i32 } - #[rust_sitter::extra] + #[derive(rust_sitter::Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } - })? + }) .to_token_stream() .to_string() )); @@ -551,21 +610,22 @@ mod tests { fn struct_optional() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct Language { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: Option, t: Option, } + #[derive(rust_sitter::Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: i32 } } - })? + }) .to_token_stream() .to_string() )); @@ -577,22 +637,23 @@ mod tests { fn enum_with_unamed_vector() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { + #[derive(rust_sitter::Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] value: u32 } - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expr { Numbers( - #[rust_sitter::repeat(non_empty = true)] + #[repeat1] Vec ) } } - })? + }) .to_token_stream() .to_string() )); @@ -604,22 +665,22 @@ mod tests { fn enum_with_named_field() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! 
{ - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expr { Number( - #[rust_sitter::leaf(pattern(r"\d+"))] + #[leaf(pattern(r"\d+"))] u32 ), Neg { - #[rust_sitter::leaf("!")] + #[leaf("!")] _bang: (), value: Box, } } } - })? + }) .to_token_stream() .to_string() )); @@ -631,27 +692,29 @@ mod tests { fn spanned_in_vec() -> Result<()> { insta::assert_snapshot!(rustfmt_code( &expand_grammar(parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - use rust_sitter::Spanned; + use rust_sitter::{Rule, Spanned}; - #[rust_sitter::language] + #[derive(Rule)] + #[language] pub struct NumberList { numbers: Vec>, } + #[derive(Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: i32 } - #[rust_sitter::extra] + #[derive(Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } - })? + }) .to_token_stream() .to_string() )); diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index 16bb656..32a974c 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -1,11 +1,23 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(pattern(r\"\\d+\"))] i32),\n #[rust_sitter::prec_left(1)]\n Sub(Box, #[rust_sitter::leaf(\"-\")] (),\n Box),\n }\n }\n})? 
.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expression\n {\n Number(#[leaf(pattern(r\"\\d+\"))] i32), #[prec_left(1)]\n Sub(Box, #[leaf(\"-\")] (), Box),\n }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub enum Expression { - Number(i32), - Sub(Box, (), Box), + impl Expression { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Expression() } + } + #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] + #[doc = "[`Expression`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); @@ -71,18 +83,12 @@ mod grammar { } } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] - #[doc = "[`Expression`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Expression { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Expression" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 0839595..4cfcf78 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -1,11 +1,23 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n {\n Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32),\n Neg(#[rust_sitter::leaf(\"-\")] (), Box),\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expression\n {\n Number(#[leaf(re(r\"\\d+\"))] i32),\n Neg(#[leaf(\"-\")] (), Box),\n }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub enum Expression { - Number(i32), - Neg((), Box), + impl Expression { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Expression() } + } + #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] + #[doc = "[`Expression`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); @@ -66,18 +78,12 @@ mod grammar { } } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] - #[doc = "[`Expression`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Expression { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Expression" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index a22e040..f17518e 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -1,10 +1,24 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n { Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32), }\n }\n})? 
.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n use rust_sitter::Rule; #[derive(Rule)] #[language] pub enum Expression\n { Number(#[leaf(re(r\"\\d+\"))] i32), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub enum Expression { - Number(i32), + use rust_sitter::Rule; + impl Expression { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Expression() } + } + #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] + #[doc = "[`Expression`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); @@ -46,18 +60,12 @@ mod grammar { } } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] - #[doc = "[`Expression`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Expression { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Expression" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 6b9fcd9..9cac676 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -1,11 +1,23 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expr\n {\n Number(#[rust_sitter::leaf(pattern(r\"\\d+\"))] u32), Neg\n { #[rust_sitter::leaf(\"!\")] _bang: (), value: Box, }\n }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expr\n {\n Number(#[leaf(pattern(r\"\\d+\"))] u32), Neg\n { #[leaf(\"!\")] _bang: (), value: Box, }\n }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub enum Expr { - Number(u32), - Neg { _bang: (), value: Box }, + impl Expr { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Expr() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Expr() } + } + #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] + #[doc = "[`Expr`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Expr { type LeafFn<'a> = (); @@ -64,18 +76,12 @@ mod grammar { } } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] - #[doc = "[`Expr`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Expr { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Expr" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 9485cd1..ded37ec 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -1,11 +1,8 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n pub struct Number { #[rust_sitter::leaf(re(r\"\\d+\"))] value: u32 }\n #[rust_sitter::language] pub enum Expr\n { Numbers(#[rust_sitter::repeat(non_empty = true)] Vec) }\n }\n})? 
.to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] pub struct Number\n { #[leaf(re(r\"\\d+\"))] value: u32 } #[derive(rust_sitter::Rule)]\n #[language] pub enum Expr\n { Numbers(#[repeat(non_empty = true)] Vec) }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub struct Number { - value: u32, - } impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); #[allow(non_snake_case)] @@ -29,8 +26,29 @@ mod grammar { ) } } - pub enum Expr { - Numbers(Vec), + impl ::rust_sitter::rule::Rule for Number { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Number" + } + } + impl Expr { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Expr() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Expr() } + } + #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] + #[doc = "[`Expr`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Expr { type LeafFn<'a> = (); @@ -72,18 +90,12 @@ mod grammar { } } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] - #[doc = "[`Expr`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Expr { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Expr" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index 855ee85..ef6354b 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -1,10 +1,23 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language { e: Expression, } pub\n enum Expression { Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32), }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub struct Language\n { e: Expression, } #[derive(rust_sitter::Rule)] pub enum Expression\n { Number(#[leaf(re(r\"\\d+\"))] i32), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub struct Language { - e: Expression, + impl Language { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Language() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Language() } + } + #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] + #[doc = "[`Language`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Language { type LeafFn<'a> = (); @@ -29,8 +42,13 @@ mod grammar { ) } } - pub enum Expression { - Number(i32), + impl ::rust_sitter::rule::Rule for Language { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Language" + } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); @@ -72,18 +90,12 @@ mod grammar { } } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] - #[doc = "[`Language`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Expression { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Expression" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index 0ffe1cf..a02de1e 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -1,11 +1,24 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n use rust_sitter::Spanned; #[rust_sitter::language] pub struct\n NumberList { numbers: Vec>, } pub struct Number\n { #[rust_sitter::leaf(re(r\"\\d+\"))] v: i32 } 
#[rust_sitter::extra]\n struct Whitespace\n { #[rust_sitter::leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n use rust_sitter::{Rule, Spanned}; #[derive(Rule)] #[language] pub\n struct NumberList { numbers: Vec>, } #[derive(Rule)]\n pub struct Number { #[leaf(re(r\"\\d+\"))] v: i32 } #[derive(Rule)]\n #[extra] struct Whitespace\n { #[leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - use rust_sitter::Spanned; - pub struct NumberList { - numbers: Vec>, + use rust_sitter::{Rule, Spanned}; + impl NumberList { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_NumberList() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_NumberList() } + } + #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] + #[doc = "[`NumberList`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for NumberList { type LeafFn<'a> = (); @@ -30,8 +43,13 @@ mod grammar { ) } } - pub struct Number { - v: i32, + impl ::rust_sitter::rule::Rule for NumberList { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "NumberList" + } } impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); @@ -56,8 +74,13 @@ mod grammar { ) } } - struct Whitespace { - _whitespace: (), + impl ::rust_sitter::rule::Rule for Number { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Number" + } } impl ::rust_sitter::Extract for Whitespace { type LeafFn<'a> = (); @@ -87,18 +110,12 @@ mod grammar { ) } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - 
} - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] - #[doc = "[`NumberList`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Whitespace { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Whitespace" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index 0c51d33..2479c41 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -1,10 +1,23 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub enum Expression\n { Number(#[rust_sitter::leaf(re(r\"\\d+\"))] i32,), }\n #[rust_sitter::extra] struct Whitespace\n { #[rust_sitter::leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expression\n { Number(#[leaf(re(r\"\\d+\"))] i32,), } #[derive(Rule)] #[extra] struct\n Whitespace { #[leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub enum Expression { - Number(i32), + impl Expression { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Expression() } + } + #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] + #[doc = "[`Expression`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); @@ -46,8 +59,13 @@ mod grammar { } } } - struct Whitespace { - _whitespace: (), + impl ::rust_sitter::rule::Rule for Expression { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Expression" + } } impl ::rust_sitter::Extract for Whitespace { type LeafFn<'a> = (); @@ -77,18 +95,12 @@ mod grammar { ) } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] - #[doc = "[`Expression`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Whitespace { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Whitespace" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index ac87ad5..739c00e 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -1,11 +1,23 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct Language\n {\n #[rust_sitter::leaf(re(r\"\\d+\"))] v: Option, t:\n Option,\n } pub struct Number { 
#[rust_sitter::leaf(re(r\"\\d+\"))] v: i32 }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub struct Language\n { #[leaf(re(r\"\\d+\"))] v: Option, t: Option, }\n #[derive(rust_sitter::Rule)] pub struct Number\n { #[leaf(re(r\"\\d+\"))] v: i32 }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub struct Language { - v: Option, - t: Option, + impl Language { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_Language() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_Language() } + } + #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] + #[doc = "[`Language`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for Language { type LeafFn<'a> = (); @@ -35,8 +47,13 @@ mod grammar { ) } } - pub struct Number { - v: i32, + impl ::rust_sitter::rule::Rule for Language { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Language" + } } impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); @@ -61,18 +78,12 @@ mod grammar { ) } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] - #[doc = "[`Language`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Number { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Number" + } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index 1d6a4f4..83d4ac5 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -1,10 +1,23 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n #[rust_sitter::grammar(\"test\")] mod grammar\n {\n #[rust_sitter::language] pub struct NumberList\n { numbers: Vec, } pub struct Number\n { #[rust_sitter::leaf(re(r\"\\d+\"))] v: i32 } #[rust_sitter::extra]\n struct Whitespace\n { #[rust_sitter::leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n})? .to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub struct NumberList\n { numbers: Vec, } #[derive(rust_sitter::Rule)] pub struct\n Number { #[leaf(re(r\"\\d+\"))] v: i32 } #[derive(rust_sitter::Rule)]\n #[extra] struct Whitespace\n { #[leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - pub struct NumberList { - numbers: Vec, + impl NumberList { + pub fn language() -> ::rust_sitter::tree_sitter::Language { + unsafe extern "C" { + fn tree_sitter_NumberList() -> ::rust_sitter::tree_sitter::Language; + } + unsafe { tree_sitter_NumberList() } + } + #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] + #[doc = "[`NumberList`]"] + #[doc = r" instance containing the parsed structured data."] + pub fn parse( + input: &str, + ) -> core::result::Result> { + ::rust_sitter::__private::parse(input, Self::language) + } } impl ::rust_sitter::Extract for NumberList { type LeafFn<'a> = (); @@ -29,8 +42,13 @@ mod grammar { ) } } - pub struct Number { - v: i32, + impl ::rust_sitter::rule::Rule for NumberList { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "NumberList" + } } impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); @@ -55,8 +73,13 @@ mod grammar { ) } } - struct Whitespace { - _whitespace: (), + impl ::rust_sitter::rule::Rule for Number { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Number" + } } impl ::rust_sitter::Extract for Whitespace { type LeafFn<'a> = (); @@ -86,18 +109,12 @@ mod grammar { ) } } - unsafe extern "C" { - fn tree_sitter_test() -> ::rust_sitter::tree_sitter::Language; - } - pub fn language() -> ::rust_sitter::tree_sitter::Language { - unsafe { tree_sitter_test() } - } - #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] - #[doc = "[`NumberList`]"] - #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { - ::rust_sitter::__private::parse::(input, language) + impl ::rust_sitter::rule::Rule for Whitespace { + fn produce_ast() -> String { + String::new() + } + fn rule_name() -> &'static str { + "Whitespace" + } } } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 1a75942..2f20f1a 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,4 +1,5 @@ pub mod __private; +pub mod rule; use std::ops::Deref; diff --git a/runtime/src/rule.rs b/runtime/src/rule.rs new file mode 100644 index 0000000..2681da7 --- /dev/null +++ b/runtime/src/rule.rs @@ -0,0 +1,14 @@ + +pub trait Rule { + // TODO: Consider using serde_json::Value instead. Or just a serialized actual type + // representing the different constructs... + fn produce_ast() -> String; + // Maybe Cow instead. + fn rule_name() -> &'static str; +} + +// ...like this. +// pub enum TreeSitterType { +// Choice(TreeSitterChoice), +// Seq(TreeSitterSeq), +// } diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 6e35d33..910d665 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -11,11 +11,12 @@ use tree_sitter_generate::generate_parser_for_grammar; /// Using the `cc` crate, generates and compiles a C parser with Tree Sitter /// for every Rust Sitter grammar found in the given module and recursive /// submodules. -pub fn build_parsers(root_file: &Path) { - let root_file = syn_inline_mod::parse_and_inline_modules(root_file); - rust_sitter_common::expansion::generate_grammars(root_file.items) - .iter() - .for_each(generate_parser); +pub fn build_parser

(root_file: &P) +where P: AsRef + ?Sized +{ + let root_file = syn_inline_mod::parse_and_inline_modules(root_file.as_ref()); + let grammar = rust_sitter_common::expansion::generate_grammar(root_file.items); + generate_parser(&grammar); } fn generate_parser(grammar: &serde_json::Value) { @@ -117,25 +118,29 @@ fn generate_parser(grammar: &serde_json::Value) { #[cfg(test)] mod tests { - use syn::parse_quote; + use syn::{parse_quote, ItemMod}; use super::GENERATED_SEMANTIC_VERSION; - use rust_sitter_common::expansion::generate_grammar; + // use rust_sitter_common::expansion::generate_grammar; use tree_sitter_generate::generate_parser_for_grammar; + fn generate_grammar(item: ItemMod) -> serde_json::Value { + let (_, items) = item.content.unwrap(); + rust_sitter_common::expansion::generate_grammar(items) + } #[test] fn enum_with_named_field() { let m = if let syn::Item::Mod(m) = parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expr { Number( - #[rust_sitter::leaf(pattern(r"\d+"))] + #[leaf(pattern(r"\d+"))] u32 ), Neg { - #[rust_sitter::leaf("!")] + #[leaf("!")] _bang: (), value: Box, } @@ -147,7 +152,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -155,13 +160,13 @@ mod tests { #[test] fn enum_transformed_fields() { let m = if let syn::Item::Mod(m) = parse_quote! 
{ - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern(r"\d+"))] - #[rust_sitter::transform(|v: &str| v.parse::().unwrap())] + #[leaf(pattern(r"\d+"))] + #[transform(|v: &str| v.parse::().unwrap())] i32 ), } @@ -172,7 +177,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -180,16 +185,16 @@ mod tests { #[test] fn enum_recursive() { let m = if let syn::Item::Mod(m) = parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern(r"\d+"))] + #[leaf(pattern(r"\d+"))] i32 ), Neg( - #[rust_sitter::leaf("-")] + #[leaf("-")] (), Box ), @@ -201,7 +206,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -209,18 +214,18 @@ mod tests { #[test] fn enum_prec_left() { let m = if let syn::Item::Mod(m) = parse_quote! 
{ - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(pattern(r"\d+"))] + #[leaf(pattern(r"\d+"))] i32 ), - #[rust_sitter::prec_left(1)] + #[prec_left(1)] Sub( Box, - #[rust_sitter::leaf("-")] + #[leaf("-")] (), Box ), @@ -232,7 +237,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -240,73 +245,82 @@ mod tests { #[test] fn enum_conflicts_prec_dynamic() { let m = if let syn::Item::Mod(m) = parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct Program(pub Vec); + #[derive(rust_sitter::Rule)] pub enum Statement { ExpressionStatement(ExpressionStatement), IfStatement(Box), } + #[derive(rust_sitter::Rule)] pub enum Expression { Identifier(Identifier), Number(Number), BinaryExpression(Box), } - #[rust_sitter::prec_left(1)] + #[derive(rust_sitter::Rule)] + #[prec_left(1)] pub struct BinaryExpression { pub expression: Expression, pub binary_expression_inner: BinaryExpressionInner, pub expression2: Expression, } + #[derive(rust_sitter::Rule)] pub enum BinaryExpressionInner { - String(#[rust_sitter::leaf("+")] ()), - String2(#[rust_sitter::leaf("-")] ()), - String3(#[rust_sitter::leaf("*")] ()), - String4(#[rust_sitter::leaf("/")] ()), + String(#[leaf("+")] ()), + String2(#[leaf("-")] ()), + String3(#[leaf("*")] ()), + String4(#[leaf("/")] ()), } + #[derive(rust_sitter::Rule)] pub struct ExpressionStatement { pub expression: Expression, - #[rust_sitter::leaf(";")] + #[leaf(";")] pub _semicolon: (), } - #[rust_sitter::prec_dynamic(1)] + #[derive(rust_sitter::Rule)] + #[prec_dynamic(1)] pub struct IfStatement { - #[rust_sitter::leaf("if")] + #[leaf("if")] pub _if: (), - 
#[rust_sitter::leaf("(")] + #[leaf("(")] pub _lparen: (), pub expression: Expression, - #[rust_sitter::leaf(")")] + #[leaf(")")] pub _rparen: (), - #[rust_sitter::leaf("{")] + #[leaf("{")] pub _lbrace: (), pub statement: Statement, - #[rust_sitter::leaf("}")] + #[leaf("}")] pub _rbrace: (), pub if_statement_inner: Option, } + #[derive(rust_sitter::Rule)] pub struct IfStatementElse { - #[rust_sitter::leaf("else")] + #[leaf("else")] pub _else: (), - #[rust_sitter::leaf("{")] + #[leaf("{")] pub _lbrace: (), pub statement: Statement, - #[rust_sitter::leaf("}")] + #[leaf("}")] pub _rbrace: (), } - #[rust_sitter::word] - pub struct Identifier(#[rust_sitter::leaf(pattern("[a-zA-Z_][a-zA-Z0-9_]*"))] ()); + #[derive(rust_sitter::Rule)] + #[word] + pub struct Identifier(#[leaf(pattern("[a-zA-Z_][a-zA-Z0-9_]*"))] ()); - pub struct Number(#[rust_sitter::leaf(pattern("\\d+"))] ()); + #[derive(rust_sitter::Rule)] + pub struct Number(#[leaf(pattern("\\d+"))] ()); } } { m @@ -314,7 +328,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -322,19 +336,20 @@ mod tests { #[test] fn grammar_with_extras() { let m = if let syn::Item::Mod(m) = parse_quote! 
{ - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expression { Number( - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] i32 ), } - #[rust_sitter::extra] + #[derive(rust_sitter::Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(re(r"\s"))] + #[leaf(re(r"\s"))] _whitespace: (), } } @@ -344,7 +359,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -352,16 +367,17 @@ mod tests { #[test] fn grammar_unboxed_field() { let m = if let syn::Item::Mod(m) = parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct Language { e: Expression, } + #[derive(rust_sitter::Rule)] pub enum Expression { Number( - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] i32 ), } @@ -372,7 +388,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -380,22 +396,24 @@ mod tests { #[test] fn grammar_repeat() { let m = if let syn::Item::Mod(m) = parse_quote! 
{ - #[rust_sitter::grammar("test")] pub mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct NumberList { - #[rust_sitter::delimited(",")] + #[sep_by(",")] numbers: Vec, } + #[derive(Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: i32, } - #[rust_sitter::extra] + #[derive(Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } @@ -405,7 +423,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -413,21 +431,23 @@ mod tests { #[test] fn grammar_repeat_no_delimiter() { let m = if let syn::Item::Mod(m) = parse_quote! { - #[rust_sitter::grammar("test")] pub mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct NumberList { numbers: Vec, } + #[derive(rust_sitter::Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: i32, } - #[rust_sitter::extra] + #[derive(rust_sitter::Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } @@ -437,7 +457,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -445,23 +465,25 @@ mod tests { #[test] fn grammar_repeat1() { let m = if let syn::Item::Mod(m) = parse_quote! 
{ - #[rust_sitter::grammar("test")] pub mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct NumberList { - #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited(",")] + #[repeat(non_empty = true)] + #[delimited(",")] numbers: Vec, } + #[derive(rust_sitter::Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: i32, } - #[rust_sitter::extra] + #[derive(rust_sitter::Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } @@ -471,7 +493,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -479,19 +501,20 @@ mod tests { #[test] fn struct_optional() { let m = if let syn::Item::Mod(m) = parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct Language { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: Option, - #[rust_sitter::leaf(re(r" "))] + #[leaf(re(r" "))] space: (), t: Option, } + #[derive(rust_sitter::Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] v: i32 } } @@ -501,7 +524,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -509,17 +532,18 @@ mod tests { #[test] fn enum_with_unamed_vector() { let m = if let syn::Item::Mod(m) = parse_quote! 
{ - #[rust_sitter::grammar("test")] mod grammar { + #[derive(rust_sitter::Rule)] pub struct Number { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] value: u32 } - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub enum Expr { Numbers( - #[rust_sitter::repeat(non_empty = true)] + #[repeat1] Vec ) } @@ -530,7 +554,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -538,19 +562,20 @@ mod tests { #[test] fn spanned_in_vec() { let m = if let syn::Item::Mod(m) = parse_quote! { - #[rust_sitter::grammar("test")] mod grammar { use rust_sitter::Spanned; - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct NumberList { - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] numbers: Vec>, } - #[rust_sitter::extra] + #[derive(rust_sitter::Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } @@ -560,7 +585,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } @@ -568,19 +593,20 @@ mod tests { #[test] fn immediate() { let m = if let syn::Item::Mod(m) = parse_quote! 
{ - #[rust_sitter::grammar("test")] mod grammar { - #[rust_sitter::language] + #[derive(rust_sitter::Rule)] + #[language] pub struct StringFragment( - #[rust_sitter::immediate] - #[rust_sitter::prec(1)] - #[rust_sitter::leaf(pattern(r#"[^"\\]+"#))] + #[immediate] + #[prec(1)] + #[leaf(pattern(r#"[^"\\]+"#))] () ); - #[rust_sitter::extra] + #[derive(rust_sitter::Rule)] + #[extra] struct Whitespace { - #[rust_sitter::leaf(pattern(r"\s"))] + #[leaf(pattern(r"\s"))] _whitespace: (), } } @@ -590,7 +616,7 @@ mod tests { panic!() }; - let grammar = generate_grammar(&m); + let grammar = generate_grammar(m); insta::assert_snapshot!(grammar); generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION).unwrap(); } diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap index 6778575..bb1a6a9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Program_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_vec_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"type":
"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"_ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"_ExpressionStatement__semicolon"}}]},"_IfStatement__if":{"type":"STRING","value":"if"},"_IfStatement__lparen":{"type":"STRING","value":"("},"_IfStatement__rparen":{"type":"STRING","value":")"},"_IfStatement__lbrace":{"type":"STRING","value":"{"},"_IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"_IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"_IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","c
ontent":{"type":"SYMBOL","name":"_IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"_IfStatementElse__else":{"type":"STRING","value":"else"},"_IfStatementElse__lbrace":{"type":"STRING","value":"{"},"_IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"_IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]}},"extras":[]} 
+{"name":"Program","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Program_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_vec_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"typ
e":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"_ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"_ExpressionStatement__semicolon"}}]},"_IfStatement__if":{"type":"STRING","value":"if"},"_IfStatement__lparen":{"type":"STRING","value":"("},"_IfStatement__rparen":{"type":"STRING","value":")"},"_IfStatement__lbrace":{"type":"STRING","value":"{"},"_IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"_IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"_IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen"
,"content":{"type":"SYMBOL","name":"_IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"_IfStatementElse__else":{"type":"STRING","value":"else"},"_IfStatementElse__lbrace":{"type":"STRING","value":"{"},"_IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"_IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap index c2c0aec..b8aca76 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap @@ -1,5 +1,5 @@ --- source: tool/src/lib.rs -expression: generate_grammar(&m) +expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression_Sub_1":{"type":"STRING","value":"-"},"Expression_Sub":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"1","content":{"type":"SYMBOL","name":"Expression_Sub_1"}},{"type":"FIELD","name":"2","content":{"type":"SYMBOL","name":"Expression"}}]}},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]}},"extras":[]} +{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression_Sub_1":{"type":"STRING","value":"-"},"Expression_Sub":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"1","content":{"type":"SYMBOL","name":"Expression_Sub_1"}},{"type":"FIELD","name":"2","content":{"type":"SYMBOL","name":"Expression"}}]}},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap index 14afcf2..4732692 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap +++ 
b/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap @@ -1,5 +1,5 @@ --- source: tool/src/lib.rs -expression: generate_grammar(&m) +expression: grammar --- -{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression_Neg_0":{"type":"STRING","value":"-"},"Expression_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Neg_0"}},{"type":"FIELD","name":"1","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]}},"extras":[]} +{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression_Neg_0":{"type":"STRING","value":"-"},"Expression_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Neg_0"}},{"type":"FIELD","name":"1","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap index 016e818..86e431e 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap +++ 
b/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap @@ -1,5 +1,5 @@ --- source: tool/src/lib.rs -expression: generate_grammar(&m) +expression: grammar --- -{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} +{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap index f2865f9..64f76f3 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]},"Expr_Number_0":{"type":"PATTERN","value":"\\d+"},"Expr_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Number_0"}}]},"_Expr_Neg__bang":{"type":"STRING","value":"!"},"Expr_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"_bang","content":{"type":"SYMBOL","name":"_Expr_Neg__bang"}},{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Expr"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]}},"extras":[]} +{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]},"Expr_Number_0":{"type":"PATTERN","value":"\\d+"},"Expr_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Number_0"}}]},"_Expr_Neg__bang":{"type":"STRING","value":"!"},"Expr_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"_bang","content":{"type":"SYMBOL","name":"_Expr_Neg__bang"}},{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Expr"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap index 3dee28f..7da7603 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number_value":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Number_value"}}]},"Expr_Numbers_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Expr_Numbers_0_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Numbers_0_vec_contents"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} +{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number_value":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Number_value"}}]},"Expr_Numbers_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Expr_Numbers_0_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Numbers_0_vec_contents"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap index e8adffa..4d442de 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap index c20f062..881805c 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap index 4e4cf04..881805c 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git 
a/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap index 246cfe3..fcf50cd 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap @@ -1,5 +1,5 @@ --- source: tool/src/lib.rs -expression: generate_grammar(&m) +expression: grammar --- -{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Language":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} +{"name":"Language","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Language":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap index 0f094bc..48e12e9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap @@ -1,5 +1,5 @@ --- source: tool/src/lib.rs -expression: generate_grammar(&m) +expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap index a7ebcc9..b8e7f07 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"StringFragment","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap index 663fa1a..3f7c1b1 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap b/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap index 9f2629e..470b6fc 100644 --- 
a/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"test","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Language_v"}}]},{"type":"FIELD","name":"space","content":{"type":"SYMBOL","name":"Language_space"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Language_v":{"type":"PATTERN","value":"\\d+"},"Language_space":{"type":"PATTERN","value":" "},"Language":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Language_v"}}]},{"type":"FIELD","name":"space","content":{"type":"SYMBOL","name":"Language_space"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[]} +{"name":"Language","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Language_v"}}]},{"type":"FIELD","name":"space","content":{"type":"SYMBOL","name":"Language_space"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Language_v":{"type":"PATTERN","value":"\\d+"},"Language_space":{"type":"PATTERN","value":" 
"},"Language":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Language_v"}}]},{"type":"FIELD","name":"space","content":{"type":"SYMBOL","name":"Language_space"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[]} From e34969ef7c2180b3d26b0639fb1aa7bc6127583e Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 17 Jul 2025 11:10:14 -0500 Subject: [PATCH 23/50] Remove unused imports --- macro/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/macro/src/lib.rs b/macro/src/lib.rs index ca3d28e..ec72c2f 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -1,5 +1,4 @@ -use quote::ToTokens; -use syn::{DeriveInput, ItemMod, parse_macro_input}; +use syn::{DeriveInput, parse_macro_input}; mod errors; mod expansion; From ffa145dfe2d605429c8e952cb6101716519d3463 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 17 Jul 2025 15:29:09 -0500 Subject: [PATCH 24/50] Refactor to produce better errors at each stage --- Cargo.lock | 16 ++ Cargo.toml | 5 +- common/Cargo.toml | 1 + common/src/expansion.rs | 396 +++++++++++++++++++++++++--------------- common/src/lib.rs | 61 ++----- example/build.rs | 6 +- macro/src/expansion.rs | 265 +++++---------------------- macro/src/lib.rs | 3 +- tool/src/lib.rs | 67 ++++--- 9 files changed, 363 insertions(+), 457 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 38f3e7f..dde5444 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -103,6 +103,12 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = 
"encode_unicode" version = "1.0.0" @@ -332,6 +338,15 @@ dependencies = [ "similar", ] +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.14" @@ -488,6 +503,7 @@ dependencies = [ name = "rust-sitter-common" version = "0.5.0" dependencies = [ + "itertools", "proc-macro2", "quote", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index f1e8c54..832e633 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,4 +9,7 @@ members = [ ] [workspace.package] version = "0.5.0" -authors = ["Shadaj Laddad "] +authors = [ + "Jason Boatman", + "(formerly) Shadaj Laddad" +] diff --git a/common/Cargo.toml b/common/Cargo.toml index c2bc801..9dd92d9 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -19,3 +19,4 @@ proc-macro2 = "1" quote = "1" serde_json = "1" +itertools = "0.14" diff --git a/common/src/expansion.rs b/common/src/expansion.rs index e955989..257e0a4 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -1,8 +1,9 @@ use std::collections::HashSet; use super::*; +use itertools::Itertools; use serde_json::{Map, Value, json}; -use syn::{parse::Parse, punctuated::Punctuated}; +use syn::{parse::Parse, punctuated::Punctuated, spanned::Spanned}; #[derive(Debug)] pub struct RuleDerive { @@ -13,49 +14,73 @@ pub struct RuleDerive { } impl RuleDerive { - pub fn from_derive_input(d: DeriveInput) -> Option { + pub fn from_derive_input(d: DeriveInput) -> Result> { if d.attrs.iter().any(|a| { let Ok(list) = a.meta.require_list() else { return false; }; - let derives = list + let Some(derives) = list .parse_args_with(Punctuated::::parse_terminated) - .unwrap(); + .ok() + else { + // Should be infallible to parse derive tokens + return false; + }; derives .iter() .any(|p| p == &parse_quote!(rust_sitter::Rule) || p == &parse_quote!(Rule)) }) { - 
Some(Self::from_derive_input_known(d)) + Ok(Some(Self::from_derive_input_known(d)?)) } else { - None + Ok(None) } } // Used by the proc macro directly. - pub fn from_derive_input_known(d: DeriveInput) -> Self { - let extras = Extras::new(&d.attrs); - Self { + pub fn from_derive_input_known(d: DeriveInput) -> Result { + let extras = Extras::new(&d.attrs)?; + Ok(Self { ident: d.ident, attrs: d.attrs, extras, data: d.data, - } + }) } } /// Generate a single grammar per module. -pub fn generate_grammar(root_file: Vec) -> Value { +pub fn generate_grammar(root_file: Vec) -> Result> { let mut state = ExpansionState::default(); // for some reason, source_file must be the first key for things to work state.rules_map.insert("source_file".to_string(), json!({})); - for item in root_file { - process_item(item, &mut state); + if root_file + .into_iter() + .map(|item| process_item(item, &mut state)) + .flat_map(|r| r.err()) + .reduce(|mut acc, e| { + acc.combine(e); + acc + }) + .map(Err::<(), _>) + .is_some() + { + state.err()?; + // Theoretically, we can use this for something. But, this generate_grammar function is + // only used in `build.rs`, and all of these errors which are useful will be caught + // during macro expansion, which is much more useful for development purposes. + return Ok(None); } + // This error is useful for us and cannot be generated by proc macro expansion. let language = state .language_rule - .expect("Must specify exactly one root with #[language]") + .ok_or_else(|| { + Error::new( + Span::call_site(), + "Must specify exactly one root with #[language]", + ) + })? 
.to_string(); state.rules_map.insert( "source_file".to_string(), @@ -64,40 +89,65 @@ pub fn generate_grammar(root_file: Vec) -> Value { let word_rule = state.word_rule; let rules_map = state.rules_map; let extras_list = state.extras; - json!({ + Ok(Some(json!({ "name": language, "word": word_rule, "rules": rules_map, "extras": extras_list - }) + }))) } #[derive(Default)] -struct ExpansionState { +pub struct ExpansionState { rules_map: Map, word_rule: Option, language_rule: Option, extras: Vec, + // Accumulated errors. + error: Option, } impl ExpansionState { - fn set_language(&mut self, ident: &Ident) { + fn err(&mut self) -> Result<()> { + if let Some(err) = self.error.take() { + Err(err) + } else { + Ok(()) + } + } + fn accumulate_error(&mut self, err: Error) -> Error { + if let Some(inner) = &mut self.error { + inner.combine(err.clone()); + } else { + self.error = Some(err.clone()); + } + err + } + fn set_language(&mut self, ident: &Ident) -> Result<()> { if let Some(existing) = &self.language_rule { - panic!( - "Language rule already defined as {}:{:?}, found duplicate with {}:{:?}", - existing, + return Err(self.accumulate_error(Error::new( existing.span(), - ident, - ident.span(), - ); + format!( + "Language rule already defined as {}:{:?}, found duplicate with {}:{:?}", + existing, + existing.span(), + ident, + ident.span(), + ), + ))); } self.language_rule = Some(ident.clone()); + Ok(()) } - fn set_word(&mut self, ident: String) { + fn set_word(&mut self, ident: String) -> Result<()> { if let Some(existing) = &self.word_rule { - panic!("Word rule already defined as {existing}, found duplicate with {ident}",); + return Err(self.accumulate_error(Error::new( + Span::call_site(), + format!("Word rule already defined as {existing}, found duplicate with {ident}"), + ))); } self.word_rule = Some(ident); + Ok(()) } fn push_extra(&mut self, ident: &Ident) { self.extras.push(json!({ @@ -107,34 +157,35 @@ impl ExpansionState { } } -fn process_item(item: Item, ctx: 
&mut ExpansionState) { +fn process_item(item: Item, ctx: &mut ExpansionState) -> Result<()> { match item { Item::Struct(_) | Item::Enum(_) => { // Try and convert it to a derive. let stream = item.to_token_stream(); - // stream.into_iter - let input = syn::parse2::(stream) - .map(RuleDerive::from_derive_input) - .expect("Failed to parse as DeriveInput"); + let input = RuleDerive::from_derive_input(syn::parse2(stream)?)?; if let Some(input) = input { // Parse the structure now. - process_rule(input, ctx); + process_rule(input, ctx)?; } } Item::Mod(m) => { // Recursively process this now. - let (_, items) = m.content.expect("Module must be inlined"); + let (_, items) = m + .content + .ok_or_else(|| Error::new(Span::call_site(), "Module must be inlined"))?; for item in items { - process_item(item, ctx); + process_item(item, ctx)?; } } _ => {} } + + Ok(()) } -fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) { +pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { if input.extras.language { - ctx.set_language(&input.ident); + ctx.set_language(&input.ident)?; } // if input.extras.word { // ctx.set_word(&input.ident); @@ -147,21 +198,30 @@ fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) { match input.data { Data::Struct(DataStruct { fields, .. }) => { - gen_struct_or_variant(ident.to_string(), &input.attrs, fields.clone(), ctx); + gen_struct_or_variant(ident.to_string(), &input.attrs, fields.clone(), ctx)?; } Data::Enum(DataEnum { variants, .. 
}) => { - variants.iter().for_each(|v| { - gen_struct_or_variant( - format!("{}_{}", ident, v.ident), - &v.attrs, - v.fields.clone(), - ctx, - ) - }); + variants + .iter() + .flat_map(|v| { + gen_struct_or_variant( + format!("{}_{}", ident, v.ident), + &v.attrs, + v.fields.clone(), + ctx, + ) + .err() + }) + .reduce(|mut acc, e| { + acc.combine(e); + acc + }) + .map(Err::<(), _>) + .transpose()?; let mut members: Vec = vec![]; variants.iter().for_each(|v| { - let variant_path = format!("{}_{}", ident.clone(), v.ident); + let variant_path = format!("{}_{}", ident, v.ident); members.push(json!({ "type": "SYMBOL", "name": variant_path @@ -174,17 +234,14 @@ fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) { }); let precs = input.extras; - if precs.prec_left_param.is_some() || precs.prec_right_param.is_some() { - panic!( - "The attributes `prec_left` and `prec_right` cannot be applied directly to an enum" - ); - } - let rule = precs.apply(rule); + let rule = precs.apply(rule)?; ctx.rules_map.insert(ident.to_string(), rule); } - Data::Union(_) => panic!("Union not supported"), + Data::Union(_) => return Err(Error::new(ident.span(), "Union not supported")), } + + Ok(()) } #[derive(Debug)] @@ -201,53 +258,71 @@ pub struct Extras { } impl Extras { - fn new(attrs: &[Attribute]) -> Self { + fn new(attrs: &[Attribute]) -> Result { let prec_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "prec")); - let prec_param = prec_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + let prec_param = prec_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; let prec_left_attr = attrs .iter() .find(|attr| sitter_attr_matches(attr, "prec_left")); - let prec_left_param = prec_left_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + let prec_left_param = prec_left_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; let prec_right_attr = attrs .iter() .find(|attr| sitter_attr_matches(attr, "prec_right")); - let prec_right_param = 
prec_right_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + let prec_right_param = prec_right_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; let prec_dynamic_attr = attrs .iter() .find(|attr| sitter_attr_matches(attr, "prec_dynamic")); - let prec_dynamic_param = - prec_dynamic_attr.and_then(|a| a.parse_args_with(Expr::parse).ok()); + let prec_dynamic_param = prec_dynamic_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; + let token = attrs.iter().find(|attr| sitter_attr_matches(attr, "token")); let immediate = attrs .iter() - .any(|attr| sitter_attr_matches(attr, "immediate")); + .find(|attr| sitter_attr_matches(attr, "immediate")); + + if let (Some(im), Some(_tok)) = (&immediate, &token) { + return Err(Error::new(im.span(), "Cannot be immediate and token")); + } + + if let (Some(prec_left), Some(_prec_right)) = (prec_left_attr, prec_right_attr) { + return Err(Error::new( + prec_left.span(), + "only one of prec, prec_left, and prec_right can be specified", + )); + } - let token = attrs.iter().any(|attr| sitter_attr_matches(attr, "token")); let extra = attrs.iter().any(|attr| sitter_attr_matches(attr, "extra")); let language = attrs.iter().any(|a| sitter_attr_matches(a, "language")); let word = attrs.iter().any(|a| sitter_attr_matches(a, "word")); - Self { + Ok(Self { prec_param, prec_left_param, prec_right_param, prec_dynamic_param, - immediate, - token, + immediate: immediate.is_some(), + token: token.is_some(), extra, word, language, - } + }) } - fn apply(&self, rule: serde_json::Value) -> serde_json::Value { + fn apply(&self, rule: serde_json::Value) -> Result { let Self { prec_param, prec_left_param, @@ -259,28 +334,26 @@ impl Extras { } = self; let rule = if let Some(Expr::Lit(lit)) = prec_param { - if prec_left_param.is_some() || prec_right_param.is_some() { - panic!("only one of prec, prec_left, and prec_right can be specified"); - } - if let Lit::Int(i) = &lit.lit { json!({ "type": "PREC", - "value": 
i.base10_parse::().unwrap(), + "value": i.base10_parse::()?, "content": rule }) } else { - panic!("Expected integer literal for precedence"); + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); } } else if let Some(Expr::Lit(lit)) = prec_left_param { - if prec_right_param.is_some() { - panic!("only one of prec, prec_left, and prec_right can be specified"); - } - let value = if let Lit::Int(i) = &lit.lit { - i.base10_parse::().unwrap() + i.base10_parse::()? } else { - 0 + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); }; json!({ "type": "PREC_LEFT", @@ -289,9 +362,12 @@ impl Extras { }) } else if let Some(Expr::Lit(lit)) = prec_right_param { let value = if let Lit::Int(i) = &lit.lit { - i.base10_parse::().unwrap() + i.base10_parse::()? } else { - 0 + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); }; json!({ "type": "PREC_RIGHT", @@ -302,32 +378,31 @@ impl Extras { if let Lit::Int(i) = &lit.lit { json!({ "type": "PREC_DYNAMIC", - "value": i.base10_parse::().unwrap(), + "value": i.base10_parse::()?, "content": rule }) } else { - panic!("Expected integer literal for dynamic precedence"); + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); } } else { rule }; - if *immediate && *token { - panic!("Cannot be immediate and token"); - } - if *immediate { - json!({ + Ok(json!({ "type": "IMMEDIATE_TOKEN", "content": rule - }) + })) } else if *token { - json!({ + Ok(json!({ "type": "TOKEN", "content": rule, - }) + })) } else { - rule + Ok(rule) } } } @@ -337,24 +412,31 @@ fn gen_field( leaf_type: Type, attrs: Vec, ctx: &mut ExpansionState, -) -> (Value, bool) { - let precs = Extras::new(&attrs); +) -> Result<(Value, bool)> { + let precs = Extras::new(&attrs)?; if precs.word { // TODO: We don't want to allow this, but because we generate a dummy `_unit` field // currently, we have to. Super dumb, but we can fix it later. 
- ctx.set_word(path.clone()); - // panic!("Cannot specify word on a field"); + ctx.set_word(path.clone())?; } if precs.language { - panic!("Cannot specify language on a field"); + return Err(Error::new( + leaf_type.span(), + "Cannot specify language on a field", + )); } let leaf_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "leaf")); let text_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "text")); - if leaf_attr.is_some() && text_attr.is_some() { - panic!("Cannot specify leaf and text at the same time"); + if let Some(leaf_attr) = leaf_attr + && text_attr.is_some() + { + return Err(Error::new( + leaf_attr.span(), + "Cannot specify leaf and text at the same time", + )); } let mut skip_over = HashSet::new(); @@ -365,48 +447,50 @@ fn gen_field( let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); if let Some(text) = text_attr { - let input: TsInput = text.parse_args().unwrap(); + let input: TsInput = text.parse_args()?; // text is only used to parse a bunch of tokens which are then not used directly. As such, // the type is required to be `()` or else it will fail to compile. - match &leaf_type { - Type::Tuple(t) if t.elems.is_empty() => {} - _ => panic!("Unexpected type `()` is required for rust_sitter::text"), - } - return (precs.apply(input.evaluate().unwrap()), false); + // Not necessary, handled by `Extract`. 
+ // match &leaf_type { + // Type::Tuple(t) if t.elems.is_empty() => {} + // t => { + // dbg!(t); + // return Err(Error::new( + // t.span(), + // "Unexpected type `()` is required for text", + // )); + // } + // } + return Ok((precs.apply(input.evaluate()?)?, false)); } - let leaf_input = leaf_attr.and_then(|a| a.parse_args::().ok()); + let leaf_input = leaf_attr.map(|a| a.parse_args::()).transpose()?; if !is_vec && !is_option { if let Some(input) = leaf_input { ctx.rules_map - .insert(path.clone(), precs.apply(input.evaluate().unwrap())); + .insert(path.clone(), precs.apply(input.evaluate()?)?); - ( + Ok(( json!({ "type": "SYMBOL", "name": path }), is_option, - ) + )) } else { - let symbol_name = if let Type::Path(p) = filter_inner_type(&leaf_type, &skip_over) { - if p.path.segments.len() == 1 { - p.path.segments[0].ident.to_string() - } else { - panic!("Expected a single segment path"); - } - } else { - panic!("Expected a path"); + let symbol_name = match filter_inner_type(&leaf_type, &skip_over) { + Type::Path(p) => p.path.require_ident()?.to_string(), + t => return Err(Error::new(t.span(), "Expected a path")), }; - ( + Ok(( precs.apply(json!({ "type": "SYMBOL", "name": symbol_name, - })), + }))?, false, - ) + )) } } else if is_vec { let (field_json, field_optional) = gen_field( @@ -414,33 +498,39 @@ fn gen_field( inner_type_vec, leaf_attr.iter().cloned().cloned().collect(), ctx, - ); + )?; let (delimited_param, repeat_non_empty) = attrs .iter() .find_map(|attr| { if sitter_attr_matches(attr, "sep_by") { - Some((Some(attr.parse_args::().unwrap()), false)) + Some((Some(attr), false)) } else if sitter_attr_matches(attr, "sep_by1") { - Some((Some(attr.parse_args::().unwrap()), true)) + Some((Some(attr), true)) } else if sitter_attr_matches(attr, "repeat1") { Some((None, true)) } else { None } }) - .unwrap_or_else(|| (None, false)); + .unwrap_or((None, false)); + let delimited_param = delimited_param + .map(|a| a.parse_args::()) + .transpose()?; // NOTE (JAB): All 
of this is pretty ugly, I think we can flatten some of these types // without losing anything. - let delimiter_json = delimited_param.as_ref().map(|_| { - gen_field( - format!("{path}_vec_delimiter"), - parse_quote!(()), - vec![parse_quote!(#[text(#delimited_param)])], - ctx, - ) - }); + let delimiter_json = delimited_param + .as_ref() + .map(|_| { + gen_field( + format!("{path}_vec_delimiter"), + parse_quote!(()), + vec![parse_quote!(#[text(#delimited_param)])], + ctx, + ) + }) + .transpose()?; let field_rule_non_optional = json!({ "type": "FIELD", @@ -504,27 +594,30 @@ fn gen_field( }) }; - let vec_contents = precs.apply(vec_contents); + let vec_contents = precs.apply(vec_contents)?; let contents_ident = format!("{path}_vec_contents"); ctx.rules_map.insert(contents_ident.clone(), vec_contents); - ( + Ok(( json!({ "type": "SYMBOL", "name": contents_ident, }), !repeat_non_empty, - ) + )) } else { // is_option - let (field_json, field_optional) = gen_field(path, inner_type_option, attrs, ctx); + let (field_json, field_optional) = gen_field(path, inner_type_option, attrs, ctx)?; if field_optional { - panic!("Option> is not supported"); + return Err(Error::new( + Span::call_site(), + "Option> is not supported", + )); } - (precs.apply(field_json), true) + Ok((precs.apply(field_json)?, true)) } } @@ -533,13 +626,13 @@ fn gen_struct_or_variant( attrs: &[Attribute], fields: Fields, ctx: &mut ExpansionState, -) { +) -> Result<()> { fn gen_field_optional( path: &str, field: &Field, ctx: &mut ExpansionState, ident_str: String, - ) -> Value { + ) -> Result { // Produce a cleaner grammar: fields with `_` are hidden fields. 
let path = if ident_str.starts_with("_") { format!("_{path}_{ident_str}") @@ -547,7 +640,7 @@ fn gen_struct_or_variant( format!("{path}_{ident_str}") }; let (field_contents, is_option) = - gen_field(path, field.ty.clone(), field.attrs.clone(), ctx); + gen_field(path, field.ty.clone(), field.attrs.clone(), ctx)?; let core = json!({ "type": "FIELD", @@ -555,7 +648,7 @@ fn gen_struct_or_variant( "content": field_contents }); - if is_option { + let r = if is_option { json!({ "type": "CHOICE", "members": [ @@ -567,10 +660,11 @@ fn gen_struct_or_variant( }) } else { core - } + }; + Ok(r) } - let children = fields + let (children, errs): (Vec<_>, Vec<_>) = fields .iter() .enumerate() .filter_map(|(i, field)| { @@ -590,7 +684,14 @@ fn gen_struct_or_variant( Some(gen_field_optional(&path, field, ctx, ident_str)) } }) - .collect::>(); + .partition_result(); + let err = errs.into_iter().reduce(|mut acc, e| { + acc.combine(e); + acc + }); + if let Some(err) = err { + return Err(err); + } let base_rule = match fields { Fields::Unit => { @@ -605,7 +706,7 @@ fn gen_struct_or_variant( elems: Punctuated::new(), }), }; - gen_field_optional(&path, &dummy_field, ctx, "unit".to_owned()) + gen_field_optional(&path, &dummy_field, ctx, "unit".to_owned())? 
} _ => json!({ "type": "SEQ", @@ -613,7 +714,8 @@ fn gen_struct_or_variant( }), }; - let precs = Extras::new(attrs); + let precs = Extras::new(attrs)?; - ctx.rules_map.insert(path, precs.apply(base_rule)); + ctx.rules_map.insert(path, precs.apply(base_rule)?); + Ok(()) } diff --git a/common/src/lib.rs b/common/src/lib.rs index 9f7a34b..4bea022 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,10 +1,8 @@ use proc_macro2::Span; use quote::ToTokens; -use std::{collections::HashSet, sync::LazyLock}; +use std::collections::HashSet; use syn::{ - parse::{Parse, ParseStream}, - punctuated::Punctuated, - *, + parse::{Parse, ParseStream}, punctuated::Punctuated, spanned::Spanned, * }; pub mod expansion; @@ -83,14 +81,15 @@ impl TsInput { attrs: _, lit: Lit::Str(f), }) => f, - _ => return Err(syn::Error::new(Span::call_site(), "expected a string")), + _ => return Err(syn::Error::new(e.span(), "expected a string")), }; Ok(s.value()) } fn get_arg(p: &Punctuated, i: usize, expected: usize) -> Result<&Expr> { assert!(i < expected); if p.len() != expected { - return Err(syn::Error::new(Span::call_site(), "Too many arguments")); + // TODO: Fix the span + return Err(syn::Error::new(p.span(), "Too many arguments")); } Ok(p.get(i).unwrap()) } @@ -108,15 +107,15 @@ impl TsInput { paren_token: _, args, }) => { - let func = match &**func { + let name = match &**func { Expr::Path(ExprPath { attrs: _, qself: _, path, }) => path.require_ident()?.to_string(), - _ => return Err(syn::Error::new(Span::call_site(), "Expected path")), + k => return Err(syn::Error::new(k.span(), "Expected path")), }; - match func.as_str() { + match name.as_str() { "optional" => { let inner = Self::new(get_arg(args, 0, 1)?); let mut members = vec![]; @@ -173,8 +172,8 @@ impl TsInput { } k => { return Err(syn::Error::new( - Span::call_site(), - format!("Unexpected function call {k}"), + func.span(), + format!("Unexpected function call `{k}`"), )); } } @@ -186,54 +185,18 @@ impl TsInput { "name": 
ident.to_string(), }) } - k => return Err(syn::Error::new(Span::call_site(), format!("Unexpected input type: {k:?}"))), + k => return Err(syn::Error::new(k.span(), format!("Unexpected input type: {k:?}"))), }; Ok(json) } } -static RUST_SITTER_ATTRS: LazyLock> = LazyLock::new(|| { - [ - "leaf", - "token", - "immediate", - "prec", - "prec_left", - "prec_right", - "prec_dynamic", - "extra", - "repeat", - "sep_by", - "sep_by1", - "text", - "pattern", - "with", - "with_node", - "transform", - ] - .into_iter() - .collect() -}); - -pub fn is_sitter_attr(attr: &Attribute) -> bool { - let is_explicit = attr - .path() - .segments - .iter() - .next() - .map(|segment| segment.ident == "rust_sitter") - .unwrap_or(false); - is_explicit || { - attr.path().segments.len() == 1 - && RUST_SITTER_ATTRS.contains(attr.path().segments[0].ident.to_string().as_str()) - } -} - pub fn sitter_attr_matches(attr: &Attribute, name: &str) -> bool { let path = attr.path(); if path.segments.len() == 1 { path.segments[0].ident == name } else if path.segments.len() == 2 { + // This is no longer possible, we can clean this up. 
path.segments[0].ident == "rust_sitter" && path.segments[1].ident == name } else { false diff --git a/example/build.rs b/example/build.rs index 963f1fc..2a4adee 100644 --- a/example/build.rs +++ b/example/build.rs @@ -4,10 +4,8 @@ fn main() { for example in examples { let example = example.unwrap(); let path = example.path(); - if path.is_file() { - if path.file_stem().unwrap().to_str().unwrap() != "main" { - rust_sitter_tool::build_parser(&path); - } + if path.is_file() && path.file_stem().unwrap().to_str().unwrap() != "main" { + rust_sitter_tool::build_parser(&path); } } } diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 8a6fb0b..193383a 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -3,8 +3,8 @@ use std::collections::HashSet; use crate::errors::IteratorExt as _; use proc_macro2::Span; use quote::{ToTokens, quote}; -use rust_sitter_common::*; -use syn::*; +use rust_sitter_common::{expansion::{ExpansionState, RuleDerive}, *}; +use syn::{spanned::Spanned, *}; pub enum ParamOrField { Param(Expr), @@ -21,6 +21,13 @@ impl ToTokens for ParamOrField { } pub fn expand_rule(input: DeriveInput) -> Result { + // At the very beginning, parse out the common rule json. This will pick up all of the errors + // there at compile time, and allow us to cleanly represent them. This is a lot of extra + // compilation time but it is the best we can do for now. Probably isn't noticable in general. + let d = RuleDerive::from_derive_input_known(input.clone())?; + let mut ctx = ExpansionState::default(); + rust_sitter_common::expansion::process_rule(d, &mut ctx)?; + // TODO: Allow renaming it. let is_language = input .attrs @@ -114,7 +121,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { }; (extract_impl, rule_impl) } - Data::Union(_) => panic!("Union types not supported"), + Data::Union(_) => return Err(Error::new(ident.span(), "Union types not supported")), }; // If it is language, then we need to generate the corresponding functions. 
@@ -151,8 +158,8 @@ pub fn expand_rule(input: DeriveInput) -> Result { }) } -fn gen_field(ident_str: String, leaf: Field) -> Expr { - let leaf_type = leaf.ty; +fn gen_field(ident_str: String, leaf: Field) -> Result { + let leaf_type = &leaf.ty; let leaf_attr = leaf .attrs @@ -161,33 +168,48 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { let transform = leaf.attrs.iter().find_map(|attr| { if sitter_attr_matches(attr, "transform") || sitter_attr_matches(attr, "with") { - Some((false, attr.parse_args::().unwrap())) + Some((false, attr)) } else if sitter_attr_matches(attr, "with_node") { - Some((true, attr.parse_args::().unwrap())) + Some((true, attr)) } else { None } }); if transform.is_some() && leaf_attr.is_none() { - panic!("Cannot transform non-leaf nodes"); + return Err(Error::new(leaf.span(), "Cannot transform non-leaf nodes")); } let text_attr = leaf .attrs .iter() .find(|attr| sitter_attr_matches(attr, "text")); - if text_attr.is_some() { - if leaf_attr.is_some() { - panic!("Cannot use leaf and text at the same time"); + if let Some(text_attr) = text_attr { + if let Some(attr) = leaf_attr { + return Err(Error::new( + attr.span(), + "Cannot use leaf and text at the same time", + )); } - return syn::parse_quote!({ + let text_input = text_attr.parse_args::()?; + text_input.evaluate()?; + return Ok(syn::parse_quote!({ ::rust_sitter::__private::skip_text(cursor, #ident_str); - }); + })); + } + + // NOTE (JAB, 2025-07-17): We want to use this eventually in the extract generation, so it + // makes sense to parse it here. Additionally, we get compile time errors at this level instead + // of at the parser generation phase. + let leaf_input = leaf_attr.map(|a| a.parse_args::()).transpose()?; + // But for now, we just evaluate it to make sure it works correctly. 
+ if let Some(leaf_input) = leaf_input { + leaf_input.evaluate()?; } let (leaf_type, closure_expr): (Type, Expr) = match transform { Some((is_node, closure)) => { + let closure = closure.parse_args::()?; let mut non_leaf = HashSet::new(); // Major hackery... if !is_node { @@ -196,7 +218,7 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { non_leaf.insert("Option"); non_leaf.insert("Vec"); } - let wrapped_leaf_type = wrap_leaf_type(&leaf_type, &non_leaf); + let wrapped_leaf_type = wrap_leaf_type(leaf_type, &non_leaf); let input_type: syn::Type = if is_node { syn::parse_quote!(&::rust_sitter::NodeExt<'_>) } else { @@ -207,12 +229,12 @@ fn gen_field(ident_str: String, leaf: Field) -> Expr { syn::parse_quote!(Some((#closure) as fn(#input_type) -> #leaf_type)), ) } - None => (leaf_type, syn::parse_quote!(None)), + None => (leaf_type.clone(), syn::parse_quote!(None)), }; - syn::parse_quote!({ + Ok(syn::parse_quote!({ ::rust_sitter::__private::extract_field::<#leaf_type,_>(cursor, source, last_idx, last_pt, #ident_str, #closure_expr) - }) + })) } fn gen_struct_or_variant( @@ -232,7 +254,7 @@ fn gen_struct_or_variant( ty: Type::Verbatim(quote!(())), // unit type. }; - gen_field("unit".to_string(), dummy_field) + gen_field("unit".to_string(), dummy_field)? }; vec![ParamOrField::Param(expr)] } else { @@ -253,7 +275,7 @@ fn gen_struct_or_variant( .map(|v| v.to_string()) .unwrap_or(format!("{i}")); - gen_field(ident_str, field.clone()) + gen_field(ident_str, field.clone())? 
}; let field = if let Some(field_name) = &field.ident { @@ -311,208 +333,3 @@ fn gen_struct_or_variant( syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx, last_pt| #construct_expr)), ) } - -// pub fn expand_grammar(input: ItemMod) -> Result { -// let attr = input -// .attrs -// .iter() -// .find(|a| a.path() == &syn::parse_quote!(rust_sitter::grammar)) -// .ok_or_else(|| syn::Error::new(Span::call_site(), "Each grammar must have a name"))?; -// let grammar_name_expr = -// attr.parse_args_with(Punctuated::::parse_terminated)?; -// if grammar_name_expr.is_empty() { -// return Err(syn::Error::new( -// Span::call_site(), -// "Expected a string literal grammar name", -// )); -// } -// if grammar_name_expr.len() > 2 { -// return Err(syn::Error::new( -// Span::call_site(), -// "Expected at most two inputs", -// )); -// } -// let grammar_name = if let Expr::Lit(ExprLit { -// attrs: _, -// lit: Lit::Str(s), -// }) = grammar_name_expr.first().unwrap() -// { -// s.value() -// } else { -// return Err(syn::Error::new( -// Span::call_site(), -// "Expected a string literal grammar name", -// )); -// }; -// -// let should_parse = if let Some(Expr::Lit(ExprLit { -// attrs: _, -// lit: Lit::Bool(b), -// })) = grammar_name_expr.last() -// { -// b.value() -// } else { -// false -// }; -// -// let (brace, new_contents) = input.content.as_ref().ok_or_else(|| { -// syn::Error::new( -// Span::call_site(), -// "Expected the module to have inline contents (`mod my_module { .. }` syntax)", -// ) -// })?; -// -// let root_type = new_contents -// .iter() -// .find_map(|item| match item { -// Item::Enum(ItemEnum { ident, attrs, .. }) -// | Item::Struct(ItemStruct { ident, attrs, .. 
}) => { -// if attrs -// .iter() -// .any(|attr| attr.path() == &syn::parse_quote!(rust_sitter::language)) -// { -// Some(ident.clone()) -// } else { -// None -// } -// } -// _ => None, -// }) -// .ok_or_else(|| { -// syn::Error::new( -// Span::call_site(), -// "Each parser must have the root type annotated with `#[rust_sitter::language]`", -// ) -// })?; -// -// let mut transformed: Vec = new_contents -// .iter() -// .cloned() -// .map(|c| match c { -// Item::Enum(mut e) => { -// let match_cases: Vec = e.variants.iter().map(|v| { -// let variant_path = format!("{}_{}", e.ident, v.ident); -// -// let extract_expr = gen_struct_or_variant( -// v.fields.clone(), -// Some(v.ident.clone()), -// e.ident.clone(), -// v.attrs.clone(), -// )?; -// Ok(syn::parse_quote! { -// #variant_path => return #extract_expr -// }) -// }).sift::>()?; -// -// e.attrs.retain(|a| !is_sitter_attr(a)); -// e.variants.iter_mut().for_each(|v| { -// v.attrs.retain(|a| !is_sitter_attr(a)); -// v.fields.iter_mut().for_each(|f| { -// f.attrs.retain(|a| !is_sitter_attr(a)); -// }); -// }); -// -// let enum_name = &e.ident; -// let extract_impl: Item = syn::parse_quote! 
{ -// impl ::rust_sitter::Extract<#enum_name> for #enum_name { -// type LeafFn<'a> = (); -// -// #[allow(non_snake_case)] -// fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { -// let node = node.expect("No node found"); -// -// let mut cursor = node.walk(); -// assert!(cursor.goto_first_child(), "Could not find a child corresponding to any enum branch"); -// loop { -// let node = cursor.node(); -// match node.kind() { -// #(#match_cases),*, -// _ => if !cursor.goto_next_sibling() { -// panic!("Could not find a child corresponding to any enum branch") -// } -// } -// } -// } -// } -// }; -// Ok(vec![Item::Enum(e), extract_impl]) -// } -// -// Item::Struct(mut s) => { -// let struct_name = &s.ident; -// let extract_expr = gen_struct_or_variant( -// s.fields.clone(), -// None, -// s.ident.clone(), -// s.attrs.clone(), -// )?; -// -// s.attrs.retain(|a| !is_sitter_attr(a)); -// s.fields.iter_mut().for_each(|f| { -// f.attrs.retain(|a| !is_sitter_attr(a)); -// }); -// -// -// let extract_impl: Item = syn::parse_quote! { -// impl ::rust_sitter::Extract<#struct_name> for #struct_name { -// type LeafFn<'a> = (); -// -// #[allow(non_snake_case)] -// fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { -// let node = node.expect("no node found"); -// #extract_expr -// } -// } -// }; -// -// Ok(vec![Item::Struct(s), extract_impl]) -// } -// -// o => Ok(vec![o]), -// }) -// .sift::>()?.into_iter().flatten().collect(); -// -// let tree_sitter_ident = Ident::new(&format!("tree_sitter_{grammar_name}"), Span::call_site()); -// -// transformed.push(syn::parse_quote! { -// unsafe extern "C" { -// fn #tree_sitter_ident() -> ::rust_sitter::tree_sitter::Language; -// } -// }); -// -// transformed.push(syn::parse_quote! 
{ -// pub fn language() -> ::rust_sitter::tree_sitter::Language { -// unsafe { #tree_sitter_ident() } -// } -// }); -// -// let root_type_docstr = format!("[`{root_type}`]"); -// transformed.push(syn::parse_quote! { -// /// Parse an input string according to the grammar. Returns either any parsing errors that happened, or a -// #[doc = #root_type_docstr] -// /// instance containing the parsed structured data. -// pub fn parse(input: &str) -> core::result::Result<#root_type, Vec<::rust_sitter::errors::ParseError>> { -// ::rust_sitter::__private::parse::<#root_type>(input, language) -// } -// }); -// -// // Produces the grammar as a JSON constant. -// if should_parse { -// let grammars = rust_sitter_common::expansion::generate_grammar(&input).to_string(); -// transformed.push(syn::parse_quote! { -// pub const GRAMMAR: &str = #grammars; -// }); -// } -// -// let mut filtered_attrs = input.attrs; -// filtered_attrs.retain(|a| !is_sitter_attr(a)); -// Ok(ItemMod { -// attrs: filtered_attrs, -// vis: input.vis, -// unsafety: None, -// mod_token: input.mod_token, -// ident: input.ident, -// content: Some((*brace, transformed)), -// semi: input.semi, -// }) -// } diff --git a/macro/src/lib.rs b/macro/src/lib.rs index ec72c2f..c4f6826 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -388,7 +388,6 @@ mod tests { use std::process::Command; use quote::ToTokens; - use quote::quote; use syn::{ItemMod, Result, parse_quote}; use tempfile::tempdir; @@ -403,7 +402,7 @@ mod tests { // This might not actually work... 
if let Ok(parsed) = syn::parse2(stream.clone()) { let result = expand_rule(parsed).unwrap(); - output.push(proc_macro2::TokenStream::from(result)); + output.push(result); } else { output.push(stream); } diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 910d665..397adb2 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -1,4 +1,5 @@ -const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 25, 2)); +// TODO: Switch on which version we are using specifically. +const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 25, 6)); #[cfg(feature = "build_parsers")] use std::io::Write; @@ -11,27 +12,35 @@ use tree_sitter_generate::generate_parser_for_grammar; /// Using the `cc` crate, generates and compiles a C parser with Tree Sitter /// for every Rust Sitter grammar found in the given module and recursive /// submodules. -pub fn build_parser

(root_file: &P) -where P: AsRef + ?Sized +pub fn build_parser

(root_file: &P) +where + P: AsRef + ?Sized, { let root_file = syn_inline_mod::parse_and_inline_modules(root_file.as_ref()); - let grammar = rust_sitter_common::expansion::generate_grammar(root_file.items); - generate_parser(&grammar); + match rust_sitter_common::expansion::generate_grammar(root_file.items) { + Err(e) => panic!("{e}"), + Ok(None) => {} + Ok(Some(grammar)) => { + let out_dir = std::env::var("OUT_DIR").unwrap(); + // TODO: We want to generate better errors here as well. However, it isn't really + // possible to generate it until we can produce a full grammar, which we also can't do + // if we derive on Rule. + if let Err(e) = generate_parser(&grammar, &out_dir) { + panic!("{e}"); + } + } + } } -fn generate_parser(grammar: &serde_json::Value) { - use std::env; - let out_dir = env::var("OUT_DIR").unwrap(); - let emit_artifacts: bool = env::var("RUST_SITTER_EMIT_ARTIFACTS") - .map(|s| s.parse().unwrap_or(false)) - .unwrap_or(false); - +// TODO: Rewrite this function to support specifying the out dir and target manually, to allow +// generating the parser to a local folder for easier integration with external text editors. +fn generate_parser(grammar: &serde_json::Value, _out_dir: &str) -> Result<(), String> { let (grammar_name, grammar_c) = match generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION) { Ok(o) => o, Err(e) => { // Doing it this way produces a clean error from tree-sitter on failure. 
- panic!("generation error: {e}"); + return Err(format!("generation error: {e}")); } }; let tempfile = tempfile::Builder::new() @@ -39,19 +48,16 @@ fn generate_parser(grammar: &serde_json::Value) { .tempdir() .unwrap(); - let dir = if emit_artifacts { - let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}",)); - if grammar_dir.is_dir() { - std::fs::remove_dir_all(&grammar_dir).expect("Couldn't clear old artifacts"); - } - std::fs::DirBuilder::new() - .recursive(true) - .create(grammar_dir.clone()) - .expect("Couldn't create grammar JSON directory"); - grammar_dir - } else { - tempfile.path().into() - }; + let dir = tempfile.path(); + // let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}",)); + // if grammar_dir.is_dir() { + // std::fs::remove_dir_all(&grammar_dir).expect("Couldn't clear old artifacts"); + // } + // std::fs::DirBuilder::new() + // .recursive(true) + // .create(grammar_dir.clone()) + // .expect("Couldn't create grammar JSON directory"); + // grammar_dir let grammar_file = dir.join("parser.c"); let mut f = std::fs::File::create(grammar_file).unwrap(); @@ -76,7 +82,7 @@ fn generate_parser(grammar: &serde_json::Value) { drop(parser_file); let sysroot_dir = dir.join("sysroot"); - if env::var("TARGET").unwrap().starts_with("wasm32") { + if std::env::var("TARGET").unwrap().starts_with("wasm32") { std::fs::create_dir(&sysroot_dir).unwrap(); let mut stdint = std::fs::File::create(sysroot_dir.join("stdint.h")).unwrap(); stdint @@ -104,7 +110,7 @@ fn generate_parser(grammar: &serde_json::Value) { } let mut c_config = cc::Build::new(); - c_config.std("c11").include(&dir).include(&sysroot_dir); + c_config.std("c11").include(dir).include(&sysroot_dir); c_config .flag_if_supported("-Wno-unused-label") .flag_if_supported("-Wno-unused-parameter") @@ -114,18 +120,19 @@ fn generate_parser(grammar: &serde_json::Value) { c_config.file(dir.join("parser.c")); c_config.compile(&grammar_name); + Ok(()) } 
#[cfg(test)] mod tests { - use syn::{parse_quote, ItemMod}; + use syn::{ItemMod, parse_quote}; use super::GENERATED_SEMANTIC_VERSION; // use rust_sitter_common::expansion::generate_grammar; use tree_sitter_generate::generate_parser_for_grammar; fn generate_grammar(item: ItemMod) -> serde_json::Value { let (_, items) = item.content.unwrap(); - rust_sitter_common::expansion::generate_grammar(items) + rust_sitter_common::expansion::generate_grammar(items).unwrap().unwrap() } #[test] From c114938f6c1076bee428b2c03b134f3e700c499c Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 17 Jul 2025 15:37:19 -0500 Subject: [PATCH 25/50] Update README for latest changes --- README.md | 126 ++++++++++++++++++++++++++---------------------------- 1 file changed, 60 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index cab5039..89b4992 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -# TODO - OUT OF DATE, needs an update for latest refactor # Rust Sitter - Otonoma fork **This project is a fork of [rust-sitter](https://github.com/hydro-project/rust-sitter). It has been heavily modified in many breaking ways.** @@ -24,48 +23,46 @@ use std::path::PathBuf; fn main() { println!("cargo:rerun-if-changed=src"); - // Path to the file containing your grammar. - rust_sitter_tool::build_parsers(&PathBuf::from("src/main.rs")); + // Path to the file containing your grammar and any submodules. + rust_sitter_tool::build_parsers("src/grammar/mod.rs")); } ``` ## Defining a Grammar -Now that we have Rust Sitter added to our project, we can define our grammar. Rust Sitter grammars are defined in annotated Rust modules. First, we define the module that will contain our grammar +Now that we have Rust Sitter added to our project, we can define our grammar. Rust Sitter grammars are defined in Rust modules. First, we create a module file for the grammar in `src/grammar/mod.rs`. 
Note, this can be any module, however, +due to various quirks with the build system it is required that you have one grammar per module, and all types +in the grammar are defined within it, or a submodule of the module. -```rust -#[rust_sitter::grammar("arithmetic")] -mod grammar { - -} -``` - -Then, inside the module, we can define individual AST nodes. For this simple example, we'll define an expression that can be used in a mathematical expression. Note that we annotate this type as `#[rust_sitter::language]` to indicate that it is the root AST type. +Then, inside the module, we can define individual AST nodes. For this simple example, we'll define an expression that can be used in a mathematical expression. Note that we annotate this type as `#[language]` to indicate that it is the root AST type. ```rust -#[rust_sitter::language] +// in ./src/grammar/mod.rs +use rust_sitter::Rule; +#[derive(Rule)] +#[language] pub enum Expr { Number(u32), Add(Box, Box) } ``` -Now that we have the type defined, we must annotate the enum variants to describe how to identify them in the text being parsed. First, we can apply `rust_sitter::leaf` to use a regular expression to match digits corresponding to a number. +Now that we have the type defined, we must annotate the enum variants to describe how to identify them in the text being parsed. First, we can apply `leaf` to use a regular expression to match digits corresponding to a number. The value will try to extract the value using a default extraction for the type. For numeric types, this -defaults to `FromStr`. You can specify an alternate function using `#[tree_sitter::with]`. +defaults to `FromStr`. You can specify an alternate function using `#[with]`. ```rust Number( - #[rust_sitter::leaf(re(r"\d+"))] + #[leaf(re(r"\d+"))] u32, ) ``` -For the `Add` variant, things are a bit more complicated. First, we add an extra field corresponding to the `+` that must sit between the two sub-expressions. 
This can be achieved with `rust_sitter::text` or `rust_sitter::leaf`, which instructs the parser to match a specific string. Because we are parsing to `()`, we do not need to provide a transformation. +For the `Add` variant, things are a bit more complicated. First, we add an extra field corresponding to the `+` that must sit between the two sub-expressions. This can be achieved with `text` or `leaf`, which instructs the parser to match a specific string. ```rust Add( Box, - #[rust_sitter::leaf("+")] (), + #[text("+")] (), Box, ) ``` @@ -73,10 +70,10 @@ Add( If we try to compile this grammar, however, we will see ane error due to conflicting parse trees for expressions like `1 + 2 + 3`, which could be parsed as `(1 + 2) + 3` or `1 + (2 + 3)`. We want the former, so we can add a further annotation specifying that we want left-associativity for this rule. ```rust -#[rust_sitter::prec_left(1)] +#[prec_left(1)] Add( Box, - #[rust_sitter::leaf("+")] (), + #[text("+")] (), Box, ) ``` @@ -84,30 +81,29 @@ Add( All together, our grammar looks like this: ```rust -#[rust_sitter::grammar("arithmetic")] -mod grammar { - #[rust_sitter::language] - pub enum Expr { - Number( - #[rust_sitter::leaf(re(r"\d+"))] - u32, - ), - #[rust_sitter::prec_left(1)] - Add( - Box, - #[rust_sitter::leaf("+")] (), - Box, - ) - } +use rust_sitter::Rule; +#[derive(Rule)] +#[language] +pub enum Expr { + Number( + #[leaf(re(r"\d+"))] + u32, + ), + #[prec_left(1)] + Add( + Box, + #[text("+")] (), + Box, + ) } ``` We can then parse text using this grammar: ```rust -dbg!(grammar::parse("1+2+3")); +dbg!(grammar::Expr::parse("1+2+3")); /* -grammar::parse("1+2+3") = Ok(Add( +grammar::Expr::parse("1+2+3") = Ok(Add( Add( Number( 1, @@ -128,30 +124,32 @@ grammar::parse("1+2+3") = Ok(Add( ## Type Annotations Rust Sitter supports a number of annotations that can be applied to type and fields in your grammar. These annotations can be used to control how the parser behaves, and how the resulting AST is constructed. 
-### `#[rust_sitter::language]` +### `#[language]` This annotation marks the entrypoint for parsing, and determines which AST type will be returned from parsing. Only one type in the grammar can be marked as the entrypoint. ```rust -#[rust_sitter::language] +#[derive(Rule)] +#[language] struct Code { ... } ```` -### `#[rust_sitter::extra]` +### `#[extra]` This annotation marks a node as extra and can safely be skipped while parsing. This is useful for handling whitespace/newlines/comments. ```rust -#[rust_sitter::extra] -#[rust_sitter::leaf(re(r"\s"))] +#[derive(Rule)] +#[extra] +#[leaf(re(r"\s"))] // Structs and fields that start with `_` are hidden from the output grammar. struct _Whitespace; ``` ## Field Annotations -### `#[rust_sitter::leaf(...)]` and `#[rust_sitter::text(...)]` -The `#[rust_sitter::leaf(...)]` annotation can be used to define a leaf node in the AST. -`#[rust_sitter::text(...)]` is similar, but it does not create a named node in the grammar and cannot be +### `#[leaf(...)]` and `#[text(...)]` +The `#[leaf(...)]` annotation can be used to define a leaf node in the AST. +`#[text(...)]` is similar, but it does not create a named node in the grammar and cannot be extracted. It must always be assigned to `()`. `leaf` and `text` take an input that looks like the [tree sitter @@ -168,27 +166,29 @@ Others can be added in the future as needed. 
`leaf` can either be applied to a field in a struct / enum variant (as seen above), or directly on a type with no fields: ```rust -#[rust_sitter::leaf("9")] +#[derive(Rule)] +#[leaf("9")] struct BigDigit; +#[derive(Rule)] enum SmallDigit { - #[rust_sitter::leaf("0")] + #[leaf("0")] Zero, - #[rust_sitter::leaf("1")] + #[leaf("1")] One, } ``` -### `#[rust_sitter::prec(...)]` / `#[rust_sitter::prec_left(...)]` / `#[rust_sitter::prec_right(...)]` / `#[rust_sitter::prec_dynamic(...)]` +### `#[prec(...)]` / `#[prec_left(...)]` / `#[prec_right(...)]` / `#[prec_dynamic(...)]` This annotation can be used to define a non/left/right-associative operator. This annotation takes a single parameter, which is the precedence level of the operator (higher binds more tightly). -### `[#rust_sitter::immediate]` +### `#[immediate]` Usually, whitespace is optional before each token. This attribute means that the token will only match if there is no whitespace. -### `#[rust_sitter::skip(...)]` +### `#[skip(...)]` This annotation can be used to define a field that does not correspond to anything in the input string, such as some metadata. This annotation takes a single parameter, which is the value that should be used to populate that field at runtime. -### `#[rust_sitter::word]` +### `#[word]` This annotation marks the field as a Tree Sitter [word](https://tree-sitter.github.io/tree-sitter/creating-parsers#keywords), which is useful when handling errors involving keywords. Only one field in the grammar can be marked as a word. ## Special Types @@ -197,24 +197,24 @@ Rust Sitter has a few special types that can be used to define more complex gram ### `Vec` To parse repeating structures, you can use a `Vec` to parse a list of `T`s. Note that the `Vec` type **cannot** be wrapped in another `Vec` (create additional structs if this is necessary). There are two special attributes that can be applied to a `Vec` field to control the parsing behavior. 
-The `#[rust_sitter::delimited(...)]` attribute can be used to specify a separator between elements of the +The `#[sep_by(...)]` attribute can be used to specify a separator between elements of the list. This is parsed in the same way as `text` and `leaf` and therefore supports all of the listed tree-sitter grammar above. ```rust pub struct CommaSeparatedExprs { - #[rust_sitter::delimited(",")] + #[sep_by(",")] numbers: Vec, } ``` -The `#[rust_sitter::repeat(...)]` attribute can be used to specify additional configuration for the parser. Currently, there is only one available parameter: `non_empty`, which takes a boolean that specifies if the list must contain at least one element. For example, we can define a grammar that parses a non-empty comma-separated list of numbers: +The `#[repeat1]` can be used to specify that the list must contain at least, or you can use `#[sep_by1(...)] ```rust pub struct CommaSeparatedExprs { - #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited(",")] - )] + #[repeat1] + #[sep_by(",")] + // Or just use #[sep_by1(",")] numbers: Vec, } ``` @@ -224,8 +224,7 @@ To parse optional structures, you can use an `Option` to parse a single `T` o ```rust pub struct CommaSeparatedExprs { - #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited(",")] + #[sep_by1(",")] numbers: Vec>, } ``` @@ -235,15 +234,10 @@ When using Rust Sitter to power diagnostic tools, it can be helpful to access sp ```rust pub struct CommaSeparatedExprs { - #[rust_sitter::repeat(non_empty = true)] - #[rust_sitter::delimited(",")] + #[sep_by1(",")] numbers: Vec>>, } ``` ### `Box` Boxes are automatically constructed around the inner type when parsing, but Rust Sitter doesn't do anything extra beyond that. - -## Debugging - -To view the generated grammar, you can set the `RUST_SITTER_EMIT_ARTIFACTS` environment variable to `true`. 
This will cause the generated grammar to be written to wherever cargo sets `OUT_DIR` (usually `target/debug/build/-/out`). From e71a52a0a80c9d17fa3706aa107d0fc6f7777317 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 17 Jul 2025 15:55:25 -0500 Subject: [PATCH 26/50] Revert author change --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 832e633..4dbc31e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,5 +11,5 @@ members = [ version = "0.5.0" authors = [ "Jason Boatman", - "(formerly) Shadaj Laddad" + "Shadaj Laddad " ] From 6ea7709a72ac794e63deecf931034163d5db175a Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Fri, 18 Jul 2025 10:26:17 -0500 Subject: [PATCH 27/50] Don't fail to compile in `build.rs` phase if a `SYMBOL` is used which doesn't exist. --- common/src/expansion.rs | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 257e0a4..4b3bedd 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -71,6 +71,9 @@ pub fn generate_grammar(root_file: Vec) -> Result> { // during macro expansion, which is much more useful for development purposes. return Ok(None); } + if state.verify_seen().is_err() { + return Ok(None); + } // This error is useful for us and cannot be generated by proc macro expansion. let language = state @@ -123,6 +126,57 @@ impl ExpansionState { } err } + fn verify_seen(&self) -> Result<()> { + if let Some(e) = self + .rules_map + .values() + .flat_map(|v| self.check_seen_value(v).err()) + .reduce(|mut acc, e| { + acc.combine(e); + acc + }) + { + Err(e) + } else { + Ok(()) + } + } + // TODO: This could be made a lot simpler by eventually having actual types for this. That + // could also make it easier to generate traits which produce grammars instead. + fn check_seen_value(&self, value: &Value) -> Result<()> { + // Each value is always a map. 
+ let map = value.as_object().unwrap(); + if map.contains_key("members") { + let members = map["members"].as_array().unwrap(); + for member in members { + self.check_seen_value(member)?; + } + } else { + if map.is_empty() { + return Ok(()); + } + // type is always present, expect on the empty rule for source_file. + match map["type"].as_str().unwrap() { + "SYMBOL" => { + // Check if another top level rule exists, otherwise this is an error. + let name = map["name"].as_str().unwrap(); + if !self.rules_map.contains_key(name) { + return Err(Error::new( + Span::call_site(), + format!("Symbol found with no corresponding value: {name}"), + )); + } + } + _ => { + if let Some(content) = map.get("content") { + self.check_seen_value(content)?; + } + } + } + } + + Ok(()) + } fn set_language(&mut self, ident: &Ident) -> Result<()> { if let Some(existing) = &self.language_rule { return Err(self.accumulate_error(Error::new( From 2248359aa4401a07188da92a406c1ebd5c32d33d Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 21 Jul 2025 13:56:44 -0500 Subject: [PATCH 28/50] Some minor refactoring --- README.md | 3 +- example/src/main.rs | 2 +- ...e__arithmetic__tests__failed_parses-3.snap | 6 +- ...e__arithmetic__tests__failed_parses-4.snap | 6 +- ...ple__arithmetic__tests__failed_parses.snap | 6 +- macro/src/expansion.rs | 2 +- macro/src/lib.rs | 2 +- ...t_sitter_macro__tests__enum_prec_left.snap | 2 +- ...t_sitter_macro__tests__enum_recursive.snap | 2 +- ...macro__tests__enum_transformed_fields.snap | 2 +- ...r_macro__tests__enum_with_named_field.snap | 2 +- ...macro__tests__enum_with_unamed_vector.snap | 4 +- ...r_macro__tests__grammar_unboxed_field.snap | 2 +- ...t_sitter_macro__tests__spanned_in_vec.snap | 2 +- ...ust_sitter_macro__tests__struct_extra.snap | 2 +- ..._sitter_macro__tests__struct_optional.snap | 2 +- ...st_sitter_macro__tests__struct_repeat.snap | 2 +- runtime/src/__private.rs | 4 +- runtime/src/error.rs | 117 +++++++++++++++++ runtime/src/lib.rs | 118 
+----------------- 20 files changed, 139 insertions(+), 149 deletions(-) create mode 100644 runtime/src/error.rs diff --git a/README.md b/README.md index 89b4992..0a9de28 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,8 @@ struct Code { ```` ### `#[extra]` -This annotation marks a node as extra and can safely be skipped while parsing. This is useful for handling whitespace/newlines/comments. +This annotation marks a node as extra which instructs tree-sitter that it can appear anywhere within the +grammar. This is useful for handling whitespace/newlines/comments. ```rust #[derive(Rule)] diff --git a/example/src/main.rs b/example/src/main.rs index e892ccc..6f94371 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -2,7 +2,7 @@ use std::io::Write; use codemap::CodeMap; use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle}; -use rust_sitter::errors::{ParseError, ParseErrorReason}; +use rust_sitter::error::{ParseError, ParseErrorReason}; mod arithmetic; mod optionals; diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap index 2b1f70c..7a30cdf 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap @@ -1,6 +1,6 @@ --- source: example/src/arithmetic.rs -expression: "grammar::parse(\"a1\")" +expression: "grammar::Expression::parse(\"a1\")" --- Err( [ @@ -26,8 +26,6 @@ Err( parent_context: Some( ParentContext { kind: "ERROR", - content: "a", - sexpr: "(ERROR (UNEXPECTED 'a'))", }, ), }, @@ -48,8 +46,6 @@ Err( parent_context: Some( ParentContext { kind: "source_file", - content: "a1", - sexpr: "(source_file (ERROR (UNEXPECTED 'a')) (Expression_Number 0: (Expression_Number_0)))", }, ), }, diff --git 
a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap index 6bf8f04..f3315cb 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap @@ -1,6 +1,6 @@ --- source: example/src/arithmetic.rs -expression: "grammar::parse(\"1a\")" +expression: "grammar::Expression::parse(\"1a\")" --- Err( [ @@ -26,8 +26,6 @@ Err( parent_context: Some( ParentContext { kind: "ERROR", - content: "a", - sexpr: "(ERROR (UNEXPECTED 'a'))", }, ), }, @@ -48,8 +46,6 @@ Err( parent_context: Some( ParentContext { kind: "source_file", - content: "1a", - sexpr: "(source_file (Expression_Number 0: (Expression_Number_0)) (ERROR (UNEXPECTED 'a')))", }, ), }, diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap index c1e920d..b5d9645 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap @@ -1,6 +1,6 @@ --- source: example/src/arithmetic.rs -expression: "grammar::parse(\"1 + 2\")" +expression: "grammar::Expression::parse(\"1 + 2\")" --- Err( [ @@ -26,8 +26,6 @@ Err( parent_context: Some( ParentContext { kind: "ERROR", - content: "1 +", - sexpr: "(ERROR (Expression_Number_0) (Whitespace) (UNEXPECTED '+'))", }, ), }, @@ -48,8 +46,6 @@ Err( parent_context: Some( ParentContext { kind: "source_file", - content: "1 + 2", - sexpr: "(source_file (ERROR (Expression_Number_0) (Whitespace) (UNEXPECTED '+')) (Whitespace) (Expression_Number 0: (Expression_Number_0)))", }, ), }, diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 193383a..f722a1f 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ 
-142,7 +142,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { /// Parse an input string according to the grammar. Returns either any parsing errors that happened, or a #[doc = #root_type_docstr] /// instance containing the parsed structured data. - pub fn parse(input: &str) -> core::result::Result> { + pub fn parse(input: &str) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/lib.rs b/macro/src/lib.rs index c4f6826..54eb22d 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -25,7 +25,7 @@ use expansion::*; #[proc_macro_derive( Rule, - // Alternatively, we can instead have one helper like `baum(...)` - generally looks cleaner. + // Alternatively, we can instead have one helper like `tree(...)` - generally looks cleaner. attributes( // Helper language, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index 32a974c..e4ed000 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -15,7 +15,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 4cfcf78..b0279c6 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -15,7 +15,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git 
a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index f17518e..222697b 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -16,7 +16,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 9cac676..51edaa2 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -15,7 +15,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index ded37ec..ac32f1a 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -1,6 +1,6 @@ --- source: macro/src/lib.rs -expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] pub struct Number\n { #[leaf(re(r\"\\d+\"))] value: u32 } #[derive(rust_sitter::Rule)]\n #[language] pub enum Expr\n { Numbers(#[repeat(non_empty = true)] Vec) }\n }\n}).to_token_stream().to_string())" +expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] 
pub struct Number\n { #[leaf(re(r\"\\d+\"))] value: u32 } #[derive(rust_sitter::Rule)]\n #[language] pub enum Expr { Numbers(#[repeat1] Vec) }\n }\n}).to_token_stream().to_string())" --- mod grammar { impl ::rust_sitter::Extract for Number { @@ -46,7 +46,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index ef6354b..97437f4 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -15,7 +15,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index a02de1e..dda1a20 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -16,7 +16,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index 2479c41..e6f7a9a 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -15,7 +15,7 @@ mod grammar { #[doc = r" instance 
containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 739c00e..3ae111b 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -15,7 +15,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index 83d4ac5..e180f52 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -15,7 +15,7 @@ mod grammar { #[doc = r" instance containing the parsed structured data."] pub fn parse( input: &str, - ) -> core::result::Result> { + ) -> core::result::Result> { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 35dc34b..a9763ec 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -84,7 +84,7 @@ pub fn skip_text(cursor_opt: &mut Option, field_name: & pub fn parse>( input: &str, language: impl Fn() -> tree_sitter::Language, -) -> core::result::Result> { +) -> core::result::Result> { let mut parser = crate::tree_sitter::Parser::new(); parser.set_language(&language()).unwrap(); let tree = parser.parse(input, None).expect("Failed to parse"); @@ -92,7 +92,7 @@ pub fn parse>( if root_node.has_error() { let mut errors = vec![]; - crate::errors::collect_parsing_errors(&root_node, input.as_bytes(), &mut 
errors); + crate::error::collect_parsing_errors(&root_node, input.as_bytes(), &mut errors); Err(errors) } else { diff --git a/runtime/src/error.rs b/runtime/src/error.rs new file mode 100644 index 0000000..3aaec90 --- /dev/null +++ b/runtime/src/error.rs @@ -0,0 +1,117 @@ +#[cfg(feature = "tree-sitter-standard")] +use tree_sitter_runtime_standard as tree_sitter; + +#[cfg(feature = "tree-sitter-c2rust")] +use tree_sitter_runtime_c2rust as tree_sitter; + +use crate::Point; + +#[derive(Debug)] +/// An explanation for an error that occurred during parsing. +pub enum ParseErrorReason { + /// The parser did not expect to see some token. + UnexpectedToken(String), + /// Tree Sitter failed to parse a specific intermediate node. + /// The underlying failures are in the vector. + FailedNode(Vec), + /// The parser expected a specific token, but it was not found. + MissingToken(String), +} + +#[derive(Debug)] +/// An error that occurred during parsing. +pub struct ParseError { + pub reason: ParseErrorReason, + /// Inclusive start of the error. + pub start_byte: usize, + /// Exclusive end of the error. + pub end_byte: usize, + pub start_point: Point, + pub end_point: Point, + pub text: String, + pub kind: &'static str, + pub parent_context: Option, +} + +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Failure to parse node:")?; + write!( + f, + "\t{}:{} - {}:{}", + self.start_point.line, + self.start_point.column, + self.end_point.line, + self.end_point.column + )?; + write!(f, " {}", self.text)?; + if let Some(parent) = &self.parent_context { + writeln!(f)?; + write!(f, "\t(parent node: {})", parent.kind)?; + } + Ok(()) + } +} + +#[derive(Debug)] +pub struct ParentContext { + pub kind: &'static str, +} + +/// Given the root node of a Tree Sitter parsing result, accumulates all +/// errors that were emitted. 
+pub fn collect_parsing_errors( + node: &tree_sitter::Node, + source: &[u8], + errors: &mut Vec, +) { + let start_byte = node.start_byte(); + let end_byte = node.end_byte(); + let start_point = Point::from_tree_sitter(node.start_position()); + let end_point = Point::from_tree_sitter(node.end_position()); + let kind = node.kind(); + let text = node.utf8_text(source).unwrap().to_owned(); + let mut parent_context = None; + let reason = if node.is_error() { + if let Some(p) = node.parent() { + parent_context = Some(ParentContext { + kind: p.kind(), + }); + } + if node.child(0).is_some() { + // we managed to parse some children, so collect underlying errors for this node + let mut inner_errors = vec![]; + let mut cursor = node.walk(); + node.children(&mut cursor) + .for_each(|c| collect_parsing_errors(&c, source, &mut inner_errors)); + + ParseErrorReason::FailedNode(inner_errors) + } else { + let contents = node.utf8_text(source).unwrap(); + if !contents.is_empty() { + ParseErrorReason::UnexpectedToken(contents.to_string()) + } else { + ParseErrorReason::FailedNode(vec![]) + } + } + } else if node.is_missing() { + ParseErrorReason::MissingToken(node.kind().to_string()) + } else if node.has_error() { + let mut cursor = node.walk(); + node.children(&mut cursor) + .for_each(|c| collect_parsing_errors(&c, source, errors)); + return; + } else { + return; + }; + errors.push(ParseError { + reason, + start_byte, + end_byte, + start_point, + end_point, + text, + kind, + parent_context, + }); +} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 2f20f1a..d4a76be 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,5 +1,6 @@ pub mod __private; pub mod rule; +pub mod error; use std::ops::Deref; @@ -397,120 +398,3 @@ impl, U> Extract> for Spanned { } } } - -pub mod errors { - #[cfg(feature = "tree-sitter-standard")] - use tree_sitter_runtime_standard as tree_sitter; - - #[cfg(feature = "tree-sitter-c2rust")] - use tree_sitter_runtime_c2rust as tree_sitter; - - use 
crate::Point; - - #[derive(Debug)] - /// An explanation for an error that occurred during parsing. - pub enum ParseErrorReason { - /// The parser did not expect to see some token. - UnexpectedToken(String), - /// Tree Sitter failed to parse a specific intermediate node. - /// The underlying failures are in the vector. - FailedNode(Vec), - /// The parser expected a specific token, but it was not found. - MissingToken(String), - } - - #[derive(Debug)] - /// An error that occurred during parsing. - pub struct ParseError { - pub reason: ParseErrorReason, - /// Inclusive start of the error. - pub start_byte: usize, - /// Exclusive end of the error. - pub end_byte: usize, - pub start_point: Point, - pub end_point: Point, - pub text: String, - pub kind: &'static str, - pub parent_context: Option, - } - - impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "Failure to parse node:")?; - write!(f, "\t{}:{} - {}:{}", self.start_point.line, self.start_point.column, self.end_point.line, self.end_point.column)?; - write!(f, " {}", self.text)?; - if let Some(parent) = &self.parent_context { - writeln!(f)?; - write!(f, "\t(parent node: {})", parent.kind)?; - } - Ok(()) - } - } - - #[derive(Debug)] - pub struct ParentContext { - pub kind: &'static str, - pub content: String, - pub sexpr: String, - } - - /// Given the root node of a Tree Sitter parsing result, accumulates all - /// errors that were emitted. 
- pub fn collect_parsing_errors( - node: &tree_sitter::Node, - source: &[u8], - errors: &mut Vec, - ) { - let start_byte = node.start_byte(); - let end_byte = node.end_byte(); - let start_point = Point::from_tree_sitter(node.start_position()); - let end_point = Point::from_tree_sitter(node.end_position()); - let kind = node.kind(); - let text = node.utf8_text(source).unwrap().to_owned(); - let mut parent_context = None; - let reason = if node.is_error() { - if let Some(p) = node.parent() { - parent_context = Some(ParentContext { - kind: p.kind(), - content: p.utf8_text(source).unwrap().to_owned(), - sexpr: p.to_sexp(), - }); - } - if node.child(0).is_some() { - // we managed to parse some children, so collect underlying errors for this node - let mut inner_errors = vec![]; - let mut cursor = node.walk(); - node.children(&mut cursor) - .for_each(|c| collect_parsing_errors(&c, source, &mut inner_errors)); - - ParseErrorReason::FailedNode(inner_errors) - } else { - let contents = node.utf8_text(source).unwrap(); - if !contents.is_empty() { - ParseErrorReason::UnexpectedToken(contents.to_string()) - } else { - ParseErrorReason::FailedNode(vec![]) - } - } - } else if node.is_missing() { - ParseErrorReason::MissingToken(node.kind().to_string()) - } else if node.has_error() { - let mut cursor = node.walk(); - node.children(&mut cursor) - .for_each(|c| collect_parsing_errors(&c, source, errors)); - return; - } else { - return; - }; - errors.push(ParseError { - reason, - start_byte, - end_byte, - start_point, - end_point, - text, - kind, - parent_context, - }); - } -} From 8f4daaabd97b2b43cd28ef0fa765213a0b6837d9 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Tue, 22 Jul 2025 19:51:03 -0500 Subject: [PATCH 29/50] WIP: Improved error parsing --- example/src/main.rs | 58 +++--- macro/src/expansion.rs | 48 +++-- runtime/Cargo.toml | 2 +- runtime/src/__private.rs | 174 +++++++++++----- runtime/src/error.rs | 66 +++--- runtime/src/extract.rs | 436 
+++++++++++++++++++++++++++++++++++++++ runtime/src/lib.rs | 361 +++----------------------------- 7 files changed, 700 insertions(+), 445 deletions(-) create mode 100644 runtime/src/extract.rs diff --git a/example/src/main.rs b/example/src/main.rs index 6f94371..2795ae7 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -25,36 +25,38 @@ fn convert_parse_error_to_diagnostics( label: Some(format!("missing \"{tok}\"")), }], }), + ParseErrorReason::Lookahead(_lookahead) => todo!(), + ParseErrorReason::Unknown => todo!(), - ParseErrorReason::UnexpectedToken(tok) => diagnostics.push(Diagnostic { - level: Level::Error, - message: format!("Unexpected token: \"{tok}\""), - code: Some("S000".to_string()), - spans: vec![SpanLabel { - span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), - style: SpanStyle::Primary, - label: Some(format!("unexpected \"{tok}\"")), - }], - }), + // ParseErrorReason::UnexpectedToken(tok) => diagnostics.push(Diagnostic { + // level: Level::Error, + // message: format!("Unexpected token: \"{tok}\""), + // code: Some("S000".to_string()), + // spans: vec![SpanLabel { + // span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), + // style: SpanStyle::Primary, + // label: Some(format!("unexpected \"{tok}\"")), + // }], + // }), - ParseErrorReason::FailedNode(errors) => { - if errors.is_empty() { - diagnostics.push(Diagnostic { - level: Level::Error, - message: "Failed to parse node".to_string(), - code: Some("S000".to_string()), - spans: vec![SpanLabel { - span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), - style: SpanStyle::Primary, - label: Some("failed".to_string()), - }], - }) - } else { - for error in errors { - convert_parse_error_to_diagnostics(file_span, error, diagnostics); - } - } - } + // ParseErrorReason::FailedNode(errors) => { + // if errors.is_empty() { + // diagnostics.push(Diagnostic { + // level: Level::Error, + // message: "Failed to parse node".to_string(), + // 
code: Some("S000".to_string()), + // spans: vec![SpanLabel { + // span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), + // style: SpanStyle::Primary, + // label: Some("failed".to_string()), + // }], + // }) + // } else { + // for error in errors { + // convert_parse_error_to_diagnostics(file_span, error, diagnostics); + // } + // } + // } } } diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index f722a1f..2910a36 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -45,7 +45,13 @@ pub fn expand_rule(input: DeriveInput) -> Result { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], last_idx: usize, last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { + fn extract<'a>( + node: Option<::rust_sitter::tree_sitter::Node>, + source: &[u8], + last_idx: usize, + last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, + ) -> Result { let node = node.expect("no node found"); #extract_expr } @@ -90,7 +96,13 @@ pub fn expand_rule(input: DeriveInput) -> Result { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>(node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], _last_idx: usize, _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>) -> Self { + fn extract<'a>( + node: Option<::rust_sitter::tree_sitter::Node>, + source: &[u8], + _last_idx: usize, + _last_pt: ::rust_sitter::tree_sitter::Point, + _leaf_fn: Option>, + ) -> Result { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -99,8 +111,8 @@ pub fn expand_rule(input: DeriveInput) -> Result { let node = cursor.node(); match node.kind() { #(#match_cases),*, - _ => if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + k => if !cursor.goto_next_sibling() { + panic!("Could not find a child corresponding to any enum branch: {k}") } } } @@ -142,7 +154,7 @@ pub fn 
expand_rule(input: DeriveInput) -> Result { /// Parse an input string according to the grammar. Returns either any parsing errors that happened, or a #[doc = #root_type_docstr] /// instance containing the parsed structured data. - pub fn parse(input: &str) -> core::result::Result> { + pub fn parse(input: &str) -> core::result::Result { ::rust_sitter::__private::parse(input, Self::language) } } @@ -193,8 +205,10 @@ fn gen_field(ident_str: String, leaf: Field) -> Result { } let text_input = text_attr.parse_args::()?; text_input.evaluate()?; + // TODO: Handle this correctly. return Ok(syn::parse_quote!({ - ::rust_sitter::__private::skip_text(cursor, #ident_str); + ::rust_sitter::__private::skip_text(state, #ident_str); + Ok::<_, ::rust_sitter::extract::ExtractError>(()) })); } @@ -220,7 +234,7 @@ fn gen_field(ident_str: String, leaf: Field) -> Result { } let wrapped_leaf_type = wrap_leaf_type(leaf_type, &non_leaf); let input_type: syn::Type = if is_node { - syn::parse_quote!(&::rust_sitter::NodeExt<'_>) + syn::parse_quote!(&::rust_sitter::extract::NodeExt<'_>) } else { syn::parse_quote!(&str) }; @@ -233,7 +247,7 @@ fn gen_field(ident_str: String, leaf: Field) -> Result { }; Ok(syn::parse_quote!({ - ::rust_sitter::__private::extract_field::<#leaf_type,_>(cursor, source, last_idx, last_pt, #ident_str, #closure_expr) + ::rust_sitter::__private::extract_field::<#leaf_type,_>(state, source, #ident_str, #closure_expr) })) } @@ -311,25 +325,25 @@ fn gen_struct_or_variant( quote! { { - #expr; - #construct_name + #expr?; + Ok(#construct_name) } } } Fields::Named(_) => quote! { - #construct_name { - #(#children_parsed),* - } + Ok(#construct_name { + #(#children_parsed?),* + }) }, Fields::Unnamed(_) => quote! 
{ - #construct_name( - #(#children_parsed),* - ) + Ok(#construct_name( + #(#children_parsed?),* + )) }, } }; Ok( - syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(node, move |cursor, last_idx, last_pt| #construct_expr)), + syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(node, move |state| #construct_expr)), ) } diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index b8980ff..c9aa824 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -5,7 +5,7 @@ readme = "../README.md" repository = "https://github.com/hydro-project/rust-sitter" version.workspace = true authors.workspace = true -edition = "2021" +edition = "2024" license = "MIT" keywords = ["parsing", "codegen"] categories = ["development-tools"] diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index a9763ec..912c8a0 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -4,104 +4,186 @@ //! They need to be public so they can be accessed at all (\*cough\* macro hygiene), but //! they are not intended to actually be called in any other circumstance. 
-use crate::{tree_sitter, Extract}; +use crate::{ + Extract, Span, + extract::{ExtractError, Result}, + tree_sitter, +}; pub fn extract_struct_or_variant( node: tree_sitter::Node, - construct_expr: impl Fn(&mut Option, &mut usize, &mut tree_sitter::Point) -> T, -) -> T { + construct_expr: impl Fn(&mut ExtractStructState<'_>) -> Result, +) -> Result { let mut parent_cursor = node.walk(); - construct_expr( - &mut if parent_cursor.goto_first_child() { + let mut state = ExtractStructState { + cursor: if parent_cursor.goto_first_child() { Some(parent_cursor) } else { None }, - &mut node.start_byte(), - &mut node.start_position(), - ) + last_idx: node.start_byte(), + last_pt: node.start_position(), + error: ExtractError::empty(), + }; + construct_expr(&mut state) +} + +pub struct ExtractStructState<'a> { + cursor: Option>, + last_idx: usize, + last_pt: tree_sitter::Point, + error: ExtractError, } -pub fn extract_field, T>( - cursor_opt: &mut Option, +// impl<'a> ExtractStructState<'a> { +// fn extract_node, T>( +// &mut self, +// node: tree_sitter::Node, +// source: &[u8], +// closure_ref: Option>, +// ) -> Result { +// } +// } + +// pub struct TryExtractState { +// pub span: Span, +// pub err: Option, +// } + +// pub fn try_extract, T>( +// err_state: &mut TryExtractState, +// node: Option, +// source: &[u8], +// last_idx: usize, +// last_pt: tree_sitter::Point, +// leaf_fn: Option>, +// ) -> Option { +// // TODO: Double check this. 
+// err_state.span.end_byte = last_idx; +// match LT::extract(node, source, last_idx, last_pt, leaf_fn) { +// Ok(t) => Some(t), +// Err(err) => { +// todo!() +// } +// } +// } + +pub fn extract_field, T: std::fmt::Debug>( + state: &mut ExtractStructState<'_>, source: &[u8], - last_idx: &mut usize, - last_pt: &mut tree_sitter::Point, field_name: &str, closure_ref: Option>, -) -> T { - if let Some(cursor) = cursor_opt.as_mut() { +) -> Result { + dbg!(field_name); + if let Some(cursor) = state.cursor.as_mut() { loop { let n = cursor.node(); - if let Some(name) = cursor.field_name() { + println!("Extracting node from text: {} - {}", n.utf8_text(source).unwrap(), n.to_sexp()); + if n.is_error() { + println!("Processing error..."); + // Try and parse it anyway, returning the result if we manage to get it. + if !cursor.goto_first_child() { + state.cursor = None; + state.last_idx = n.end_byte(); + state.last_pt = n.end_position(); + return Err(ExtractError::new(n, field_name.to_owned())); + } + let n = cursor.node(); + let out = LT::extract(Some(n), source, state.last_idx, state.last_pt, closure_ref)?; + // let out = match out { + // Ok(out) => { + // // ???; I guess this would be only possible in the wrapped type case. + // Some(out) + // } + // Err(e) => { + // state.error.merge(e); + // None + // } + // }; + // if !cursor.goto_next_sibling() { + // state.cursor = None; + // }; + + state.last_idx = n.end_byte(); + state.last_pt = n.end_position(); + + return Ok(out); + } else if let Some(name) = cursor.field_name() { if name == field_name { - let out = LT::extract(Some(n), source, *last_idx, *last_pt, closure_ref); + // TODO: Need to keep going if it fails. 
+ let out = + LT::extract(Some(n), source, state.last_idx, state.last_pt, closure_ref)?; if !cursor.goto_next_sibling() { - *cursor_opt = None; + dbg!(name); + state.cursor = None; }; - *last_idx = n.end_byte(); - *last_pt = n.end_position(); + state.last_idx = n.end_byte(); + state.last_pt = n.end_position(); - return out; + return Ok(dbg!(out)); } else { - return LT::extract(None, source, *last_idx, *last_pt, closure_ref); + return LT::extract(None, source, state.last_idx, state.last_pt, closure_ref); } } else { - *last_idx = n.end_byte(); - *last_pt = n.end_position(); + state.last_idx = n.end_byte(); + state.last_pt = n.end_position(); } if !cursor.goto_next_sibling() { - return LT::extract(None, source, *last_idx, *last_pt, closure_ref); + return LT::extract(None, source, state.last_idx, state.last_pt, closure_ref); } } } else { - LT::extract(None, source, *last_idx, *last_pt, closure_ref) + LT::extract(None, source, state.last_idx, state.last_pt, closure_ref) } } -pub fn skip_text(cursor_opt: &mut Option, field_name: &str) { - if let Some(cursor) = cursor_opt.as_mut() { +// TODO: Handle errors in this one too. 
+pub fn skip_text(state: &mut ExtractStructState<'_>, field_name: &str) -> Result<()> { + if let Some(cursor) = state.cursor.as_mut() { loop { if let Some(name) = cursor.field_name() { if name == field_name { if !cursor.goto_next_sibling() { - *cursor_opt = None; - return; + state.cursor = None; + return Ok(()); } } else { - return; + return Ok(()); } } else { - return; + return Ok(()); } } } + + Ok(()) } pub fn parse>( input: &str, language: impl Fn() -> tree_sitter::Language, -) -> core::result::Result> { +) -> core::result::Result { let mut parser = crate::tree_sitter::Parser::new(); parser.set_language(&language()).unwrap(); let tree = parser.parse(input, None).expect("Failed to parse"); let root_node = tree.root_node(); - if root_node.has_error() { - let mut errors = vec![]; - crate::error::collect_parsing_errors(&root_node, input.as_bytes(), &mut errors); - - Err(errors) - } else { - Ok(>::extract( - Some(root_node), - input.as_bytes(), - 0, - Default::default(), - None, - )) - } + if root_node.has_error() { + let mut errors = vec![]; + crate::error::collect_parsing_errors(&root_node, input.as_bytes(), &mut errors); + for error in errors { + println!("{error}"); + } + } + >::extract( + Some(root_node), + input.as_bytes(), + 0, + Default::default(), + None, + ) + // } } diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 3aaec90..97ced35 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -9,13 +9,10 @@ use crate::Point; #[derive(Debug)] /// An explanation for an error that occurred during parsing. pub enum ParseErrorReason { - /// The parser did not expect to see some token. - UnexpectedToken(String), - /// Tree Sitter failed to parse a specific intermediate node. - /// The underlying failures are in the vector. - FailedNode(Vec), /// The parser expected a specific token, but it was not found. 
MissingToken(String), + Lookahead(Vec<&'static str>), + Unknown, } #[derive(Debug)] @@ -42,14 +39,21 @@ impl std::fmt::Display for ParseError { self.start_point.line, self.start_point.column, self.end_point.line, - self.end_point.column + self.end_point.column, )?; write!(f, " {}", self.text)?; if let Some(parent) = &self.parent_context { writeln!(f)?; - write!(f, "\t(parent node: {})", parent.kind)?; + writeln!(f, "\t(parent node: {})", parent.kind)?; + } + write!(f, "\treason: ")?; + match &self.reason { + ParseErrorReason::MissingToken(tok) => write!(f, "missing token: {tok}"), + ParseErrorReason::Unknown => write!(f, "unknown"), + ParseErrorReason::Lookahead(lookahead) => { + write!(f, "expected one of: {}", lookahead.join(" | ")) + } } - Ok(()) } } @@ -72,31 +76,41 @@ pub fn collect_parsing_errors( let kind = node.kind(); let text = node.utf8_text(source).unwrap().to_owned(); let mut parent_context = None; + if let Some(p) = node.parent() { + parent_context = Some(ParentContext { kind: p.kind() }); + } let reason = if node.is_error() { - if let Some(p) = node.parent() { - parent_context = Some(ParentContext { - kind: p.kind(), - }); + // Narrow down the node range if possible. + fn walk_node(node: &tree_sitter::Node) { + let mut children = node.walk(); + dbg!(node); + dbg!(node.kind()); + for child in node.children(&mut children) { + walk_node(&child); + } } - if node.child(0).is_some() { - // we managed to parse some children, so collect underlying errors for this node - let mut inner_errors = vec![]; - let mut cursor = node.walk(); - node.children(&mut cursor) - .for_each(|c| collect_parsing_errors(&c, source, &mut inner_errors)); - - ParseErrorReason::FailedNode(inner_errors) + walk_node(node); + dbg!(node.to_sexp()); + // Traverse down to find the next parse state and display it in the error. 
+ let mut c = node.walk(); + while c.goto_first_child() {} + let state = c.node().next_parse_state(); + let state = if state != 0 { + state } else { - let contents = node.utf8_text(source).unwrap(); - if !contents.is_empty() { - ParseErrorReason::UnexpectedToken(contents.to_string()) - } else { - ParseErrorReason::FailedNode(vec![]) - } + c.node().parse_state() + }; + if state != 0 + && let Some(mut it) = node.language().lookahead_iterator(state) + { + ParseErrorReason::Lookahead(it.iter_names().collect()) + } else { + ParseErrorReason::Unknown } } else if node.is_missing() { ParseErrorReason::MissingToken(node.kind().to_string()) } else if node.has_error() { + // A node somewhere down in the tree from here has an error, recursively find it. let mut cursor = node.walk(); node.children(&mut cursor) .for_each(|c| collect_parsing_errors(&c, source, errors)); diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs new file mode 100644 index 0000000..fb9fc0f --- /dev/null +++ b/runtime/src/extract.rs @@ -0,0 +1,436 @@ +use crate::Span; + +use super::{Node, tree_sitter}; +/// Defines the logic used to convert a node in a Tree Sitter tree to +/// the corresponding Rust type. +pub trait Extract { + type LeafFn<'a>: Clone; + fn extract<'a>( + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + leaf_fn: Option>, + ) -> Result; +} + +#[derive(Default)] +pub struct ExtractState { + pub last_idx: usize, + pub last_pt: tree_sitter::Point, + pub error: Option, +} + +impl ExtractState { + pub fn error(&mut self, err: ExtractError) -> &mut Self { + if let Some(existing) = &mut self.error { + existing.merge(err); + } else { + self.error = Some(err); + } + self + } +} + +// pub struct ExtractResult { +// pub value: Option, +// pub is_partial: bool, +// /// Indicates this parse failed, or an inner parse failed which propogated its error. +// pub error: Option, +// /// Indicates somewhere within `value` there was an error. 
+// pub has_error: bool, +// } + +pub type Result = std::result::Result; + +// NOTE: This could hold references if we want this to be fast like tree-sitter is. +#[derive(Debug)] +pub struct ExtractError { + inner: Vec, +} + +#[derive(Debug)] +struct ExtractErrorInner { + /// Span of the node which failed to extract. + span: Span, + reason: ExtractErrorReason, +} + +impl ExtractError { + pub(crate) fn empty() -> Self { + Self { inner: vec![] } + } + pub(crate) fn prop(self) -> Result<()> { + if self.inner.is_empty() { + Ok(()) + } else { + Err(self) + } + } + pub(crate) fn new(n: tree_sitter::Node<'_>, expected_field: String) -> Self { + let span = Span::new(n.start_byte(), n.end_byte()); + Self { + inner: vec![ExtractErrorInner { + span, + reason: ExtractErrorReason::Parse { expected_field }, + }], + } + } + pub(crate) fn merge(&mut self, err: ExtractError) { + self.inner.extend(err.inner); + } + + pub(crate) fn type_conversion( + n: tree_sitter::Node<'_>, + e: impl std::error::Error + Send + 'static, + ) -> Self { + let span = Span::new(n.start_byte(), n.end_byte()); + Self { + inner: vec![ExtractErrorInner { + span, + reason: ExtractErrorReason::TypeConversion(Box::new(e)), + }], + } + } +} + +#[derive(Debug)] +pub enum ExtractErrorReason { + /// Failed to parse at the tree-sitter level. + Parse { + // Can be &'static? + expected_field: String, + }, + /// Parsed OK, but failed to extract to the given type. 
+ TypeConversion(Box), +} + +#[derive(Debug, Clone, Copy)] +pub struct NodeExt<'a> { + pub node: Node<'a>, + pub source: &'a [u8], + pub last_idx: usize, + pub last_pt: tree_sitter::Point, +} + +pub trait StrOrNode { + type Output; + fn apply( + self, + source: &[u8], + node: Node<'_>, + last_idx: usize, + last_pt: tree_sitter::Point, + ) -> Self::Output; +} + +impl StrOrNode for fn(&str) -> L { + type Output = L; + fn apply( + self, + source: &[u8], + node: Node<'_>, + _last_idx: usize, + _last_pt: tree_sitter::Point, + ) -> L { + let text = node.utf8_text(source).expect("Could not get text"); + self(text) + } +} + +impl StrOrNode for fn(&NodeExt<'_>) -> L { + type Output = L; + fn apply( + self, + source: &[u8], + node: Node<'_>, + last_idx: usize, + last_pt: tree_sitter::Point, + ) -> L { + let node = NodeExt { + node, + source, + last_idx, + last_pt, + }; + self(&node) + } +} + +// pub trait Handler { +// fn extract( +// self, +// node: Option, +// source: &[u8], +// last_idx: usize, +// last_pt: tree_sitter::Point, +// ) -> Output; +// } +// +// macro_rules! 
handler_fn { +// ($($t:ident),*) => { +// impl),*> Handler<($($t),*), O> for F +// where F: FnOnce($($t),*) -> O, +// { +// fn extract( +// self, +// node: Option, +// source: &[u8], +// last_idx: usize, +// last_pt: tree_sitter::Point, +// ) -> O { +// let node = node.expect("No node found"); +// let mut c = node.walk(); +// let mut it = node.children(&mut c); +// self( +// $( +// $t::extract(it.next(), source, last_idx, last_pt, None) +// ),* +// ) +// } +// } +// +// }; +// } +// +// handler_fn!(T1, T2); + +/// Map for `#[with(...)]` +pub struct WithLeaf { + _phantom: std::marker::PhantomData, + _f: std::marker::PhantomData, +} + +impl Extract for WithLeaf +where + F: StrOrNode + Clone, +{ + type LeafFn<'a> = F; + + fn extract<'a>( + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + leaf_fn: Option>, + ) -> Result { + let node = node.expect("Expected a node"); + // TODO: Consider if this should be fallible as well. + Ok(leaf_fn + .expect("No leaf function on WithLeaf") + .apply(source, node, last_idx, last_pt)) + } +} + +// #[derive(Clone)] +// pub struct MappedExtract { +// _type: std::marker::PhantomData, +// _prev: std::marker::PhantomData, +// _curr: std::marker::PhantomData, +// } +// +// #[derive(Clone)] +// pub struct MappedLeaf { +// prev: Option

, +// curr: F, +// } +// +// impl Extract for MappedExtract +// where +// F: Extract, +// { +// type LeafFn<'a> = MappedLeaf, &'a dyn Fn(L0) -> L1>; +// fn extract<'a>( +// node: Option, +// source: &[u8], +// last_idx: usize, +// last_pt: tree_sitter::Point, +// leaf_fn: Option>, +// ) -> L1 { +// let mapped = leaf_fn.unwrap(); +// let prev = F::extract(node, source, last_idx, last_pt, mapped.prev); +// (mapped.curr)(prev) +// } +// } + +// Common implementations for various types. + +impl Extract<()> for () { + type LeafFn<'a> = (); + fn extract<'a>( + _node: Option, + _source: &[u8], + _last_idx: usize, + _last_pt: tree_sitter::Point, + _leaf_fn: Option>, + ) -> Result<()> { + // TODO: Do we need to handle this here? Does `extract` itself need to expect an error? + Ok(()) + } +} + +impl, U> Extract> for Option { + type LeafFn<'a> = T::LeafFn<'a>; + fn extract<'a>( + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + leaf_fn: Option>, + ) -> Result> { + node.map(|n| T::extract(Some(n), source, last_idx, last_pt, leaf_fn)) + .transpose() + } +} + +impl, U> Extract> for Box { + type LeafFn<'a> = T::LeafFn<'a>; + fn extract<'a>( + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + leaf_fn: Option>, + ) -> Result> { + Ok(Box::new(T::extract( + node, source, last_idx, last_pt, leaf_fn, + )?)) + } +} + +impl, U> Extract> for Vec { + type LeafFn<'a> = T::LeafFn<'a>; + fn extract<'a>( + node: Option, + source: &[u8], + mut last_idx: usize, + mut last_pt: tree_sitter::Point, + leaf_fn: Option>, + ) -> Result> { + let node = match node { + Some(node) => node, + None => return Ok(vec![]), + }; + let mut cursor = node.walk(); + let mut out = vec![]; + let mut error = ExtractError::empty(); + if cursor.goto_first_child() { + loop { + let n = cursor.node(); + // Try and parse the error specially. 
+ if n.is_error() { + match T::extract(Some(n), source, last_idx, last_pt, leaf_fn.clone()) { + Ok(o) => { + out.push(o); + } + Err(e) => { + error.merge(e); + } + } + } else if cursor.field_name().is_some() { + out.push(T::extract( + Some(n), + source, + last_idx, + last_pt, + leaf_fn.clone(), + )?); + } + last_idx = n.end_byte(); + last_pt = n.end_position(); + + if !cursor.goto_next_sibling() { + break; + } + } + } + error.prop()?; + Ok(out) + } +} + +macro_rules! extract_from_str { + ($t:ty) => { + impl Extract<$t> for $t { + type LeafFn<'a> = (); + fn extract<'a>( + node: Option, + source: &[u8], + _last_idx: usize, + _last_pt: tree_sitter::Point, + _leaf_fn: Option>, + ) -> Result { + let node = node.expect(concat!( + "No node found in parsing extract: ", + stringify!($t) + )); + let text = node.utf8_text(source).expect("No text found for node"); + match text.parse() { + Ok(t) => Ok(t), + Err(e) => Err(ExtractError::type_conversion(node, e)), + } + } + } + }; +} + +extract_from_str!(u8); +extract_from_str!(i8); +extract_from_str!(u16); +extract_from_str!(i16); +extract_from_str!(u32); +extract_from_str!(i32); +extract_from_str!(u64); +extract_from_str!(i64); +// NOTE: These two may not work as intended due to rounding issues. +extract_from_str!(f32); +extract_from_str!(f64); +// Sort of silly, but keeps it general. +extract_from_str!(String); + +macro_rules! extract_for_tuple { + ($($t:ident),*) => { + impl<$($t: Extract<$t>),*> Extract<($($t),*)> for ($($t),*) { + type LeafFn<'a> = (); + fn extract<'a>( + node: Option, + source: &[u8], + last_idx: usize, + last_pt: tree_sitter::Point, + _leaf_fn: Option>, + ) -> Result { + let node = node.expect("No node found in tuple extract"); + let mut c = node.walk(); + let mut it = node.children(&mut c); + Ok(( + $( + $t::extract(it.next(), source, last_idx, last_pt, None)? 
+ ),* + )) + } + } + + }; +} + +extract_for_tuple!(T1, T2); +extract_for_tuple!(T1, T2, T3); +extract_for_tuple!(T1, T2, T3, T4); +extract_for_tuple!(T1, T2, T3, T4, T5); +extract_for_tuple!(T1, T2, T3, T4, T5, T6); +extract_for_tuple!(T1, T2, T3, T4, T5, T6, T7); +extract_for_tuple!(T1, T2, T3, T4, T5, T6, T7, T8); +// Good enough, can maybe generate all of these with a macro if we are clever enough. + +// Would like this to extract optionals specifically if they exist - probably means if a node is +// present then it is true. Might be too magic though. +// impl Extract for bool { +// type LeafFn = (); +// fn extract( +// node: Option, +// source: &[u8], +// last_idx: usize, +// leaf_fn: Option<&Self::LeafFn>, +// ) -> bool { +// } +// } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index d4a76be..ffc7d13 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,6 +1,9 @@ pub mod __private; -pub mod rule; pub mod error; +pub mod extract; +pub mod rule; + +pub use extract::{Extract, WithLeaf}; use std::ops::Deref; @@ -14,326 +17,6 @@ pub use tree_sitter_runtime_c2rust as tree_sitter; use tree_sitter::Node; -/// Defines the logic used to convert a node in a Tree Sitter tree to -/// the corresponding Rust type. 
-pub trait Extract { - type LeafFn<'a>: Clone; - fn extract<'a>( - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - leaf_fn: Option>, - ) -> Output; -} - -#[derive(Debug, Clone, Copy)] -pub struct NodeExt<'a> { - pub node: Node<'a>, - pub source: &'a [u8], - pub last_idx: usize, - pub last_pt: tree_sitter::Point, -} - -pub trait StrOrNode { - type Output; - fn apply( - self, - source: &[u8], - node: Node<'_>, - last_idx: usize, - last_pt: tree_sitter::Point, - ) -> Self::Output; -} - -impl StrOrNode for fn(&str) -> L { - type Output = L; - fn apply( - self, - source: &[u8], - node: Node<'_>, - _last_idx: usize, - _last_pt: tree_sitter::Point, - ) -> L { - let text = node.utf8_text(source).expect("Could not get text"); - self(text) - } -} - -impl StrOrNode for fn(&NodeExt<'_>) -> L { - type Output = L; - fn apply( - self, - source: &[u8], - node: Node<'_>, - last_idx: usize, - last_pt: tree_sitter::Point, - ) -> L { - let node = NodeExt { - node, - source, - last_idx, - last_pt, - }; - self(&node) - } -} - -pub trait Handler { - fn extract( - self, - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - ) -> Output; -} - -macro_rules! 
handler_fn { - ($($t:ident),*) => { - impl),*> Handler<($($t),*), O> for F - where F: FnOnce($($t),*) -> O, - { - fn extract( - self, - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - ) -> O { - let node = node.expect("No node found"); - let mut c = node.walk(); - let mut it = node.children(&mut c); - self( - $( - $t::extract(it.next(), source, last_idx, last_pt, None) - ),* - ) - } - } - - }; -} - -handler_fn!(T1, T2); - -/// Map for `#[with(...)]` -pub struct WithLeaf { - _phantom: std::marker::PhantomData, - _f: std::marker::PhantomData, -} - -impl Extract for WithLeaf -where - F: StrOrNode + Clone, -{ - type LeafFn<'a> = F; - - fn extract<'a>( - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - leaf_fn: Option>, - ) -> L { - let node = node.expect("Expected a node"); - leaf_fn - .expect("No leaf function on WithLeaf") - .apply(source, node, last_idx, last_pt) - } -} - -#[derive(Clone)] -pub struct MappedExtract { - _type: std::marker::PhantomData, - _prev: std::marker::PhantomData, - _curr: std::marker::PhantomData, -} - -#[derive(Clone)] -pub struct MappedLeaf { - prev: Option

, - curr: F, -} - -impl Extract for MappedExtract -where - F: Extract, -{ - type LeafFn<'a> = MappedLeaf, &'a dyn Fn(L0) -> L1>; - fn extract<'a>( - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - leaf_fn: Option>, - ) -> L1 { - let mapped = leaf_fn.unwrap(); - let prev = F::extract(node, source, last_idx, last_pt, mapped.prev); - (mapped.curr)(prev) - } -} - -// Common implementations for various types. - -impl Extract<()> for () { - type LeafFn<'a> = (); - fn extract<'a>( - _node: Option, - _source: &[u8], - _last_idx: usize, - _last_pt: tree_sitter::Point, - _leaf_fn: Option>, - ) { - } -} - -impl, U> Extract> for Option { - type LeafFn<'a> = T::LeafFn<'a>; - fn extract<'a>( - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - leaf_fn: Option>, - ) -> Option { - node.map(|n| T::extract(Some(n), source, last_idx, last_pt, leaf_fn)) - } -} - -impl, U> Extract> for Box { - type LeafFn<'a> = T::LeafFn<'a>; - fn extract<'a>( - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - leaf_fn: Option>, - ) -> Box { - Box::new(T::extract(node, source, last_idx, last_pt, leaf_fn)) - } -} - -impl, U> Extract> for Vec { - type LeafFn<'a> = T::LeafFn<'a>; - fn extract<'a>( - node: Option, - source: &[u8], - mut last_idx: usize, - mut last_pt: tree_sitter::Point, - leaf_fn: Option>, - ) -> Vec { - node.map(|node| { - let mut cursor = node.walk(); - let mut out = vec![]; - if cursor.goto_first_child() { - loop { - let n = cursor.node(); - if cursor.field_name().is_some() { - out.push(T::extract( - Some(n), - source, - last_idx, - last_pt, - leaf_fn.clone(), - )); - } - - last_idx = n.end_byte(); - last_pt = n.end_position(); - - if !cursor.goto_next_sibling() { - break; - } - } - } - - out - }) - .unwrap_or_default() - } -} - -macro_rules! 
extract_from_str { - ($t:ty) => { - impl Extract<$t> for $t { - type LeafFn<'a> = (); - fn extract<'a>( - node: Option, - source: &[u8], - _last_idx: usize, - _last_pt: tree_sitter::Point, - _leaf_fn: Option>, - ) -> Self { - let node = node.expect(concat!("No node found in parsing extract: ", stringify!($t))); - let text = node.utf8_text(source).expect("No text found for node"); - text.parse().expect("Failed to parse type") - } - } - }; -} - -extract_from_str!(u8); -extract_from_str!(i8); -extract_from_str!(u16); -extract_from_str!(i16); -extract_from_str!(u32); -extract_from_str!(i32); -extract_from_str!(u64); -extract_from_str!(i64); -// NOTE: These two may not work as intended due to rounding issues. -extract_from_str!(f32); -extract_from_str!(f64); -// Sort of silly, but keeps it general. -extract_from_str!(String); - -macro_rules! extract_for_tuple { - ($($t:ident),*) => { - impl<$($t: Extract<$t>),*> Extract<($($t),*)> for ($($t),*) { - type LeafFn<'a> = (); - fn extract<'a>( - node: Option, - source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, - _leaf_fn: Option>, - ) -> Self { - let node = node.expect("No node found in tuple extract"); - let mut c = node.walk(); - let mut it = node.children(&mut c); - ( - $( - $t::extract(it.next(), source, last_idx, last_pt, None) - ),* - ) - } - } - - }; -} - -extract_for_tuple!(T1, T2); -extract_for_tuple!(T1, T2, T3); -extract_for_tuple!(T1, T2, T3, T4); -extract_for_tuple!(T1, T2, T3, T4, T5); -extract_for_tuple!(T1, T2, T3, T4, T5, T6); -extract_for_tuple!(T1, T2, T3, T4, T5, T6, T7); -extract_for_tuple!(T1, T2, T3, T4, T5, T6, T7, T8); -// Good enough, can maybe generate all of these with a macro if we are clever enough. - -// Would like this to extract optionals specifically if they exist - probably means if a node is -// present then it is true. Might be too magic though. 
-// impl Extract for bool { -// type LeafFn = (); -// fn extract( -// node: Option, -// source: &[u8], -// last_idx: usize, -// leaf_fn: Option<&Self::LeafFn>, -// ) -> bool { -// } -// } - #[derive(Clone, Debug)] /// A wrapper around a value that also contains the span of the value in the source. pub struct Spanned { @@ -341,7 +24,7 @@ pub struct Spanned { pub value: T, /// The span of the node in the source. The first value is the inclusive start /// of the span, and the second value is the exclusive end of the span. - pub byte_span: (usize, usize), + pub byte_span: Span, pub line_span: (Point, Point), } @@ -353,6 +36,29 @@ impl Deref for Spanned { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Span { + pub start_byte: usize, + pub end_byte: usize, + // Do we need point? I don't think so in reality, because end tools can do the conversion, + // which tends to be the pattern in other parser tools. +} + +impl Span { + pub fn new(start_byte: usize, end_byte: usize) -> Self { + Self { + start_byte, + end_byte, + } + } +} + +impl From<(usize, usize)> for Span { + fn from((start, end): (usize, usize)) -> Self { + Self::new(start, end) + } +} + /// A line and column point in a source parse. These are 1 based to correspond with a text editor /// line and column. Note, this is a divergence from tree-sitter, which uses a zero-based `Point`. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -378,12 +84,13 @@ impl, U> Extract> for Spanned { last_idx: usize, last_pt: tree_sitter::Point, leaf_fn: Option>, - ) -> Spanned { - Spanned { - value: T::extract(node, source, last_idx, last_pt, leaf_fn), + ) -> extract::Result> { + Ok(Spanned { + value: T::extract(node, source, last_idx, last_pt, leaf_fn)?, byte_span: node .map(|n| (n.start_byte(), n.end_byte())) - .unwrap_or((last_idx, last_idx)), + .unwrap_or((last_idx, last_idx)) + .into(), line_span: node .map(|n| { ( @@ -395,6 +102,6 @@ impl, U> Extract> for Spanned { Point::from_tree_sitter(last_pt), Point::from_tree_sitter(last_pt), )), - } + }) } } From 6f4625544afbf8cfd65bf2e32d68ca851a8469f4 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 28 Jul 2025 14:41:33 -0500 Subject: [PATCH 30/50] Use latest dependencies --- Cargo.lock | 464 ++++++++++++++++++++++++--------------------- runtime/Cargo.toml | 4 +- tool/Cargo.toml | 6 +- 3 files changed, 257 insertions(+), 217 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dde5444..4808891 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,21 +13,21 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.96" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = 
"46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "c2rust-bitfields" @@ -51,18 +51,18 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.14" +version = "1.2.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" +checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" dependencies = [ "shlex", ] [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "codemap" @@ -82,14 +82,14 @@ dependencies = [ [[package]] name = "console" -version = "0.15.10" +version = "0.15.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" dependencies = [ "encode_unicode", "libc", "once_cell", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -100,7 +100,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] @@ -123,12 +123,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.60.2", ] [[package]] @@ -148,21 +148,21 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", + "r-efi", "wasi", - "windows-targets", ] [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" [[package]] name = "heck" @@ -172,21 +172,22 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "icu_collections" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", + "potential_utf", "yoke", "zerofrom", "zerovec", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "icu_locale_core" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" dependencies = [ "displaydoc", "litemap", @@ -195,31 +196,11 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - [[package]] name = "icu_normalizer" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" dependencies = [ "displaydoc", "icu_collections", @@ -227,67 +208,54 @@ dependencies = [ "icu_properties", "icu_provider", "smallvec", - "utf16_iter", - "utf8_iter", - "write16", "zerovec", ] [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" [[package]] name = "icu_properties" -version = "1.5.1" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" dependencies = [ "displaydoc", "icu_collections", - "icu_locid_transform", + "icu_locale_core", "icu_properties_data", "icu_provider", - "tinystr", + "potential_utf", + "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" [[package]] name = "icu_provider" -version = "1.5.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" dependencies = [ "displaydoc", - "icu_locid", - 
"icu_provider_macros", + "icu_locale_core", "stable_deref_trait", "tinystr", "writeable", "yoke", "zerofrom", + "zerotrie", "zerovec", ] -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "idna" version = "1.0.3" @@ -301,9 +269,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" dependencies = [ "icu_normalizer", "icu_properties", @@ -311,9 +279,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" dependencies = [ "equivalent", "hashbrown", @@ -321,20 +289,18 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "insta" -version = "1.42.1" +version = "1.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c1b125e30d93896b365e156c33dadfffab45ee8400afcbba4752f59de08a86" +checksum = "154934ea70c58054b556dd430b99a98c2a7ff5309ac9891597e339b5c28f4371" dependencies = [ "console", - "linked-hash-map", "once_cell", - "pin-project", "similar", ] @@ -349,9 +315,9 @@ dependencies = [ [[package]] name = "itoa" -version = 
"1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" @@ -365,39 +331,33 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" - -[[package]] -name = "linked-hash-map" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "litemap" -version = "0.7.4" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = 
"minicov" @@ -411,9 +371,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "percent-encoding" @@ -422,43 +382,38 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] -name = "pin-project" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.9" +name = "potential_utf" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", + "zerovec", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "regex" version = "1.11.1" @@ -507,7 +462,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] @@ -530,7 +485,7 @@ dependencies = [ "proc-macro2", "quote", "rust-sitter-common", - "syn 2.0.98", + "syn 2.0.104", "tempfile", ] @@ -543,7 +498,7 @@ dependencies = [ "rust-sitter-common", "serde", "serde_json", - "syn 2.0.98", + "syn 2.0.104", "syn-inline-mod", "tempfile", "tree-sitter", @@ -558,22 +513,22 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "0.38.44" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.60.2", ] [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -586,38 +541,38 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" dependencies = [ "serde", ] [[package]] name = "serde" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 
dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] name = "serde_json" -version = "1.0.139" +version = "1.0.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" +checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" dependencies = [ "indexmap", "itoa", @@ -646,9 +601,9 @@ checksum = "d31d263dd118560e1a492922182ab6ca6dc1d03a3bf54e7699993f31a4150e3f" [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "stable_deref_trait" @@ -675,9 +630,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -691,32 +646,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fa6dca1fdb7b2ed46dd534a326725419d4fb10f23d8c85a8b2860e5eb25d0f9" dependencies = [ "proc-macro2", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] name = "synstructure" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" 
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] name = "tempfile" -version = "3.17.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", "fastrand", "getrandom", "once_cell", "rustix", - "windows-sys", + "windows-sys 0.59.0", ] [[package]] @@ -730,39 +684,45 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] name = "tinystr" -version = "0.7.6" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" dependencies = [ "displaydoc", "zerovec", ] +[[package]] +name = "topological-sort" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" + [[package]] name = "tree-sitter" -version = "0.25.2" +version = "0.25.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5168a515fe492af54c5cc8800ff8c840be09fa5168de45838afaecd3e008bce4" +checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2" dependencies = [ "cc", "regex", @@ -788,9 +748,9 @@ dependencies = [ [[package]] name = "tree-sitter-generate" -version = "0.25.1" +version = "0.25.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05beaa8023e08df928535c282cf684c4ca5151475348e73457161f76a50778e8" +checksum = "9f4687294bab21cc81b492715bfca7b709adf1c67b306953a3ae717d3a0afd0e" dependencies = [ "anyhow", "heck", @@ -805,6 +765,7 @@ dependencies = [ "serde_json", "smallbitvec", "thiserror", + "topological-sort", "tree-sitter", "url", ] @@ -817,9 +778,9 @@ checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" [[package]] name = "unicode-ident" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "url" @@ -833,12 +794,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -857,9 +812,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -885,7 +840,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", "wasm-bindgen-shared", ] @@ -920,7 +875,7 @@ 
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -955,7 +910,7 @@ checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", ] [[package]] @@ -974,16 +929,31 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys", + "windows-sys 0.59.0", ] +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", ] [[package]] @@ -992,14 +962,31 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + 
"windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", ] [[package]] @@ -1008,74 +995,116 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = 
"windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "wit-bindgen-rt" -version = "0.33.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ "bitflags", ] -[[package]] -name = "write16" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" - [[package]] name = "writeable" -version = "0.5.5" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" [[package]] name = "yoke" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" dependencies = [ "serde", "stable_deref_trait", @@ -1085,42 +1114,53 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", "synstructure", ] [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = 
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", "synstructure", ] +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + [[package]] name = "zerovec" -version = "0.10.4" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" dependencies = [ "yoke", "zerofrom", @@ -1129,11 +1169,11 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.10.3" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.104", ] diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index b8980ff..9c926bb 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -19,8 +19,8 @@ tree-sitter-c2rust = ["tree-sitter-runtime-c2rust"] tree-sitter-standard = ["tree-sitter-runtime-standard"] [dependencies] -tree-sitter-runtime-c2rust = { package = "tree-sitter-c2rust", version = "0.25.2", optional = true } -tree-sitter-runtime-standard = { package = "tree-sitter", version = "0.25.2", optional = true } +tree-sitter-runtime-c2rust = { package = "tree-sitter-c2rust", version = "0.25", optional = true } +tree-sitter-runtime-standard = { package = "tree-sitter", version = "0.25", optional = true } rust-sitter-macro = { path = "../macro" } [dev-dependencies] diff --git a/tool/Cargo.toml b/tool/Cargo.toml index 772d538..bdfde17 100644 --- a/tool/Cargo.toml +++ 
b/tool/Cargo.toml @@ -27,9 +27,9 @@ serde_json = { version = "1", features = ["preserve_order"] } rust-sitter-common = { path = "../common" } tempfile = { version = "3", optional = true } -tree-sitter = { version = "0.25.2", optional = true } -tree-sitter-generate = { version = "0.25.1", optional = true } +tree-sitter = { version = "0.25", optional = true } +tree-sitter-generate = { version = "0.25", optional = true } cc = { version = "1", optional = true } [dev-dependencies] -insta = "1.39" +insta = "1" From fbd89ab73eb8c47492cb1d47ad219479a7c118a4 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 31 Jul 2025 13:47:36 -0500 Subject: [PATCH 31/50] Dump some changes --- Cargo.lock | 16 ++++---- example/Cargo.toml | 2 +- macro/src/expansion.rs | 8 ++-- runtime/Cargo.toml | 4 +- runtime/src/__private.rs | 83 +++++++++++++++++++--------------------- runtime/src/error.rs | 49 +++++++++++++++++++++--- runtime/src/extract.rs | 77 +++++++++++++++++-------------------- runtime/src/lib.rs | 12 +++--- tool/Cargo.toml | 4 +- 9 files changed, 141 insertions(+), 114 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4808891..bac923c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -720,16 +720,14 @@ checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" [[package]] name = "tree-sitter" -version = "0.25.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2" +version = "0.26.0" dependencies = [ "cc", "regex", "regex-syntax", "serde_json", "streaming-iterator", - "tree-sitter-language", + "tree-sitter-language 0.1.4", ] [[package]] @@ -743,14 +741,12 @@ dependencies = [ "regex", "regex-syntax", "streaming-iterator", - "tree-sitter-language", + "tree-sitter-language 0.1.5", ] [[package]] name = "tree-sitter-generate" -version = "0.25.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9f4687294bab21cc81b492715bfca7b709adf1c67b306953a3ae717d3a0afd0e" +version = "0.26.0" dependencies = [ "anyhow", "heck", @@ -770,6 +766,10 @@ dependencies = [ "url", ] +[[package]] +name = "tree-sitter-language" +version = "0.1.4" + [[package]] name = "tree-sitter-language" version = "0.1.5" diff --git a/example/Cargo.toml b/example/Cargo.toml index 72f6e17..83a17e8 100644 --- a/example/Cargo.toml +++ b/example/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" publish = false [features] -default = ["tree-sitter-c2rust"] +default = ["tree-sitter-standard"] tree-sitter-c2rust = ["rust-sitter/tree-sitter-c2rust"] tree-sitter-standard = ["rust-sitter/tree-sitter-standard"] diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 2910a36..c73ad89 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -46,10 +46,9 @@ pub fn expand_rule(input: DeriveInput) -> Result { #[allow(non_snake_case)] fn extract<'a>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, ) -> Result { let node = node.expect("no node found"); @@ -97,10 +96,9 @@ pub fn expand_rule(input: DeriveInput) -> Result { #[allow(non_snake_case)] fn extract<'a>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, node: Option<::rust_sitter::tree_sitter::Node>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, ) -> Result { let node = node.expect("No node found"); @@ -207,7 +205,7 @@ fn gen_field(ident_str: String, leaf: Field) -> Result { text_input.evaluate()?; // TODO: Handle this correctly. 
return Ok(syn::parse_quote!({ - ::rust_sitter::__private::skip_text(state, #ident_str); + ::rust_sitter::__private::skip_text(state, #ident_str)?; Ok::<_, ::rust_sitter::extract::ExtractError>(()) })); } diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 2fe9399..f76fda7 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -14,13 +14,13 @@ categories = ["development-tools"] path = "src/lib.rs" [features] -default = ["tree-sitter-c2rust"] +default = ["tree-sitter-standard"] tree-sitter-c2rust = ["tree-sitter-runtime-c2rust"] tree-sitter-standard = ["tree-sitter-runtime-standard"] [dependencies] tree-sitter-runtime-c2rust = { package = "tree-sitter-c2rust", version = "0.25", optional = true } -tree-sitter-runtime-standard = { package = "tree-sitter", version = "0.25", optional = true } +tree-sitter-runtime-standard = { package = "tree-sitter", version = "0.26", optional = true } rust-sitter-macro = { path = "../macro" } [dev-dependencies] diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 912c8a0..20f49dc 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -5,8 +5,8 @@ //! they are not intended to actually be called in any other circumstance. use crate::{ - Extract, Span, - extract::{ExtractError, Result}, + Extract, + extract::{ExtractContext, ExtractError, Result}, tree_sitter, }; @@ -75,56 +75,50 @@ pub fn extract_field, T: std::fmt::Debug>( closure_ref: Option>, ) -> Result { dbg!(field_name); + let mut ctx = ExtractContext { + last_idx: state.last_idx, + last_pt: state.last_pt, + field_name, + }; if let Some(cursor) = state.cursor.as_mut() { loop { let n = cursor.node(); - println!("Extracting node from text: {} - {}", n.utf8_text(source).unwrap(), n.to_sexp()); + println!( + "Extracting node from text: {} - {}", + n.utf8_text(source).unwrap(), + n.to_sexp() + ); if n.is_error() { println!("Processing error..."); // Try and parse it anyway, returning the result if we manage to get it. 
if !cursor.goto_first_child() { state.cursor = None; - state.last_idx = n.end_byte(); - state.last_pt = n.end_position(); + ctx.last_idx = n.end_byte(); + ctx.last_pt = n.end_position(); return Err(ExtractError::new(n, field_name.to_owned())); } let n = cursor.node(); - let out = LT::extract(Some(n), source, state.last_idx, state.last_pt, closure_ref)?; - // let out = match out { - // Ok(out) => { - // // ???; I guess this would be only possible in the wrapped type case. - // Some(out) - // } - // Err(e) => { - // state.error.merge(e); - // None - // } - // }; - // if !cursor.goto_next_sibling() { - // state.cursor = None; - // }; - - state.last_idx = n.end_byte(); - state.last_pt = n.end_position(); + let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; + ctx.last_idx = n.end_byte(); + ctx.last_pt = n.end_position(); return Ok(out); } else if let Some(name) = cursor.field_name() { if name == field_name { // TODO: Need to keep going if it fails. - let out = - LT::extract(Some(n), source, state.last_idx, state.last_pt, closure_ref)?; + let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; if !cursor.goto_next_sibling() { dbg!(name); state.cursor = None; }; - state.last_idx = n.end_byte(); - state.last_pt = n.end_position(); + ctx.last_idx = n.end_byte(); + ctx.last_pt = n.end_position(); - return Ok(dbg!(out)); + return Ok(out); } else { - return LT::extract(None, source, state.last_idx, state.last_pt, closure_ref); + return LT::extract(&mut ctx, None, source, closure_ref); } } else { state.last_idx = n.end_byte(); @@ -132,11 +126,11 @@ pub fn extract_field, T: std::fmt::Debug>( } if !cursor.goto_next_sibling() { - return LT::extract(None, source, state.last_idx, state.last_pt, closure_ref); + return LT::extract(&mut ctx, None, source, closure_ref); } } } else { - LT::extract(None, source, state.last_idx, state.last_pt, closure_ref) + LT::extract(&mut ctx, None, source, closure_ref) } } @@ -171,19 +165,20 @@ pub fn parse>( let tree = 
parser.parse(input, None).expect("Failed to parse"); let root_node = tree.root_node(); - if root_node.has_error() { - let mut errors = vec![]; - crate::error::collect_parsing_errors(&root_node, input.as_bytes(), &mut errors); - for error in errors { - println!("{error}"); - } - } - >::extract( - Some(root_node), - input.as_bytes(), - 0, - Default::default(), - None, - ) + if root_node.has_error() { + let mut errors = vec![]; + crate::error::collect_parsing_errors(&root_node, input.as_bytes(), &mut errors); + for error in errors { + println!("{error}"); + } + panic!(); + } + let mut ctx = ExtractContext { + last_pt: Default::default(), + last_idx: 0, + field_name: "root", + }; + >::extract(&mut ctx, Some(root_node), input.as_bytes(), None) + // } } diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 97ced35..d3c6ed9 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -89,17 +89,56 @@ pub fn collect_parsing_errors( walk_node(&child); } } - walk_node(node); - dbg!(node.to_sexp()); + // let q = tree_sitter::Query::new(&node.language(), "(ERROR_INTERNAL) @error").unwrap(); + // let mut qcur = tree_sitter::QueryCursor::new(); + // let mut it = qcur.captures(&q, *node, source); + // use tree_sitter::StreamingIterator; + // // NOTE: Instead of just using the first internal error, we should use all of them that are + // // non-overlapping. + // let Some((cap, _)) = it.next() else { + // panic!("Could not capture ERROR_INTERNAL"); + // }; + // // Should only be one capture since we only have `@error` + // let error_internal = cap.captures[0].node; + println!("Error range: {:?}", node.error_range().unwrap()); + let mut err_cur = node.walk(); + for err in node.error_children(&mut err_cur).unwrap() { + dbg!(err); + } + + let end = node.error_child(0).unwrap().prev_sibling().unwrap(); + // walk_node(node); + // dbg!(error_internal.to_sexp()); // Traverse down to find the next parse state and display it in the error. 
- let mut c = node.walk(); - while c.goto_first_child() {} - let state = c.node().next_parse_state(); + let mut c = end.walk(); + // c.goto_descendant(dbg!(node.descendant_count() - 1)); + // c.goto_first_child(); + // while c.node().child_count() > 0 && c.goto_next_sibling() {} + // c.goto_previous_sibling(); + // c.goto_first_child(); + // c.goto_next_sibling(); + while c.goto_last_child() {} + // while c.goto_next_sibling() {} + // loop { + // let mut run = false; + // while c.goto_first_child() { run = true ;} + // dbg!(c.node()); + // if c.goto_next_sibling() {run = true ;} + // dbg!(c.node()); + // if !run { + // break; + // } + // } + // dbg!(c.node()); + // dbg!(c.node().next_parse_state()); + let state = dbg!(c.node().next_parse_state()); + // let state = c.node().next_parse_state(); let state = if state != 0 { state } else { c.node().parse_state() }; + dbg!(state); if state != 0 && let Some(mut it) = node.language().lookahead_iterator(state) { diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index fb9fc0f..18918ba 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -6,14 +6,20 @@ use super::{Node, tree_sitter}; pub trait Extract { type LeafFn<'a>: Clone; fn extract<'a>( + ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, leaf_fn: Option>, ) -> Result; } +pub struct ExtractContext<'a> { + // TODO: We may need to keep a stack of these, and put them in a separate structure. + pub last_idx: usize, + pub last_pt: tree_sitter::Point, + pub field_name: &'a str, +} + #[derive(Default)] pub struct ExtractState { pub last_idx: usize, @@ -208,17 +214,19 @@ where type LeafFn<'a> = F; fn extract<'a>( + ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, leaf_fn: Option>, ) -> Result { let node = node.expect("Expected a node"); // TODO: Consider if this should be fallible as well. 
- Ok(leaf_fn - .expect("No leaf function on WithLeaf") - .apply(source, node, last_idx, last_pt)) + Ok(leaf_fn.expect("No leaf function on WithLeaf").apply( + source, + node, + ctx.last_idx, + ctx.last_pt, + )) } } @@ -258,10 +266,9 @@ where impl Extract<()> for () { type LeafFn<'a> = (); fn extract<'a>( + _ctx: &mut ExtractContext<'_>, _node: Option, _source: &[u8], - _last_idx: usize, - _last_pt: tree_sitter::Point, _leaf_fn: Option>, ) -> Result<()> { // TODO: Do we need to handle this here? Does `extract` itself need to expect an error? @@ -272,13 +279,12 @@ impl Extract<()> for () { impl, U> Extract> for Option { type LeafFn<'a> = T::LeafFn<'a>; fn extract<'a>( + ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, leaf_fn: Option>, ) -> Result> { - node.map(|n| T::extract(Some(n), source, last_idx, last_pt, leaf_fn)) + node.map(|n| T::extract(ctx, Some(n), source, leaf_fn)) .transpose() } } @@ -286,25 +292,21 @@ impl, U> Extract> for Option { impl, U> Extract> for Box { type LeafFn<'a> = T::LeafFn<'a>; fn extract<'a>( + ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, leaf_fn: Option>, ) -> Result> { - Ok(Box::new(T::extract( - node, source, last_idx, last_pt, leaf_fn, - )?)) + Ok(Box::new(T::extract(ctx, node, source, leaf_fn)?)) } } impl, U> Extract> for Vec { type LeafFn<'a> = T::LeafFn<'a>; fn extract<'a>( + ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - mut last_idx: usize, - mut last_pt: tree_sitter::Point, leaf_fn: Option>, ) -> Result> { let node = match node { @@ -319,25 +321,20 @@ impl, U> Extract> for Vec { let n = cursor.node(); // Try and parse the error specially. if n.is_error() { - match T::extract(Some(n), source, last_idx, last_pt, leaf_fn.clone()) { - Ok(o) => { - out.push(o); - } - Err(e) => { - error.merge(e); - } - } + println!("Processing error... 
for {}", ctx.field_name); + // match T::extract(ctx, Some(n), source, leaf_fn.clone()) { + // Ok(o) => { + // out.push(o); + // } + // Err(e) => { + // error.merge(e); + // } + // } } else if cursor.field_name().is_some() { - out.push(T::extract( - Some(n), - source, - last_idx, - last_pt, - leaf_fn.clone(), - )?); + out.push(T::extract(ctx, Some(n), source, leaf_fn.clone())?); } - last_idx = n.end_byte(); - last_pt = n.end_position(); + ctx.last_idx = n.end_byte(); + ctx.last_pt = n.end_position(); if !cursor.goto_next_sibling() { break; @@ -354,10 +351,9 @@ macro_rules! extract_from_str { impl Extract<$t> for $t { type LeafFn<'a> = (); fn extract<'a>( + _ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - _last_idx: usize, - _last_pt: tree_sitter::Point, _leaf_fn: Option>, ) -> Result { let node = node.expect(concat!( @@ -393,10 +389,9 @@ macro_rules! extract_for_tuple { impl<$($t: Extract<$t>),*> Extract<($($t),*)> for ($($t),*) { type LeafFn<'a> = (); fn extract<'a>( + ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, _leaf_fn: Option>, ) -> Result { let node = node.expect("No node found in tuple extract"); @@ -404,7 +399,7 @@ macro_rules! extract_for_tuple { let mut it = node.children(&mut c); Ok(( $( - $t::extract(it.next(), source, last_idx, last_pt, None)? + $t::extract(ctx, it.next(), source, None)? 
),* )) } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index ffc7d13..1f33326 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -3,6 +3,7 @@ pub mod error; pub mod extract; pub mod rule; +use extract::ExtractContext; pub use extract::{Extract, WithLeaf}; use std::ops::Deref; @@ -79,17 +80,16 @@ impl Point { impl, U> Extract> for Spanned { type LeafFn<'a> = T::LeafFn<'a>; fn extract<'a>( + ctx: &mut ExtractContext<'_>, node: Option, source: &[u8], - last_idx: usize, - last_pt: tree_sitter::Point, leaf_fn: Option>, ) -> extract::Result> { Ok(Spanned { - value: T::extract(node, source, last_idx, last_pt, leaf_fn)?, + value: T::extract(ctx, node, source, leaf_fn)?, byte_span: node .map(|n| (n.start_byte(), n.end_byte())) - .unwrap_or((last_idx, last_idx)) + .unwrap_or((ctx.last_idx, ctx.last_idx)) .into(), line_span: node .map(|n| { @@ -99,8 +99,8 @@ impl, U> Extract> for Spanned { ) }) .unwrap_or(( - Point::from_tree_sitter(last_pt), - Point::from_tree_sitter(last_pt), + Point::from_tree_sitter(ctx.last_pt), + Point::from_tree_sitter(ctx.last_pt), )), }) } diff --git a/tool/Cargo.toml b/tool/Cargo.toml index bdfde17..e093d7e 100644 --- a/tool/Cargo.toml +++ b/tool/Cargo.toml @@ -27,8 +27,8 @@ serde_json = { version = "1", features = ["preserve_order"] } rust-sitter-common = { path = "../common" } tempfile = { version = "3", optional = true } -tree-sitter = { version = "0.25", optional = true } -tree-sitter-generate = { version = "0.25", optional = true } +tree-sitter = { version = "0.26", optional = true } +tree-sitter-generate = { version = "0.26", optional = true } cc = { version = "1", optional = true } [dev-dependencies] From 2558acf97e147c176647042de7e1bac4dbcae931 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Fri, 15 Aug 2025 20:29:40 -0500 Subject: [PATCH 32/50] Implement better error outputting and reporting --- Cargo.lock | 124 +- Cargo.toml | 4 + common/src/expansion.rs | 100 +- example/Cargo.toml | 7 +- example/src/arithmetic.rs | 33 
+- example/src/main.rs | 75 +- example/src/optionals.rs | 1 + example/src/repetitions.rs | 2 + ...e__arithmetic__tests__failed_parses-2.snap | 76 +- ...e__arithmetic__tests__failed_parses-3.snap | 82 +- ...e__arithmetic__tests__failed_parses-4.snap | 78 +- ...ple__arithmetic__tests__failed_parses.snap | 78 +- ..._optionals__tests__optional_grammar-2.snap | 48 +- ..._optionals__tests__optional_grammar-3.snap | 48 +- ..._optionals__tests__optional_grammar-4.snap | 50 +- ..._optionals__tests__optional_grammar-5.snap | 56 +- ..._optionals__tests__optional_grammar-6.snap | 58 +- ..._optionals__tests__optional_grammar-7.snap | 54 +- ..._optionals__tests__optional_grammar-8.snap | 56 +- ...e__optionals__tests__optional_grammar.snap | 46 +- ...titions__tests__repetitions_grammar-2.snap | 71 +- ...titions__tests__repetitions_grammar-3.snap | 100 +- ...petitions__tests__repetitions_grammar.snap | 62 +- ...xample__words__tests__words_grammar-2.snap | 78 +- ...xample__words__tests__words_grammar-3.snap | 78 +- ...xample__words__tests__words_grammar-4.snap | 17 +- ..._example__words__tests__words_grammar.snap | 74 +- example/src/words.rs | 1 + macro/src/expansion.rs | 27 +- ...t_sitter_macro__tests__enum_prec_left.snap | 50 +- ...t_sitter_macro__tests__enum_recursive.snap | 46 +- ...macro__tests__enum_transformed_fields.snap | 32 +- ...r_macro__tests__enum_with_named_field.snap | 56 +- ...macro__tests__enum_with_unamed_vector.snap | 54 +- ...r_macro__tests__grammar_unboxed_field.snap | 54 +- ...t_sitter_macro__tests__spanned_in_vec.snap | 81 +- ...ust_sitter_macro__tests__struct_extra.snap | 56 +- ..._sitter_macro__tests__struct_optional.snap | 61 +- ...st_sitter_macro__tests__struct_repeat.snap | 81 +- runtime/Cargo.toml | 11 +- runtime/src/__private.rs | 93 +- runtime/src/error.rs | 501 +++++--- runtime/src/extract.rs | 199 +-- runtime/src/grammar.rs | 156 +++ runtime/src/lib.rs | 110 +- runtime/src/rule.rs | 44 +- tool/Cargo.toml | 17 +- tool/src/lib.rs | 96 +- 
...l__tests__enum_conflicts_prec_dynamic.snap | 2 +- ..._tool__tests__enum_with_unamed_vector.snap | 2 +- ...st_sitter_tool__tests__grammar_repeat.snap | 2 +- ...t_sitter_tool__tests__grammar_repeat1.snap | 2 +- ...l__tests__grammar_repeat_no_delimiter.snap | 2 +- ...st_sitter_tool__tests__spanned_in_vec.snap | 2 +- .../rust-analyzer/metadata/sysroot/Cargo.lock | 511 ++++++++ .../metadata/workspace/Cargo.lock | 1129 +++++++++++++++++ 56 files changed, 3521 insertions(+), 1513 deletions(-) create mode 100644 runtime/src/grammar.rs create mode 100644 tool/target/rust-analyzer/metadata/sysroot/Cargo.lock create mode 100644 tool/target/rust-analyzer/metadata/workspace/Cargo.lock diff --git a/Cargo.lock b/Cargo.lock index bac923c..94d3e25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "bitflags" @@ -29,31 +29,11 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" -[[package]] -name = "c2rust-bitfields" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb34f0c0ace43530b2df7f18bc69ee0c4082158aa451ece29602f8c841e73764" -dependencies = [ - "c2rust-bitfields-derive", -] - -[[package]] -name = "c2rust-bitfields-derive" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dd1601a7b828ab874d890e5a895563ca8ad485bdd3d2a359f148c8b72537241" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "cc" -version = "1.2.30" +version = "1.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7" +checksum = "2352e5597e9c544d5e6d9c95190d5d27738ade584fa8db0a16e130e5c2b5296e" dependencies = [ "shlex", ] @@ -100,7 +80,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", ] [[package]] @@ -160,9 +140,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.4" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" [[package]] name = "heck" @@ -331,9 +311,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.174" +version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" [[package]] name = "linux-raw-sys" @@ -392,9 +372,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" dependencies = [ "unicode-ident", ] @@ -449,9 +429,10 @@ version = "0.5.0" dependencies = [ "insta", "rust-sitter-macro", + "serde", + "serde_json", "tempfile", "tree-sitter", - "tree-sitter-c2rust", ] [[package]] @@ -462,7 +443,7 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.104", + "syn", ] [[package]] @@ -485,7 +466,7 @@ dependencies = [ "proc-macro2", "quote", "rust-sitter-common", - "syn 2.0.104", + "syn", "tempfile", ] @@ -498,7 +479,7 @@ dependencies = [ "rust-sitter-common", "serde", "serde_json", - "syn 2.0.104", + "syn", "syn-inline-mod", "tempfile", 
"tree-sitter", @@ -565,14 +546,14 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", ] [[package]] name = "serde_json" -version = "1.0.141" +version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" dependencies = [ "indexmap", "itoa", @@ -619,20 +600,9 @@ checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" [[package]] name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.104" +version = "2.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "7bc3fcb250e53458e712715cf74285c1f889686520d79294a9ef3bd7aa1fc619" dependencies = [ "proc-macro2", "quote", @@ -646,7 +616,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fa6dca1fdb7b2ed46dd534a326725419d4fb10f23d8c85a8b2860e5eb25d0f9" dependencies = [ "proc-macro2", - "syn 2.0.104", + "syn", ] [[package]] @@ -657,7 +627,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", ] [[package]] @@ -684,22 +654,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.12" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" dependencies = [ "thiserror-impl", ] 
[[package]] name = "thiserror-impl" -version = "2.0.12" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +checksum = "cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", ] [[package]] @@ -727,21 +697,7 @@ dependencies = [ "regex-syntax", "serde_json", "streaming-iterator", - "tree-sitter-language 0.1.4", -] - -[[package]] -name = "tree-sitter-c2rust" -version = "0.25.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1e4909668d7a5e0eb8d66bd9fe2e789106066626056d3bead80dc6b9d5aeee" -dependencies = [ - "c2rust-bitfields", - "once_cell", - "regex", - "regex-syntax", - "streaming-iterator", - "tree-sitter-language 0.1.5", + "tree-sitter-language", ] [[package]] @@ -770,12 +726,6 @@ dependencies = [ name = "tree-sitter-language" version = "0.1.4" -[[package]] -name = "tree-sitter-language" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" - [[package]] name = "unicode-ident" version = "1.0.18" @@ -840,7 +790,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.104", + "syn", "wasm-bindgen-shared", ] @@ -875,7 +825,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -910,7 +860,7 @@ checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", ] [[package]] @@ -1120,7 +1070,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", "synstructure", ] @@ -1141,7 +1091,7 @@ checksum = 
"d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", "synstructure", ] @@ -1158,9 +1108,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.2" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428" +checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" dependencies = [ "yoke", "zerofrom", @@ -1175,5 +1125,5 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.104", + "syn", ] diff --git a/Cargo.toml b/Cargo.toml index 4dbc31e..f19df91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,7 @@ authors = [ "Jason Boatman", "Shadaj Laddad " ] + +[workspace.dependencies] +tree-sitter = { git = "https://github.com/jaboatman/tree-sitter", branch = "combined" } +tree-sitter-generate = { git = "https://github.com/jaboatman/tree-sitter", branch = "combined" } diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 4b3bedd..9a6921b 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -463,7 +463,7 @@ impl Extras { fn gen_field( path: String, - leaf_type: Type, + leaf_type: Option, attrs: Vec, ctx: &mut ExpansionState, ) -> Result<(Value, bool)> { @@ -493,33 +493,34 @@ fn gen_field( )); } - let mut skip_over = HashSet::new(); - skip_over.insert("Spanned"); - skip_over.insert("Box"); - - let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); - let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); - if let Some(text) = text_attr { let input: TsInput = text.parse_args()?; - // text is only used to parse a bunch of tokens which are then not used directly. As such, - // the type is required to be `()` or else it will fail to compile. - // Not necessary, handled by `Extract`. 
- // match &leaf_type { - // Type::Tuple(t) if t.elems.is_empty() => {} - // t => { - // dbg!(t); - // return Err(Error::new( - // t.span(), - // "Unexpected type `()` is required for text", - // )); - // } - // } return Ok((precs.apply(input.evaluate()?)?, false)); } let leaf_input = leaf_attr.map(|a| a.parse_args::()).transpose()?; + let leaf_type = match leaf_type { + Some(ty) => ty, + None => { + let Some(leaf_input) = leaf_input else { + // TODO: Narrow the span + return Err(Error::new( + Span::call_site(), + "Empty types must have a leaf or text attribute", + )); + }; + return Ok((precs.apply(leaf_input.evaluate()?)?, false)); + } + }; + + let mut skip_over = HashSet::new(); + skip_over.insert("Spanned"); + skip_over.insert("Box"); + + let (inner_type_vec, is_vec) = try_extract_inner_type(&leaf_type, "Vec", &skip_over); + let (inner_type_option, is_option) = try_extract_inner_type(&leaf_type, "Option", &skip_over); + if !is_vec && !is_option { if let Some(input) = leaf_input { ctx.rules_map @@ -549,7 +550,7 @@ fn gen_field( } else if is_vec { let (field_json, field_optional) = gen_field( path.clone(), - inner_type_vec, + Some(inner_type_vec), leaf_attr.iter().cloned().cloned().collect(), ctx, )?; @@ -572,23 +573,14 @@ fn gen_field( .map(|a| a.parse_args::()) .transpose()?; - // NOTE (JAB): All of this is pretty ugly, I think we can flatten some of these types - // without losing anything. 
let delimiter_json = delimited_param .as_ref() - .map(|_| { - gen_field( - format!("{path}_vec_delimiter"), - parse_quote!(()), - vec![parse_quote!(#[text(#delimited_param)])], - ctx, - ) - }) + .map(|p| precs.apply(p.evaluate()?)) .transpose()?; let field_rule_non_optional = json!({ "type": "FIELD", - "name": format!("{path}_vec_element"), + "name": format!("{path}_element"), "content": field_json }); @@ -599,28 +591,14 @@ fn gen_field( { "type": "BLANK" }, - field_rule_non_optional + field_rule_non_optional, ] }) } else { field_rule_non_optional }; - let vec_contents = if let Some((delimiter_json, delimiter_optional)) = delimiter_json { - let delim_made_optional = if delimiter_optional { - json!({ - "type": "CHOICE", - "members": [ - { - "type": "BLANK" - }, - delimiter_json - ] - }) - } else { - delimiter_json - }; - + let vec_contents = if let Some(delimiter_json) = delimiter_json { json!({ "type": "SEQ", "members": [ @@ -634,7 +612,7 @@ fn gen_field( "content": { "type": "SEQ", "members": [ - delim_made_optional, + delimiter_json, field_rule, ] } @@ -650,10 +628,11 @@ fn gen_field( let vec_contents = precs.apply(vec_contents)?; - let contents_ident = format!("{path}_vec_contents"); + let contents_ident = format!("List_{path}"); ctx.rules_map.insert(contents_ident.clone(), vec_contents); Ok(( + // vec_contents, json!({ "type": "SYMBOL", "name": contents_ident, @@ -662,7 +641,7 @@ fn gen_field( )) } else { // is_option - let (field_json, field_optional) = gen_field(path, inner_type_option, attrs, ctx)?; + let (field_json, field_optional) = gen_field(path, Some(inner_type_option), attrs, ctx)?; if field_optional { return Err(Error::new( @@ -694,7 +673,7 @@ fn gen_struct_or_variant( format!("{path}_{ident_str}") }; let (field_contents, is_option) = - gen_field(path, field.ty.clone(), field.attrs.clone(), ctx)?; + gen_field(path, Some(field.ty.clone()), field.attrs.clone(), ctx)?; let core = json!({ "type": "FIELD", @@ -749,18 +728,9 @@ fn gen_struct_or_variant( 
let base_rule = match fields { Fields::Unit => { - let dummy_field = Field { - attrs: attrs.to_owned(), - vis: Visibility::Inherited, - mutability: FieldMutability::None, - ident: None, - colon_token: None, - ty: Type::Tuple(TypeTuple { - paren_token: Default::default(), - elems: Punctuated::new(), - }), - }; - gen_field_optional(&path, &dummy_field, ctx, "unit".to_owned())? + let (field_contents, _is_option) = + gen_field(path.clone(), None, attrs.to_owned(), ctx)?; + field_contents } _ => json!({ "type": "SEQ", diff --git a/example/Cargo.toml b/example/Cargo.toml index 83a17e8..5ca107a 100644 --- a/example/Cargo.toml +++ b/example/Cargo.toml @@ -5,13 +5,8 @@ authors.workspace = true edition = "2021" publish = false -[features] -default = ["tree-sitter-standard"] -tree-sitter-c2rust = ["rust-sitter/tree-sitter-c2rust"] -tree-sitter-standard = ["rust-sitter/tree-sitter-standard"] - [dependencies] -rust-sitter = { path = "../runtime", default-features = false } +rust-sitter = { path = "../runtime" } codemap = "0.1.3" codemap-diagnostic = "0.1.1" diff --git a/example/src/arithmetic.rs b/example/src/arithmetic.rs index fd05d6b..9fd70d1 100644 --- a/example/src/arithmetic.rs +++ b/example/src/arithmetic.rs @@ -5,17 +5,9 @@ pub mod grammar { pub enum Expression { Number(#[leaf(pattern(r"\d+"))] i32), #[prec_left(1)] - Sub( - Box, - #[leaf("-")] (), - Box, - ), + Sub(Box, #[leaf("-")] (), Box), #[prec_left(2)] - Mul( - Box, - #[leaf("*")] (), - Box, - ), + Mul(Box, #[leaf("*")] (), Box), } #[derive(Rule)] @@ -30,16 +22,23 @@ pub mod grammar { mod tests { use super::*; use grammar::Expression; + use rust_sitter::Language; #[wasm_bindgen_test::wasm_bindgen_test] #[test] fn successful_parses() { - assert_eq!(grammar::Expression::parse("1").unwrap(), Expression::Number(1)); + assert_eq!( + grammar::Expression::parse("1").into_result().unwrap(), + Expression::Number(1) + ); - assert_eq!(grammar::Expression::parse(" 1").unwrap(), Expression::Number(1)); + assert_eq!( + 
grammar::Expression::parse(" 1").into_result().unwrap(), + Expression::Number(1) + ); assert_eq!( - grammar::Expression::parse("1 - 2").unwrap(), + grammar::Expression::parse("1 - 2").into_result().unwrap(), Expression::Sub( Box::new(Expression::Number(1)), (), @@ -48,7 +47,7 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 - 2 - 3").unwrap(), + grammar::Expression::parse("1 - 2 - 3").into_result().unwrap(), Expression::Sub( Box::new(Expression::Sub( Box::new(Expression::Number(1)), @@ -61,7 +60,7 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 - 2 * 3").unwrap(), + grammar::Expression::parse("1 - 2 * 3").into_result().unwrap(), Expression::Sub( Box::new(Expression::Number(1)), (), @@ -74,7 +73,7 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 * 2 * 3").unwrap(), + grammar::Expression::parse("1 * 2 * 3").into_result().unwrap(), Expression::Mul( Box::new(Expression::Mul( Box::new(Expression::Number(1)), @@ -87,7 +86,7 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 * 2 - 3").unwrap(), + grammar::Expression::parse("1 * 2 - 3").into_result().unwrap(), Expression::Sub( Box::new(Expression::Mul( Box::new(Expression::Number(1)), diff --git a/example/src/main.rs b/example/src/main.rs index 2795ae7..d515e51 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -1,62 +1,36 @@ +use rust_sitter::Language; use std::io::Write; use codemap::CodeMap; use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle}; -use rust_sitter::error::{ParseError, ParseErrorReason}; +use rust_sitter::error::ParseError; mod arithmetic; mod optionals; mod repetitions; mod words; -fn convert_parse_error_to_diagnostics( - file_span: &codemap::Span, - error: &ParseError, - diagnostics: &mut Vec, -) { - match &error.reason { - ParseErrorReason::MissingToken(tok) => diagnostics.push(Diagnostic { - level: Level::Error, - message: format!("Missing token: \"{tok}\""), - code: Some("S000".to_string()), - spans: 
vec![SpanLabel { - span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), - style: SpanStyle::Primary, - label: Some(format!("missing \"{tok}\"")), - }], - }), - ParseErrorReason::Lookahead(_lookahead) => todo!(), - ParseErrorReason::Unknown => todo!(), - - // ParseErrorReason::UnexpectedToken(tok) => diagnostics.push(Diagnostic { - // level: Level::Error, - // message: format!("Unexpected token: \"{tok}\""), - // code: Some("S000".to_string()), - // spans: vec![SpanLabel { - // span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), - // style: SpanStyle::Primary, - // label: Some(format!("unexpected \"{tok}\"")), - // }], - // }), +fn convert_parse_error_to_diagnostics(file_span: &codemap::Span, error: &ParseError) -> Diagnostic { + let mut message = format!("syntax error. reason: {:?}", error.reason); + if !error.lookaheads.is_empty() { + message += &format!( + "\nPossible expected inputs: {}", + error.lookaheads.join(" | ") + ); + } - // ParseErrorReason::FailedNode(errors) => { - // if errors.is_empty() { - // diagnostics.push(Diagnostic { - // level: Level::Error, - // message: "Failed to parse node".to_string(), - // code: Some("S000".to_string()), - // spans: vec![SpanLabel { - // span: file_span.subspan(error.start_byte as u64, error.end_byte as u64), - // style: SpanStyle::Primary, - // label: Some("failed".to_string()), - // }], - // }) - // } else { - // for error in errors { - // convert_parse_error_to_diagnostics(file_span, error, diagnostics); - // } - // } - // } + Diagnostic { + level: Level::Error, + spans: vec![SpanLabel { + span: file_span.subspan( + error.error_position.bytes.start as u64, + error.error_position.bytes.end as u64, + ), + style: SpanStyle::Primary, + label: None, // TODO + }], + code: None, + message, } } @@ -74,14 +48,15 @@ fn main() { break; } - match arithmetic::grammar::Expression::parse(input) { + match arithmetic::grammar::Expression::parse(input).into_result() { Ok(expr) => 
println!("{expr:?}"), Err(errs) => { let mut codemap = CodeMap::new(); let file_span = codemap.add_file("".to_string(), input.to_string()); let mut diagnostics = vec![]; for error in errs { - convert_parse_error_to_diagnostics(&file_span.span, &error, &mut diagnostics); + let d = convert_parse_error_to_diagnostics(&file_span.span, &error); + diagnostics.push(d); } let mut emitter = Emitter::stderr(ColorConfig::Always, Some(&codemap)); diff --git a/example/src/optionals.rs b/example/src/optionals.rs index 4234cff..3d9c15f 100644 --- a/example/src/optionals.rs +++ b/example/src/optionals.rs @@ -28,6 +28,7 @@ mod grammar { #[cfg(test)] mod tests { use super::*; + use rust_sitter::Language; #[test] fn optional_grammar() { diff --git a/example/src/repetitions.rs b/example/src/repetitions.rs index 0adaeab..e81ba6a 100644 --- a/example/src/repetitions.rs +++ b/example/src/repetitions.rs @@ -63,9 +63,11 @@ pub mod grammar { #[cfg(test)] mod tests { use super::*; + use rust_sitter::Language; #[test] fn repetitions_grammar() { + // Bug in latest tree-sitter: empty parse on a top-level repeat1 segfaults. 
insta::assert_debug_snapshot!(grammar::NumberList::parse("")); insta::assert_debug_snapshot!(grammar::NumberList::parse("1")); insta::assert_debug_snapshot!(grammar::NumberList::parse("1, 2")); diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap index 59e0b4a..37328c6 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap @@ -1,26 +1,68 @@ --- source: example/src/arithmetic.rs -expression: "grammar::parse(\"1 - 2 -\")" +expression: "grammar::Expression::parse(\"1 - 2 -\")" --- -Err( - [ +ParseResult { + result: None, + errors: [ ParseError { - reason: MissingToken( + node_position: Position { + bytes: 7..7, + start: Point { + line: 1, + column: 8, + }, + end: Point { + line: 1, + column: 8, + }, + }, + error_position: Position { + bytes: 7..7, + start: Point { + line: 1, + column: 8, + }, + end: Point { + line: 1, + column: 8, + }, + }, + lookaheads: [ "Expression_Number_0", - ), - start_byte: 7, - end_byte: 7, - start_point: Point { - line: 1, - column: 8, + "_Whitespace__whitespace", + ], + reason: Missing, + }, + ParseError { + node_position: Position { + bytes: 7..7, + start: Point { + line: 1, + column: 8, + }, + end: Point { + line: 1, + column: 8, + }, }, - end_point: Point { - line: 1, - column: 8, + error_position: Position { + bytes: 7..7, + start: Point { + line: 1, + column: 8, + }, + end: Point { + line: 1, + column: 8, + }, }, - text: "", - kind: "Expression_Number_0", - parent_context: None, + lookaheads: [], + reason: TypeConversion( + ParseIntError { + kind: Empty, + }, + ), }, ], -) +} diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap index 7a30cdf..4b624e1 100644 
--- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap @@ -2,52 +2,46 @@ source: example/src/arithmetic.rs expression: "grammar::Expression::parse(\"a1\")" --- -Err( - [ +ParseResult { + result: Some( + Number( + 1, + ), + ), + errors: [ ParseError { - reason: FailedNode( - [ - ParseError { - reason: UnexpectedToken( - "a", - ), - start_byte: 0, - end_byte: 1, - start_point: Point { - line: 1, - column: 1, - }, - end_point: Point { - line: 1, - column: 2, - }, - text: "a", - kind: "ERROR", - parent_context: Some( - ParentContext { - kind: "ERROR", - }, - ), - }, - ], - ), - start_byte: 0, - end_byte: 1, - start_point: Point { - line: 1, - column: 1, - }, - end_point: Point { - line: 1, - column: 2, + node_position: Position { + bytes: 0..1, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 2, + }, }, - text: "a", - kind: "ERROR", - parent_context: Some( - ParentContext { - kind: "source_file", + error_position: Position { + bytes: 0..1, + start: Point { + line: 1, + column: 1, }, - ), + end: Point { + line: 1, + column: 2, + }, + }, + lookaheads: [ + "Expression_Number_0", + "_Whitespace__whitespace", + "source_file", + "Expression_Number", + "Expression_Sub", + "Expression_Mul", + "Expression", + ], + reason: Error, }, ], -) +} diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap index f3315cb..a2a64af 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap @@ -2,52 +2,42 @@ source: example/src/arithmetic.rs expression: "grammar::Expression::parse(\"1a\")" --- -Err( - [ +ParseResult { + result: Some( + Number( + 1, + ), + ), + errors: [ ParseError { - reason: 
FailedNode( - [ - ParseError { - reason: UnexpectedToken( - "a", - ), - start_byte: 1, - end_byte: 2, - start_point: Point { - line: 1, - column: 2, - }, - end_point: Point { - line: 1, - column: 3, - }, - text: "a", - kind: "ERROR", - parent_context: Some( - ParentContext { - kind: "ERROR", - }, - ), - }, - ], - ), - start_byte: 1, - end_byte: 2, - start_point: Point { - line: 1, - column: 2, - }, - end_point: Point { - line: 1, - column: 3, + node_position: Position { + bytes: 1..2, + start: Point { + line: 1, + column: 2, + }, + end: Point { + line: 1, + column: 3, + }, }, - text: "a", - kind: "ERROR", - parent_context: Some( - ParentContext { - kind: "source_file", + error_position: Position { + bytes: 1..2, + start: Point { + line: 1, + column: 2, }, - ), + end: Point { + line: 1, + column: 3, + }, + }, + lookaheads: [ + "_Whitespace__whitespace", + "Expression_Sub_1", + "Expression_Mul_1", + ], + reason: Error, }, ], -) +} diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap index b5d9645..ef78a64 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap @@ -2,52 +2,42 @@ source: example/src/arithmetic.rs expression: "grammar::Expression::parse(\"1 + 2\")" --- -Err( - [ +ParseResult { + result: Some( + Number( + 2, + ), + ), + errors: [ ParseError { - reason: FailedNode( - [ - ParseError { - reason: UnexpectedToken( - "+", - ), - start_byte: 2, - end_byte: 3, - start_point: Point { - line: 1, - column: 3, - }, - end_point: Point { - line: 1, - column: 4, - }, - text: "+", - kind: "ERROR", - parent_context: Some( - ParentContext { - kind: "ERROR", - }, - ), - }, - ], - ), - start_byte: 0, - end_byte: 3, - start_point: Point { - line: 1, - column: 1, - }, - end_point: Point { - line: 1, - column: 4, + node_position: 
Position { + bytes: 0..3, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 4, + }, }, - text: "1 +", - kind: "ERROR", - parent_context: Some( - ParentContext { - kind: "source_file", + error_position: Position { + bytes: 2..3, + start: Point { + line: 1, + column: 3, }, - ), + end: Point { + line: 1, + column: 4, + }, + }, + lookaheads: [ + "_Whitespace__whitespace", + "Expression_Sub_1", + "Expression_Mul_1", + ], + reason: Error, }, ], -) +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap index c3c1a4f..c34a0d3 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap @@ -1,30 +1,30 @@ --- source: example/src/optionals.rs -expression: "grammar::parse(\"_.\")" +expression: "grammar::Language::parse(\"_.\")" --- -Ok( - Language { - v: None, - _s: (), - t: Spanned { - value: None, - byte_span: ( - 1, - 1, - ), - line_span: ( - Point { - line: 1, - column: 2, - }, - Point { - line: 1, - column: 2, +ParseResult { + result: Some( + Language { + v: None, + _s: (), + t: Spanned { + value: None, + position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, }, + }, + _d: Some( + (), ), }, - _d: Some( - (), - ), - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap index 77b42e3..6cd9fe9 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap @@ -1,30 +1,30 @@ --- source: example/src/optionals.rs -expression: 
"grammar::parse(\"1_\")" +expression: "grammar::Language::parse(\"1_\")" --- -Ok( - Language { - v: Some( - 1, - ), - _s: (), - t: Spanned { - value: None, - byte_span: ( - 2, - 2, +ParseResult { + result: Some( + Language { + v: Some( + 1, ), - line_span: ( - Point { - line: 1, - column: 3, + _s: (), + t: Spanned { + value: None, + position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, }, - Point { - line: 1, - column: 3, - }, - ), + }, + _d: None, }, - _d: None, - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap index 26be7d6..e6189ac 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap @@ -1,32 +1,32 @@ --- source: example/src/optionals.rs -expression: "grammar::parse(\"1_.\")" +expression: "grammar::Language::parse(\"1_.\")" --- -Ok( - Language { - v: Some( - 1, - ), - _s: (), - t: Spanned { - value: None, - byte_span: ( - 2, - 2, +ParseResult { + result: Some( + Language { + v: Some( + 1, ), - line_span: ( - Point { - line: 1, - column: 3, - }, - Point { - line: 1, - column: 3, + _s: (), + t: Spanned { + value: None, + position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, }, + }, + _d: Some( + (), ), }, - _d: Some( - (), - ), - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap index 7454502..bb5bc08 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap +++ 
b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap @@ -1,34 +1,34 @@ --- source: example/src/optionals.rs -expression: "grammar::parse(\"1_2\")" +expression: "grammar::Language::parse(\"1_2\")" --- -Ok( - Language { - v: Some( - 1, - ), - _s: (), - t: Spanned { - value: Some( - Number { - v: 2, - }, - ), - byte_span: ( - 2, - 3, +ParseResult { + result: Some( + Language { + v: Some( + 1, ), - line_span: ( - Point { - line: 1, - column: 3, + _s: (), + t: Spanned { + value: Some( + Number { + v: 2, + }, + ), + position: Position { + bytes: 2..3, + start: Point { + line: 1, + column: 3, + }, + end: Point { + line: 1, + column: 4, + }, }, - Point { - line: 1, - column: 4, - }, - ), + }, + _d: None, }, - _d: None, - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap index 0244844..2debce3 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap @@ -1,36 +1,36 @@ --- source: example/src/optionals.rs -expression: "grammar::parse(\"1_2.\")" +expression: "grammar::Language::parse(\"1_2.\")" --- -Ok( - Language { - v: Some( - 1, - ), - _s: (), - t: Spanned { - value: Some( - Number { - v: 2, - }, - ), - byte_span: ( - 2, - 3, +ParseResult { + result: Some( + Language { + v: Some( + 1, ), - line_span: ( - Point { - line: 1, - column: 3, - }, - Point { - line: 1, - column: 4, + _s: (), + t: Spanned { + value: Some( + Number { + v: 2, + }, + ), + position: Position { + bytes: 2..3, + start: Point { + line: 1, + column: 3, + }, + end: Point { + line: 1, + column: 4, + }, }, + }, + _d: Some( + (), ), }, - _d: Some( - (), - ), - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap 
b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap index 6cf3be4..5b1b4c3 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-7.snap @@ -1,32 +1,32 @@ --- source: example/src/optionals.rs -expression: "grammar::parse(\"_2\")" +expression: "grammar::Language::parse(\"_2\")" --- -Ok( - Language { - v: None, - _s: (), - t: Spanned { - value: Some( - Number { - v: 2, +ParseResult { + result: Some( + Language { + v: None, + _s: (), + t: Spanned { + value: Some( + Number { + v: 2, + }, + ), + position: Position { + bytes: 1..2, + start: Point { + line: 1, + column: 2, + }, + end: Point { + line: 1, + column: 3, + }, }, - ), - byte_span: ( - 1, - 2, - ), - line_span: ( - Point { - line: 1, - column: 2, - }, - Point { - line: 1, - column: 3, - }, - ), + }, + _d: None, }, - _d: None, - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap index 772f452..9e8f944 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap @@ -1,34 +1,34 @@ --- source: example/src/optionals.rs -expression: "grammar::parse(\"_2.\")" +expression: "grammar::Language::parse(\"_2.\")" --- -Ok( - Language { - v: None, - _s: (), - t: Spanned { - value: Some( - Number { - v: 2, - }, - ), - byte_span: ( - 1, - 2, - ), - line_span: ( - Point { - line: 1, - column: 2, - }, - Point { - line: 1, - column: 3, +ParseResult { + result: Some( + Language { + v: None, + _s: (), + t: Spanned { + value: Some( + Number { + v: 2, + }, + ), + position: Position { + bytes: 1..2, + start: Point { + line: 1, + column: 2, + }, + end: Point { + line: 1, + column: 3, + }, }, + 
}, + _d: Some( + (), ), }, - _d: Some( - (), - ), - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap index b4a5402..8d166b7 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap @@ -1,28 +1,28 @@ --- source: example/src/optionals.rs -expression: "grammar::parse(\"_\")" +expression: "grammar::Language::parse(\"_\")" --- -Ok( - Language { - v: None, - _s: (), - t: Spanned { - value: None, - byte_span: ( - 1, - 1, - ), - line_span: ( - Point { - line: 1, - column: 2, +ParseResult { + result: Some( + Language { + v: None, + _s: (), + t: Spanned { + value: None, + position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, }, - Point { - line: 1, - column: 2, - }, - ), + }, + _d: None, }, - _d: None, - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap index f46d9f8..4ad8950 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap @@ -1,43 +1,40 @@ --- source: example/src/repetitions.rs -expression: "grammar::parse(\"1\")" +expression: "grammar::NumberList::parse(\"1\")" --- -Ok( - NumberList { - numbers: Spanned { - value: [ - Spanned { - value: 1, - byte_span: ( - 0, - 1, - ), - line_span: ( - Point { - line: 1, - column: 1, +ParseResult { + result: Some( + NumberList { + numbers: Spanned { + value: [ + Spanned { + value: 1, + position: Position { + bytes: 0..1, + start: Point { + line: 1, + column: 1, + }, + 
end: Point { + line: 1, + column: 2, + }, }, - Point { - line: 1, - column: 2, - }, - ), - }, - ], - byte_span: ( - 0, - 1, - ), - line_span: ( - Point { - line: 1, - column: 1, - }, - Point { - line: 1, - column: 2, + }, + ], + position: Position { + bytes: 0..1, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 2, + }, }, - ), + }, }, - }, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap index caef295..1634ee9 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap @@ -1,60 +1,54 @@ --- source: example/src/repetitions.rs -expression: "grammar::parse(\"1, 2\")" +expression: "grammar::NumberList::parse(\"1, 2\")" --- -Ok( - NumberList { - numbers: Spanned { - value: [ - Spanned { - value: 1, - byte_span: ( - 0, - 1, - ), - line_span: ( - Point { - line: 1, - column: 1, +ParseResult { + result: Some( + NumberList { + numbers: Spanned { + value: [ + Spanned { + value: 1, + position: Position { + bytes: 0..1, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 2, + }, }, - Point { - line: 1, - column: 2, + }, + Spanned { + value: 2, + position: Position { + bytes: 3..4, + start: Point { + line: 1, + column: 4, + }, + end: Point { + line: 1, + column: 5, + }, }, - ), + }, + ], + position: Position { + bytes: 0..4, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 5, + }, }, - Spanned { - value: 2, - byte_span: ( - 3, - 4, - ), - line_span: ( - Point { - line: 1, - column: 4, - }, - Point { - line: 1, - column: 5, - }, - ), - }, - ], - byte_span: ( - 0, - 4, - ), - line_span: ( - Point { - line: 1, - column: 1, - }, - Point { - line: 1, - column: 5, - }, - ), + }, }, - 
}, -) + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap index e6bb5a4..7beb90d 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap @@ -1,26 +1,52 @@ --- source: example/src/repetitions.rs -expression: "grammar::parse(\"\")" +expression: "grammar::NumberList::parse(\"\")" --- -Err( - [ +ParseResult { + result: Some( + NumberList { + numbers: Spanned { + value: [], + position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, + }, + }, + }, + ), + errors: [ ParseError { - reason: FailedNode( - [], - ), - start_byte: 0, - end_byte: 0, - start_point: Point { - line: 1, - column: 1, + node_position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, }, - end_point: Point { - line: 1, - column: 1, + error_position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, }, - text: "", - kind: "ERROR", - parent_context: None, + lookaheads: [], + reason: Error, }, ], -) +} diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap index dd8b715..c0e95b9 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap @@ -1,26 +1,68 @@ --- source: example/src/words.rs -expression: "grammar::parse(\"hello\")" +expression: "grammar::Words::parse(\"hello\")" --- -Err( - [ +ParseResult { + result: None, + errors: [ ParseError { - reason: FailedNode( - [], - ), - 
start_byte: 0, - end_byte: 5, - start_point: Point { - line: 1, - column: 1, + node_position: Position { + bytes: 0..5, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 6, + }, }, - end_point: Point { - line: 1, - column: 6, + error_position: Position { + bytes: 0..5, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 6, + }, + }, + lookaheads: [ + "Words_keyword", + "_Whitespace__whitespace", + "source_file", + ], + reason: Error, + }, + ParseError { + node_position: Position { + bytes: 5..5, + start: Point { + line: 1, + column: 6, + }, + end: Point { + line: 1, + column: 6, + }, + }, + error_position: Position { + bytes: 5..5, + start: Point { + line: 1, + column: 6, + }, + end: Point { + line: 1, + column: 6, + }, + }, + lookaheads: [], + reason: MissingNode { + node_kind: "Words_word", + type_name: "String", }, - text: "hello", - kind: "ERROR", - parent_context: None, }, ], -) +} diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap index e5acc74..9d94ce4 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap @@ -1,26 +1,68 @@ --- source: example/src/words.rs -expression: "grammar::parse(\"ifhello\")" +expression: "grammar::Words::parse(\"ifhello\")" --- -Err( - [ +ParseResult { + result: None, + errors: [ ParseError { - reason: FailedNode( - [], - ), - start_byte: 0, - end_byte: 7, - start_point: Point { - line: 1, - column: 1, + node_position: Position { + bytes: 0..7, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 8, + }, }, - end_point: Point { - line: 1, - column: 8, + error_position: Position { + bytes: 0..7, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 8, + }, + }, + lookaheads: [ + 
"Words_keyword", + "_Whitespace__whitespace", + "source_file", + ], + reason: Error, + }, + ParseError { + node_position: Position { + bytes: 7..7, + start: Point { + line: 1, + column: 8, + }, + end: Point { + line: 1, + column: 8, + }, + }, + error_position: Position { + bytes: 7..7, + start: Point { + line: 1, + column: 8, + }, + end: Point { + line: 1, + column: 8, + }, + }, + lookaheads: [], + reason: MissingNode { + node_kind: "Words_word", + type_name: "String", }, - text: "ifhello", - kind: "ERROR", - parent_context: None, }, ], -) +} diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap index b8210ae..bbe77b4 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-4.snap @@ -1,10 +1,13 @@ --- source: example/src/words.rs -expression: "grammar::parse(\"if hello\")" +expression: "grammar::Words::parse(\"if hello\")" --- -Ok( - Words { - keyword: (), - word: "hello", - }, -) +ParseResult { + result: Some( + Words { + keyword: (), + word: "hello", + }, + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap index c88d31b..e1731b7 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap @@ -1,26 +1,64 @@ --- source: example/src/words.rs -expression: "grammar::parse(\"if\")" +expression: "grammar::Words::parse(\"if\")" --- -Err( - [ +ParseResult { + result: None, + errors: [ ParseError { - reason: FailedNode( - [], - ), - start_byte: 0, - end_byte: 2, - start_point: Point { - line: 1, - column: 1, + node_position: Position { + bytes: 0..2, + start: Point { + line: 1, + column: 1, + }, + end: 
Point { + line: 1, + column: 3, + }, }, - end_point: Point { - line: 1, - column: 3, + error_position: Position { + bytes: 0..2, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 3, + }, + }, + lookaheads: [], + reason: Error, + }, + ParseError { + node_position: Position { + bytes: 2..2, + start: Point { + line: 1, + column: 3, + }, + end: Point { + line: 1, + column: 3, + }, + }, + error_position: Position { + bytes: 2..2, + start: Point { + line: 1, + column: 3, + }, + end: Point { + line: 1, + column: 3, + }, + }, + lookaheads: [], + reason: MissingNode { + node_kind: "Words_keyword", + type_name: "String", }, - text: "if", - kind: "ERROR", - parent_context: None, }, ], -) +} diff --git a/example/src/words.rs b/example/src/words.rs index 799831e..0b44564 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -23,6 +23,7 @@ pub mod grammar { #[cfg(test)] mod tests { use super::*; + use rust_sitter::Language; #[test] fn words_grammar() { diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index c73ad89..fc05ebf 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -3,7 +3,10 @@ use std::collections::HashSet; use crate::errors::IteratorExt as _; use proc_macro2::Span; use quote::{ToTokens, quote}; -use rust_sitter_common::{expansion::{ExpansionState, RuleDerive}, *}; +use rust_sitter_common::{ + expansion::{ExpansionState, RuleDerive}, + *, +}; use syn::{spanned::Spanned, *}; pub enum ParamOrField { @@ -45,12 +48,12 @@ pub fn expand_rule(input: DeriveInput) -> Result { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, - node: Option<::rust_sitter::tree_sitter::Node>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], _leaf_fn: Option>, - ) -> Result { + ) -> Result> { let node = node.expect("no node found"); #extract_expr } @@ -95,12 +98,12 @@ pub fn expand_rule(input: DeriveInput) -> 
Result { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( + fn extract<'a, 'tree>( _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, - node: Option<::rust_sitter::tree_sitter::Node>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], _leaf_fn: Option>, - ) -> Result { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); @@ -139,11 +142,13 @@ pub fn expand_rule(input: DeriveInput) -> Result { let tree_sitter_ident = Ident::new(&format!("tree_sitter_{ident}"), Span::call_site()); let root_type_docstr = format!("[`{ident}`]"); - // TODO: We can maybe make a trait for `language`. It should also have a `parse` function. quote! { + impl ::rust_sitter::rule::Language for #ident { + fn produce_grammar() -> String { + String::new() + } - impl #ident { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn #tree_sitter_ident() -> ::rust_sitter::tree_sitter::Language; } @@ -152,7 +157,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { /// Parse an input string according to the grammar. Returns either any parsing errors that happened, or a #[doc = #root_type_docstr] /// instance containing the parsed structured data. 
- pub fn parse(input: &str) -> core::result::Result { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index e4ed000..58b6235 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -3,8 +3,11 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expression\n {\n Number(#[leaf(pattern(r\"\\d+\"))] i32), #[prec_left(1)]\n Sub(Box, #[leaf(\"-\")] (), Box),\n }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl Expression { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Expression { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; } @@ -13,22 +16,19 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] #[doc = "[`Expression`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( @@ -41,42 +41,42 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expression::Number({ + move |state| { + Ok(Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }) + }?)) }, ) } "Expression_Sub" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expression::Sub( + move |state| { + Ok(Expression::Sub( { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }, + }?, { ::rust_sitter::__private::extract_field::<(), _>( - cursor, source, last_idx, last_pt, "1", None, + state, source, "1", None, ) - }, + }?, { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "2", None, + state, source, "2", None, ) - }, - ) + }?, + )) }, ) } - _ => { + k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + panic!("Could not find a child corresponding to any enum 
branch: {k}") } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index b0279c6..5859c6e 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -3,8 +3,11 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expression\n {\n Number(#[leaf(re(r\"\\d+\"))] i32),\n Neg(#[leaf(\"-\")] (), Box),\n }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl Expression { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Expression { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; } @@ -13,22 +16,19 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] #[doc = "[`Expression`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( @@ -41,37 +41,37 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expression::Number({ + move |state| { + Ok(Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }) + }?)) }, ) } "Expression_Neg" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expression::Neg( + move |state| { + Ok(Expression::Neg( { ::rust_sitter::__private::extract_field::<(), _>( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }, + }?, { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "1", None, + state, source, "1", None, ) - }, - ) + }?, + )) }, ) } - _ => { + k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + panic!("Could not find a child corresponding to any enum branch: {k}") } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap 
b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index 222697b..56d6cd9 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -4,8 +4,11 @@ expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n --- mod grammar { use rust_sitter::Rule; - impl Expression { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Expression { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; } @@ -14,22 +17,19 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] #[doc = "[`Expression`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( @@ -42,18 +42,18 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expression::Number({ + move |state| { + Ok(Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "0", None, + state, 
source, "0", None, ) - }) + }?)) }, ) } - _ => { + k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + panic!("Could not find a child corresponding to any enum branch: {k}") } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 51edaa2..290236e 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -3,8 +3,11 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expr\n {\n Number(#[leaf(pattern(r\"\\d+\"))] u32), Neg\n { #[leaf(\"!\")] _bang: (), value: Box, }\n }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl Expr { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Expr { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Expr() -> ::rust_sitter::tree_sitter::Language; } @@ -13,22 +16,19 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] #[doc = "[`Expr`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Expr { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( @@ -41,35 +41,37 @@ mod grammar { "Expr_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expr::Number({ + move |state| { + Ok(Expr::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }) + }?)) }, ) } "Expr_Neg" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| Expr::Neg { - _bang: { - ::rust_sitter::__private::extract_field::<(), _>( - cursor, source, last_idx, last_pt, "_bang", None, - ) - }, - value: { - ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "value", None, - ) - }, + move |state| { + Ok(Expr::Neg { + _bang: { + ::rust_sitter::__private::extract_field::<(), _>( + state, source, "_bang", None, + ) + }?, + value: { + ::rust_sitter::__private::extract_field::, _>( + state, source, "value", None, + ) + }?, + }) }, ) } - _ => { + k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + panic!("Could not find a child corresponding to any enum branch: {k}") } } } diff 
--git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index ac32f1a..a38948d 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -6,24 +6,22 @@ mod grammar { impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Number { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Number { value: { ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "value", None, + state, source, "value", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Number { @@ -34,8 +32,11 @@ mod grammar { "Number" } } - impl Expr { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Expr { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Expr() -> ::rust_sitter::tree_sitter::Language; } @@ -44,22 +45,19 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] #[doc = "[`Expr`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Expr { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( @@ -72,18 +70,18 @@ mod grammar { "Expr_Numbers" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expr::Numbers({ + move |state| { + Ok(Expr::Numbers({ ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }) + }?)) }, ) } - _ => { + k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + panic!("Could not find a child corresponding to any enum branch: {k}") } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index 97437f4..d974c9f 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -3,8 +3,11 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub struct Language\n { e: Expression, } #[derive(rust_sitter::Rule)] pub enum Expression\n { 
Number(#[leaf(re(r\"\\d+\"))] i32), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl Language { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Language { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Language() -> ::rust_sitter::tree_sitter::Language; } @@ -13,33 +16,29 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] #[doc = "[`Language`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Language { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Language { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Language { e: { ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "e", None, + state, source, "e", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Language { @@ -53,13 +52,12 @@ mod grammar { impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: 
Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( @@ -72,18 +70,18 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expression::Number({ + move |state| { + Ok(Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }) + }?)) }, ) } - _ => { + k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + panic!("Could not find a child corresponding to any enum branch: {k}") } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index dda1a20..8ceae2e 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -4,8 +4,11 @@ expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n --- mod grammar { use rust_sitter::{Rule, Spanned}; - impl NumberList { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for NumberList { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_NumberList() -> ::rust_sitter::tree_sitter::Language; } @@ -14,33 +17,29 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] #[doc = "[`NumberList`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for NumberList { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| NumberList { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(NumberList { numbers: { ::rust_sitter::__private::extract_field::>, _>( - cursor, source, last_idx, last_pt, "numbers", None, + state, source, "numbers", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for NumberList { @@ -54,24 +53,20 @@ mod grammar { impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Number { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Number { v: { - ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, 
last_pt, "v", None, - ) - }, - }, - ) + ::rust_sitter::__private::extract_field::(state, source, "v", None) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Number { @@ -85,29 +80,25 @@ mod grammar { impl ::rust_sitter::Extract for Whitespace { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Whitespace { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Whitespace { _whitespace: { ::rust_sitter::__private::extract_field::<(), _>( - cursor, + state, source, - last_idx, - last_pt, "_whitespace", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Whitespace { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index e6f7a9a..a4f5bbb 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -3,8 +3,11 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub enum Expression\n { Number(#[leaf(re(r\"\\d+\"))] i32,), } #[derive(Rule)] #[extra] struct\n Whitespace { #[leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl Expression { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Expression { + fn produce_grammar() -> String { + String::new() + } + fn language() -> 
::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Expression() -> ::rust_sitter::tree_sitter::Language; } @@ -13,22 +16,19 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. Returns either any parsing errors that happened, or a"] #[doc = "[`Expression`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Expression { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _last_idx: usize, - _last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("No node found"); let mut cursor = node.walk(); assert!( @@ -41,18 +41,18 @@ mod grammar { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( node, - move |cursor, last_idx, last_pt| { - Expression::Number({ + move |state| { + Ok(Expression::Number({ ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "0", None, + state, source, "0", None, ) - }) + }?)) }, ) } - _ => { + k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch") + panic!("Could not find a child corresponding to any enum branch: {k}") } } } @@ -70,29 +70,25 @@ mod grammar { impl ::rust_sitter::Extract for Whitespace { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: 
::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Whitespace { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Whitespace { _whitespace: { ::rust_sitter::__private::extract_field::<(), _>( - cursor, + state, source, - last_idx, - last_pt, "_whitespace", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Whitespace { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 3ae111b..4432b52 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -3,8 +3,11 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub struct Language\n { #[leaf(re(r\"\\d+\"))] v: Option, t: Option, }\n #[derive(rust_sitter::Rule)] pub struct Number\n { #[leaf(re(r\"\\d+\"))] v: i32 }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl Language { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for Language { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_Language() -> ::rust_sitter::tree_sitter::Language; } @@ -13,38 +16,34 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] #[doc = "[`Language`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for Language { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Language { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Language { v: { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "v", None, + state, source, "v", None, ) - }, + }?, t: { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "t", None, + state, source, "t", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Language { @@ -58,24 +57,20 @@ mod grammar { impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Number { + 
::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Number { v: { - ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, last_pt, "v", None, - ) - }, - }, - ) + ::rust_sitter::__private::extract_field::(state, source, "v", None) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Number { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index e180f52..c022119 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -3,8 +3,11 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] #[language] pub struct NumberList\n { numbers: Vec, } #[derive(rust_sitter::Rule)] pub struct\n Number { #[leaf(re(r\"\\d+\"))] v: i32 } #[derive(rust_sitter::Rule)]\n #[extra] struct Whitespace\n { #[leaf(pattern(r\"\\s\"))] _whitespace: (), }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl NumberList { - pub fn language() -> ::rust_sitter::tree_sitter::Language { + impl ::rust_sitter::rule::Language for NumberList { + fn produce_grammar() -> String { + String::new() + } + fn language() -> ::rust_sitter::tree_sitter::Language { unsafe extern "C" { fn tree_sitter_NumberList() -> ::rust_sitter::tree_sitter::Language; } @@ -13,33 +16,29 @@ mod grammar { #[doc = r" Parse an input string according to the grammar. 
Returns either any parsing errors that happened, or a"] #[doc = "[`NumberList`]"] #[doc = r" instance containing the parsed structured data."] - pub fn parse( - input: &str, - ) -> core::result::Result> { + fn parse(input: &str) -> ::rust_sitter::ParseResult { ::rust_sitter::__private::parse(input, Self::language) } } impl ::rust_sitter::Extract for NumberList { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| NumberList { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(NumberList { numbers: { ::rust_sitter::__private::extract_field::, _>( - cursor, source, last_idx, last_pt, "numbers", None, + state, source, "numbers", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for NumberList { @@ -53,24 +52,20 @@ mod grammar { impl ::rust_sitter::Extract for Number { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Number { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Number { v: { - ::rust_sitter::__private::extract_field::( - cursor, source, last_idx, 
last_pt, "v", None, - ) - }, - }, - ) + ::rust_sitter::__private::extract_field::(state, source, "v", None) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Number { @@ -84,29 +79,25 @@ mod grammar { impl ::rust_sitter::Extract for Whitespace { type LeafFn<'a> = (); #[allow(non_snake_case)] - fn extract<'a>( - node: Option<::rust_sitter::tree_sitter::Node>, + fn extract<'a, 'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - last_idx: usize, - last_pt: ::rust_sitter::tree_sitter::Point, _leaf_fn: Option>, - ) -> Self { + ) -> Result> { let node = node.expect("no node found"); - ::rust_sitter::__private::extract_struct_or_variant( - node, - move |cursor, last_idx, last_pt| Whitespace { + ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { + Ok(Whitespace { _whitespace: { ::rust_sitter::__private::extract_field::<(), _>( - cursor, + state, source, - last_idx, - last_pt, "_whitespace", None, ) - }, - }, - ) + }?, + }) + }) } } impl ::rust_sitter::rule::Rule for Whitespace { diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index f76fda7..8b6d9b0 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -13,15 +13,12 @@ categories = ["development-tools"] [lib] path = "src/lib.rs" -[features] -default = ["tree-sitter-standard"] -tree-sitter-c2rust = ["tree-sitter-runtime-c2rust"] -tree-sitter-standard = ["tree-sitter-runtime-standard"] - [dependencies] -tree-sitter-runtime-c2rust = { package = "tree-sitter-c2rust", version = "0.25", optional = true } -tree-sitter-runtime-standard = { package = "tree-sitter", version = "0.26", optional = true } +tree-sitter.workspace = true rust-sitter-macro = { path = "../macro" } +# This one could be optional. 
+serde_json = "1" +serde = { version = "1", features = ["derive"] } [dev-dependencies] insta = "1.39" diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 20f49dc..63e3e54 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -7,12 +7,11 @@ use crate::{ Extract, extract::{ExtractContext, ExtractError, Result}, - tree_sitter, }; pub fn extract_struct_or_variant( node: tree_sitter::Node, - construct_expr: impl Fn(&mut ExtractStructState<'_>) -> Result, + construct_expr: impl for<'t> Fn(&mut ExtractStructState<'t>) -> Result<'t, T>, ) -> Result { let mut parent_cursor = node.walk(); let mut state = ExtractStructState { @@ -23,73 +22,37 @@ pub fn extract_struct_or_variant( }, last_idx: node.start_byte(), last_pt: node.start_position(), - error: ExtractError::empty(), + // error: ExtractError::empty(), }; construct_expr(&mut state) } -pub struct ExtractStructState<'a> { - cursor: Option>, +pub struct ExtractStructState<'tree> { + cursor: Option>, last_idx: usize, last_pt: tree_sitter::Point, - error: ExtractError, + // TODO: Use this. + // error: ExtractError, } -// impl<'a> ExtractStructState<'a> { -// fn extract_node, T>( -// &mut self, -// node: tree_sitter::Node, -// source: &[u8], -// closure_ref: Option>, -// ) -> Result { -// } -// } - -// pub struct TryExtractState { -// pub span: Span, -// pub err: Option, -// } - -// pub fn try_extract, T>( -// err_state: &mut TryExtractState, -// node: Option, -// source: &[u8], -// last_idx: usize, -// last_pt: tree_sitter::Point, -// leaf_fn: Option>, -// ) -> Option { -// // TODO: Double check this. 
-// err_state.span.end_byte = last_idx; -// match LT::extract(node, source, last_idx, last_pt, leaf_fn) { -// Ok(t) => Some(t), -// Err(err) => { -// todo!() -// } -// } -// } - -pub fn extract_field, T: std::fmt::Debug>( - state: &mut ExtractStructState<'_>, +pub fn extract_field<'tree, LT: Extract, T>( + state: &mut ExtractStructState<'tree>, source: &[u8], field_name: &str, closure_ref: Option>, -) -> Result { - dbg!(field_name); +) -> Result<'tree, T> { let mut ctx = ExtractContext { last_idx: state.last_idx, last_pt: state.last_pt, field_name, + node_kind: "", }; if let Some(cursor) = state.cursor.as_mut() { loop { let n = cursor.node(); - println!( - "Extracting node from text: {} - {}", - n.utf8_text(source).unwrap(), - n.to_sexp() - ); + ctx.node_kind = n.kind(); if n.is_error() { - println!("Processing error..."); + // println!("Processing error... {}, {}", n.kind(), field_name); // Try and parse it anyway, returning the result if we manage to get it. if !cursor.goto_first_child() { state.cursor = None; @@ -109,7 +72,6 @@ pub fn extract_field, T: std::fmt::Debug>( let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; if !cursor.goto_next_sibling() { - dbg!(name); state.cursor = None; }; @@ -135,7 +97,10 @@ pub fn extract_field, T: std::fmt::Debug>( } // TODO: Handle errors in this one too. 
-pub fn skip_text(state: &mut ExtractStructState<'_>, field_name: &str) -> Result<()> { +pub fn skip_text<'tree>( + state: &mut ExtractStructState<'tree>, + field_name: &str, +) -> Result<'tree, ()> { if let Some(cursor) = state.cursor.as_mut() { loop { if let Some(name) = cursor.field_name() { @@ -159,26 +124,30 @@ pub fn skip_text(state: &mut ExtractStructState<'_>, field_name: &str) -> Result pub fn parse>( input: &str, language: impl Fn() -> tree_sitter::Language, -) -> core::result::Result { - let mut parser = crate::tree_sitter::Parser::new(); +) -> crate::ParseResult { + let mut parser = tree_sitter::Parser::new(); parser.set_language(&language()).unwrap(); let tree = parser.parse(input, None).expect("Failed to parse"); let root_node = tree.root_node(); + let mut errors = vec![]; if root_node.has_error() { - let mut errors = vec![]; - crate::error::collect_parsing_errors(&root_node, input.as_bytes(), &mut errors); - for error in errors { - println!("{error}"); - } - panic!(); + crate::error::collect_parsing_errors(&root_node, &mut errors); } let mut ctx = ExtractContext { last_pt: Default::default(), last_idx: 0, field_name: "root", + node_kind: "source_file", }; - >::extract(&mut ctx, Some(root_node), input.as_bytes(), None) - - // } + let result = + >::extract(&mut ctx, Some(root_node), input.as_bytes(), None); + let result = match result { + Err(e) => { + e.accumulate_parse_errors(&mut errors); + None + } + Ok(o) => Some(o), + }; + crate::ParseResult { result, errors } } diff --git a/runtime/src/error.rs b/runtime/src/error.rs index d3c6ed9..0c1a601 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -1,170 +1,381 @@ -#[cfg(feature = "tree-sitter-standard")] -use tree_sitter_runtime_standard as tree_sitter; +use std::{collections::HashSet, ops::Range}; -#[cfg(feature = "tree-sitter-c2rust")] -use tree_sitter_runtime_c2rust as tree_sitter; +use crate::{Point, Position, extract::ExtractContext}; -use crate::Point; +/// A high level parsing error 
with useful information extracted already. +#[derive(Debug)] +pub struct ParseError { + /// Position within the source code of the full node which failed to parse. + /// This can be used in combination with `error_position` to indicate a greater context of where + /// an error occurred. + pub node_position: Position, + pub error_position: Position, + /// Possible next tokens that were expected. + pub lookaheads: Vec<&'static str>, + pub reason: ParseErrorReason, +} #[derive(Debug)] -/// An explanation for an error that occurred during parsing. pub enum ParseErrorReason { - /// The parser expected a specific token, but it was not found. - MissingToken(String), - Lookahead(Vec<&'static str>), - Unknown, + Missing, + Error, + FailedExtract { + field: String, + }, + MissingNode { + node_kind: String, + type_name: &'static str, + }, + /// Parsed OK, but failed to extract to the given type. + TypeConversion(Box), } +/// A low level error which just wraps the error node and exposes many fields around it. #[derive(Debug)] -/// An error that occurred during parsing. -pub struct ParseError { - pub reason: ParseErrorReason, - /// Inclusive start of the error. - pub start_byte: usize, - /// Exclusive end of the error. 
- pub end_byte: usize, - pub start_point: Point, - pub end_point: Point, - pub text: String, - pub kind: &'static str, - pub parent_context: Option, +pub struct NodeError<'a> { + node: tree_sitter::Node<'a>, } -impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - writeln!(f, "Failure to parse node:")?; - write!( - f, - "\t{}:{} - {}:{}", - self.start_point.line, - self.start_point.column, - self.end_point.line, - self.end_point.column, - )?; - write!(f, " {}", self.text)?; - if let Some(parent) = &self.parent_context { - writeln!(f)?; - writeln!(f, "\t(parent node: {})", parent.kind)?; +impl<'a> NodeError<'a> { + pub fn to_parse_error(&self) -> ParseError { + ParseError { + node_position: Position::new(self.node_byte_range(), self.point_range()), + error_position: Position::new(self.error_byte_range(), self.error_point_range()), + lookaheads: self.lookahead().map(|l| l.collect()).unwrap_or_default(), + reason: if self.node.is_missing() { + ParseErrorReason::Missing + } else { + ParseErrorReason::Error + }, + } + } + /// Full range of the node which failed to parse. + pub fn node_byte_range(&self) -> Range { + self.node.byte_range() + } + + /// Byte range of the portion of the text which created the error. 
+ pub fn error_byte_range(&self) -> Range { + self.node.error_byte_range().unwrap() + } + + pub fn point_range(&self) -> (Point, Point) { + let start = self.node.start_position(); + let end = self.node.end_position(); + (Point::from_tree_sitter(start), Point::from_tree_sitter(end)) + } + + pub fn error_point_range(&self) -> (Point, Point) { + let start = self.node.error_start_position().unwrap(); + let end = self.node.error_end_position().unwrap(); + (Point::from_tree_sitter(start), Point::from_tree_sitter(end)) + } + + pub fn lookahead( + &self, + // grammar: Option<&'a crate::grammar::Grammar>, + ) -> Option> { + let (state, reachable, filter) = if self.node.is_missing() { + // Handle the lookahead appropriately for missing. + let state = self.node.parse_state(); + (state, None, true) + } else { + // Find the endpoint. + // let (node, ctx) = match self.node.error_child(0) { + // Some(c) => (c, self.node.child(0).unwrap()), + // None => (self.node, self.node), + // }; + let node = match self.node.error_child(0) { + Some(c) => c, + None => self.node, + }; + + // Find the first context node type and compute reachable set. + // let reachable = if let Some(grammar) = grammar { + // dbg!(grammar.reachable_set(dbg!(ctx.kind()))) + // } else { + // None + // }; + let reachable = None; + + let state = node.parse_state(); + // NOTE: We may want to always filter these. 
+ (state, reachable, false) + }; + + if state == 0 { + return None; } - write!(f, "\treason: ")?; - match &self.reason { - ParseErrorReason::MissingToken(tok) => write!(f, "missing token: {tok}"), - ParseErrorReason::Unknown => write!(f, "unknown"), - ParseErrorReason::Lookahead(lookahead) => { - write!(f, "expected one of: {}", lookahead.join(" | ")) + + let language = self.node.language().to_owned(); + let it = language.lookahead_iterator(state)?; + + Some(ErrorLookahead { + it, + language, + filter_non_action: filter, + state, + reachable, + }) + } +} + +struct ErrorLookahead<'a> { + it: tree_sitter::LookaheadIterator, + language: tree_sitter::Language, + filter_non_action: bool, + state: u16, + reachable: Option>, +} + +impl Iterator for ErrorLookahead<'_> { + type Item = &'static str; + fn next(&mut self) -> Option { + loop { + self.it.next()?; + let sym = self.it.current_symbol(); + // skip the end symbol, it isn't useful here. + if sym == 0 { + continue; + } + if self.filter_non_action && !self.it.has_actions() { + continue; + } + // Maybe we want this to be optional as well? + // Filter out "extra" nodes. 
+ if self.state == self.language.next_state(self.state, sym) { + continue; } + + let sym_name = self.it.current_symbol_name(); + + if let Some(reachable) = &self.reachable { + if !reachable.contains(sym_name) { + eprintln!("Symbol is not reachable: {sym_name}"); + continue; + } + } + + return Some(sym_name); } } } +// impl std::fmt::Display for ParseError { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// writeln!(f, "Failure to parse node:")?; +// let (start_point, end_point) = self.point_range(); +// let (error_start_point, error_end_point) = self.error_point_range(); +// write!( +// f, +// "\t{}:{} - {}:{}", +// start_point.line, start_point.column, end_point.line, end_point.column, +// )?; +// // if let Some(parent) = &self.parent_context { +// // writeln!(f)?; +// // writeln!(f, "\t(parent node: {})", parent.kind)?; +// // } +// if let Some(lookahead) = self.lookahead() { +// let mut first = true; +// write!(f, " Expected one of: ")?; +// for lk in lookahead { +// if !first { +// write!(f, " | ")?; +// } +// write!(f, "{lk}")?; +// first = false; +// } +// } +// Ok(()) +// } +// } + #[derive(Debug)] -pub struct ParentContext { - pub kind: &'static str, +pub struct ExtractError<'a> { + inner: Vec>, } -/// Given the root node of a Tree Sitter parsing result, accumulates all -/// errors that were emitted. -pub fn collect_parsing_errors( - node: &tree_sitter::Node, - source: &[u8], - errors: &mut Vec, -) { - let start_byte = node.start_byte(); - let end_byte = node.end_byte(); - let start_point = Point::from_tree_sitter(node.start_position()); - let end_point = Point::from_tree_sitter(node.end_position()); - let kind = node.kind(); - let text = node.utf8_text(source).unwrap().to_owned(); - let mut parent_context = None; - if let Some(p) = node.parent() { - parent_context = Some(ParentContext { kind: p.kind() }); - } - let reason = if node.is_error() { - // Narrow down the node range if possible. 
- fn walk_node(node: &tree_sitter::Node) { - let mut children = node.walk(); - dbg!(node); - dbg!(node.kind()); - for child in node.children(&mut children) { - walk_node(&child); - } +#[derive(Debug)] +struct ExtractErrorInner<'a> { + /// Span of the node which failed to extract. + position: crate::Position, + reason: ExtractErrorReason<'a>, +} + +impl<'a> ExtractError<'a> { + pub(crate) fn empty() -> Self { + Self { inner: vec![] } + } + pub(crate) fn prop(self) -> Result<(), Self> { + if self.inner.is_empty() { + Ok(()) + } else { + Err(self) + } + } + pub(crate) fn new(n: tree_sitter::Node<'a>, expected_field: String) -> Self { + let position = crate::Position::from_node(n); + Self { + inner: vec![ExtractErrorInner { + position, + reason: ExtractErrorReason::Parse { + expected_field, + node: n, + }, + }], } - // let q = tree_sitter::Query::new(&node.language(), "(ERROR_INTERNAL) @error").unwrap(); - // let mut qcur = tree_sitter::QueryCursor::new(); - // let mut it = qcur.captures(&q, *node, source); - // use tree_sitter::StreamingIterator; - // // NOTE: Instead of just using the first internal error, we should use all of them that are - // // non-overlapping. 
- // let Some((cap, _)) = it.next() else { - // panic!("Could not capture ERROR_INTERNAL"); - // }; - // // Should only be one capture since we only have `@error` - // let error_internal = cap.captures[0].node; - println!("Error range: {:?}", node.error_range().unwrap()); - let mut err_cur = node.walk(); - for err in node.error_children(&mut err_cur).unwrap() { - dbg!(err); + } + pub(crate) fn merge(&mut self, err: ExtractError<'a>) { + self.inner.extend(err.inner); + } + + pub(crate) fn type_conversion( + n: tree_sitter::Node<'_>, + e: impl std::error::Error + Send + 'static, + ) -> Self { + let position = crate::Position::from_node(n); + Self { + inner: vec![ExtractErrorInner { + position, + reason: ExtractErrorReason::TypeConversion(Box::new(e)), + }], } + } - let end = node.error_child(0).unwrap().prev_sibling().unwrap(); - // walk_node(node); - // dbg!(error_internal.to_sexp()); - // Traverse down to find the next parse state and display it in the error. - let mut c = end.walk(); - // c.goto_descendant(dbg!(node.descendant_count() - 1)); - // c.goto_first_child(); - // while c.node().child_count() > 0 && c.goto_next_sibling() {} - // c.goto_previous_sibling(); - // c.goto_first_child(); - // c.goto_next_sibling(); - while c.goto_last_child() {} - // while c.goto_next_sibling() {} - // loop { - // let mut run = false; - // while c.goto_first_child() { run = true ;} - // dbg!(c.node()); - // if c.goto_next_sibling() {run = true ;} - // dbg!(c.node()); - // if !run { - // break; - // } - // } - // dbg!(c.node()); - // dbg!(c.node().next_parse_state()); - let state = dbg!(c.node().next_parse_state()); - // let state = c.node().next_parse_state(); - let state = if state != 0 { - state - } else { - c.node().parse_state() + pub(crate) fn accumulate_parse_errors(self, errors: &mut Vec) { + for inner in self.inner { + let err = match inner.reason { + ExtractErrorReason::TypeConversion(t) => { + let reason = ParseErrorReason::TypeConversion(t); + ParseError { + 
node_position: inner.position.clone(), + error_position: inner.position, + reason, + lookaheads: vec![], + } + } + ExtractErrorReason::Parse { + expected_field, + node, + } => { + let reason = ParseErrorReason::FailedExtract { + field: expected_field, + }; + let mut error = NodeError { node }.to_parse_error(); + error.reason = reason; + error + } + ExtractErrorReason::MissingNode { + node_kind, + type_name, + } => { + let reason = ParseErrorReason::MissingNode { + node_kind, + type_name, + }; + ParseError { + node_position: inner.position.clone(), + error_position: inner.position, + reason, + lookaheads: vec![], + } + } + }; + errors.push(err); + } + } + + pub(crate) fn missing_node(ctx: &ExtractContext<'_>, type_name: &'static str) -> Self { + let position = crate::Position { + // TODO: This should be fixed to actually have the full range from the outer node. + bytes: ctx.last_idx..ctx.last_idx, + start: Point::from_tree_sitter(ctx.last_pt), + end: Point::from_tree_sitter(ctx.last_pt), }; - dbg!(state); - if state != 0 - && let Some(mut it) = node.language().lookahead_iterator(state) - { - ParseErrorReason::Lookahead(it.iter_names().collect()) - } else { - ParseErrorReason::Unknown + Self { + inner: vec![ExtractErrorInner { + position, + reason: ExtractErrorReason::MissingNode { + node_kind: ctx.node_kind.to_owned(), + type_name, + }, + }], } - } else if node.is_missing() { - ParseErrorReason::MissingToken(node.kind().to_string()) - } else if node.has_error() { - // A node somewhere down in the tree from here has an error, recursively find it. 
- let mut cursor = node.walk(); - node.children(&mut cursor) - .for_each(|c| collect_parsing_errors(&c, source, errors)); - return; - } else { - return; - }; - errors.push(ParseError { - reason, - start_byte, - end_byte, - start_point, - end_point, - text, - kind, - parent_context, - }); + } + + pub fn position(&self) -> &Position { + &self.inner[0].position + } + + pub fn reason(&self) -> &ExtractErrorReason { + &self.inner[0].reason + } +} + +#[derive(Debug)] +pub enum ExtractErrorReason<'a> { + /// Failed to parse at the tree-sitter level. + Parse { + // Can be &'static? + expected_field: String, + node: tree_sitter::Node<'a>, + }, + MissingNode { + node_kind: String, + type_name: &'static str, + }, + /// Parsed OK, but failed to extract to the given type. + TypeConversion(Box), +} + +impl<'a> IntoIterator for ExtractError<'a> { + type Item = ExtractError<'a>; + type IntoIter = ErrorIntoIter<'a>; + fn into_iter(self) -> Self::IntoIter { + ErrorIntoIter { + iter: self.inner.into_iter(), + } + } +} + +pub struct ErrorIntoIter<'a> { + iter: std::vec::IntoIter>, +} + +impl<'a> Iterator for ErrorIntoIter<'a> { + type Item = ExtractError<'a>; + fn next(&mut self) -> Option { + Some(ExtractError { + inner: vec![self.iter.next()?], + }) + } +} +/// Given the root node of a Tree Sitter parsing result, accumulates all +/// errors that were emitted. +pub fn collect_parsing_errors(node: &tree_sitter::Node<'_>, errors: &mut Vec) { + collect_node_errors(*node, |err| errors.push(err.to_parse_error())); +} + +pub fn collect_node_errors<'a, F>(node: tree_sitter::Node<'a>, mut f: F) +where + F: FnMut(NodeError<'a>), +{ + collect_node_errors_(node, &mut f); + // I couldn't figure out how to get this to compile well. 
+ fn collect_node_errors_<'a, F>(node: tree_sitter::Node<'a>, f: &mut F) + where + F: FnMut(NodeError<'a>), + { + if node.is_error() || node.is_missing() { + f(NodeError { node }); + } else if node.has_error() { + // A node somewhere down in the tree from here has an error, recursively find it. + let mut cursor = node.walk(); + node.children(&mut cursor) + .for_each(|c| collect_node_errors_(c, f)); + return; + } else { + return; + }; + } } diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index 18918ba..0dd0028 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -1,115 +1,43 @@ -use crate::Span; +use super::Node; -use super::{Node, tree_sitter}; /// Defines the logic used to convert a node in a Tree Sitter tree to /// the corresponding Rust type. pub trait Extract { type LeafFn<'a>: Clone; - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], leaf_fn: Option>, - ) -> Result; + ) -> Result<'tree, Output>; } pub struct ExtractContext<'a> { - // TODO: We may need to keep a stack of these, and put them in a separate structure. pub last_idx: usize, pub last_pt: tree_sitter::Point, pub field_name: &'a str, + pub node_kind: &'a str, } -#[derive(Default)] -pub struct ExtractState { - pub last_idx: usize, - pub last_pt: tree_sitter::Point, - pub error: Option, -} - -impl ExtractState { - pub fn error(&mut self, err: ExtractError) -> &mut Self { - if let Some(existing) = &mut self.error { - existing.merge(err); - } else { - self.error = Some(err); - } - self - } -} - -// pub struct ExtractResult { -// pub value: Option, -// pub is_partial: bool, -// /// Indicates this parse failed, or an inner parse failed which propogated its error. +// #[derive(Default)] +// pub struct ExtractState { +// pub last_idx: usize, +// pub last_pt: tree_sitter::Point, // pub error: Option, -// /// Indicates somewhere within `value` there was an error. 
-// pub has_error: bool, // } - -pub type Result = std::result::Result; - -// NOTE: This could hold references if we want this to be fast like tree-sitter is. -#[derive(Debug)] -pub struct ExtractError { - inner: Vec, -} - -#[derive(Debug)] -struct ExtractErrorInner { - /// Span of the node which failed to extract. - span: Span, - reason: ExtractErrorReason, -} - -impl ExtractError { - pub(crate) fn empty() -> Self { - Self { inner: vec![] } - } - pub(crate) fn prop(self) -> Result<()> { - if self.inner.is_empty() { - Ok(()) - } else { - Err(self) - } - } - pub(crate) fn new(n: tree_sitter::Node<'_>, expected_field: String) -> Self { - let span = Span::new(n.start_byte(), n.end_byte()); - Self { - inner: vec![ExtractErrorInner { - span, - reason: ExtractErrorReason::Parse { expected_field }, - }], - } - } - pub(crate) fn merge(&mut self, err: ExtractError) { - self.inner.extend(err.inner); - } - - pub(crate) fn type_conversion( - n: tree_sitter::Node<'_>, - e: impl std::error::Error + Send + 'static, - ) -> Self { - let span = Span::new(n.start_byte(), n.end_byte()); - Self { - inner: vec![ExtractErrorInner { - span, - reason: ExtractErrorReason::TypeConversion(Box::new(e)), - }], - } - } -} - -#[derive(Debug)] -pub enum ExtractErrorReason { - /// Failed to parse at the tree-sitter level. - Parse { - // Can be &'static? - expected_field: String, - }, - /// Parsed OK, but failed to extract to the given type. 
- TypeConversion(Box), -} +// +// impl ExtractState { +// pub fn error(&mut self, err: ExtractError) -> &mut Self { +// if let Some(existing) = &mut self.error { +// existing.merge(err); +// } else { +// self.error = Some(err); +// } +// self +// } +// } +pub use crate::error::ExtractError; +pub type Result<'a, T> = std::result::Result>; #[derive(Debug, Clone, Copy)] pub struct NodeExt<'a> { @@ -213,12 +141,12 @@ where { type LeafFn<'a> = F; - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], leaf_fn: Option>, - ) -> Result { + ) -> Result<'tree, L> { let node = node.expect("Expected a node"); // TODO: Consider if this should be fallible as well. Ok(leaf_fn.expect("No leaf function on WithLeaf").apply( @@ -265,12 +193,12 @@ where impl Extract<()> for () { type LeafFn<'a> = (); - fn extract<'a>( + fn extract<'a, 'tree>( _ctx: &mut ExtractContext<'_>, - _node: Option, + _node: Option>, _source: &[u8], _leaf_fn: Option>, - ) -> Result<()> { + ) -> Result<'tree, ()> { // TODO: Do we need to handle this here? Does `extract` itself need to expect an error? 
Ok(()) } @@ -278,12 +206,12 @@ impl Extract<()> for () { impl, U> Extract> for Option { type LeafFn<'a> = T::LeafFn<'a>; - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], leaf_fn: Option>, - ) -> Result> { + ) -> Result<'tree, Option> { node.map(|n| T::extract(ctx, Some(n), source, leaf_fn)) .transpose() } @@ -291,51 +219,47 @@ impl, U> Extract> for Option { impl, U> Extract> for Box { type LeafFn<'a> = T::LeafFn<'a>; - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], leaf_fn: Option>, - ) -> Result> { + ) -> Result<'tree, Box> { Ok(Box::new(T::extract(ctx, node, source, leaf_fn)?)) } } impl, U> Extract> for Vec { type LeafFn<'a> = T::LeafFn<'a>; - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], leaf_fn: Option>, - ) -> Result> { + ) -> Result<'tree, Vec> { let node = match node { Some(node) => node, None => return Ok(vec![]), }; - let mut cursor = node.walk(); let mut out = vec![]; + let mut cursor = node.walk(); let mut error = ExtractError::empty(); if cursor.goto_first_child() { loop { - let n = cursor.node(); // Try and parse the error specially. + let n = cursor.node(); if n.is_error() { - println!("Processing error... for {}", ctx.field_name); - // match T::extract(ctx, Some(n), source, leaf_fn.clone()) { - // Ok(o) => { - // out.push(o); - // } - // Err(e) => { - // error.merge(e); - // } - // } + // println!("Processing error... for {}", ctx.field_name); + // TODO: Do some error handling here instead. + // For now we just ignore it. 
} else if cursor.field_name().is_some() { - out.push(T::extract(ctx, Some(n), source, leaf_fn.clone())?); + match T::extract(ctx, Some(n), source, leaf_fn.clone()) { + Ok(t) => out.push(t), + Err(e) => error.merge(e), + } } ctx.last_idx = n.end_byte(); ctx.last_pt = n.end_position(); - if !cursor.goto_next_sibling() { break; } @@ -350,16 +274,23 @@ macro_rules! extract_from_str { ($t:ty) => { impl Extract<$t> for $t { type LeafFn<'a> = (); - fn extract<'a>( + fn extract<'a, 'tree>( _ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], _leaf_fn: Option>, - ) -> Result { - let node = node.expect(concat!( - "No node found in parsing extract: ", - stringify!($t) - )); + ) -> Result<'tree, Self> { + let node = match node { + Some(n) => n, + None => { + return Err(ExtractError::missing_node(_ctx, stringify!($t))); + // panic!( + // "No node found in parsing extract: {} - for field: {}", + // stringify!($t), + // _ctx.field_name + // ); + } + }; let text = node.utf8_text(source).expect("No text found for node"); match text.parse() { Ok(t) => Ok(t), @@ -388,13 +319,13 @@ macro_rules! 
extract_for_tuple { ($($t:ident),*) => { impl<$($t: Extract<$t>),*> Extract<($($t),*)> for ($($t),*) { type LeafFn<'a> = (); - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], _leaf_fn: Option>, - ) -> Result { - let node = node.expect("No node found in tuple extract"); + ) -> Result<'tree, Self> { + let node = node.ok_or_else(|| ExtractError::missing_node(ctx, stringify!($($t),*)))?; let mut c = node.walk(); let mut it = node.children(&mut c); Ok(( @@ -422,7 +353,7 @@ extract_for_tuple!(T1, T2, T3, T4, T5, T6, T7, T8); // impl Extract for bool { // type LeafFn = (); // fn extract( -// node: Option, +// node: Option, // source: &[u8], // last_idx: usize, // leaf_fn: Option<&Self::LeafFn>, diff --git a/runtime/src/grammar.rs b/runtime/src/grammar.rs new file mode 100644 index 0000000..2f5a50d --- /dev/null +++ b/runtime/src/grammar.rs @@ -0,0 +1,156 @@ +//! Grammar related functions. +use std::collections::{HashMap, HashSet}; + +use serde::{Deserialize, Serialize}; + +// NOTE: This could be useful for generating the grammar in the first place instead of just +// building json! values directly. + +/// Type for the JSON representation of a grammar, mostly copied from `tree_sitter_generate`. +#[derive(Deserialize, Serialize)] +pub struct Grammar { + pub name: String, + pub word: Option, + // NOTE: Use `indexmap` because we need to preserve order. 
+ // https://docs.rs/indexmap/2.10.0/indexmap/map/struct.IndexMap.html + pub rules: HashMap, + pub extras: Vec, +} + +#[derive(Deserialize, Serialize)] +#[serde(tag = "type")] +#[allow(non_camel_case_types)] +#[allow(clippy::upper_case_acronyms)] +pub enum RuleDef { + ALIAS { + content: Box, + named: bool, + value: String, + }, + BLANK, + STRING { + value: String, + }, + PATTERN { + value: String, + flags: Option, + }, + SYMBOL { + name: String, + }, + CHOICE { + members: Vec, + }, + FIELD { + name: String, + content: Box, + }, + SEQ { + members: Vec, + }, + REPEAT { + content: Box, + }, + REPEAT1 { + content: Box, + }, + PREC_DYNAMIC { + value: i32, + content: Box, + }, + PREC_LEFT { + value: PrecedenceValue, + content: Box, + }, + PREC_RIGHT { + value: PrecedenceValue, + content: Box, + }, + PREC { + value: PrecedenceValue, + content: Box, + }, + TOKEN { + content: Box, + }, + IMMEDIATE_TOKEN { + content: Box, + }, + RESERVED { + context_name: String, + content: Box, + }, +} + +#[derive(Deserialize, Serialize)] +#[serde(untagged)] +pub enum PrecedenceValue { + Integer(i32), + Name(String), +} + +impl Grammar { + /// Starting from `rule_name`, find all symbols (named or anonymous) which can be reached. + pub fn reachable_set<'a>(&'a self, rule_name: &str) -> Option> { + let mut set = HashSet::new(); + let (name, rule) = self.rules.get_key_value(rule_name)?; + set.insert(name.as_str()); + self.compute_reachable(rule, &mut set)?; + Some(set) + } + + fn compute_reachable<'a>( + &'a self, + rule: &'a RuleDef, + set: &mut HashSet<&'a str>, + ) -> Option<()> { + match rule { + RuleDef::ALIAS { + content, + named: _, + value, + } => { + if set.insert(value) { + self.compute_reachable(content, set)?; + } + } + RuleDef::BLANK => {} + RuleDef::STRING { value } => { + set.insert(value.as_str()); + } + RuleDef::PATTERN { value: _, flags: _ } => {} + RuleDef::SYMBOL { name } => { + // Don't repeat if we have already seen it before. 
+ if set.insert(name.as_str()) { + let rule = self.rules.get(name)?; + self.compute_reachable(rule, set)?; + } + } + RuleDef::CHOICE { members } => { + for member in members { + self.compute_reachable(member, set)?; + } + } + RuleDef::FIELD { name: _, content } => self.compute_reachable(content, set)?, + RuleDef::SEQ { members } => { + for member in members { + self.compute_reachable(member, set)?; + } + } + RuleDef::REPEAT { content } => self.compute_reachable(content, set)?, + RuleDef::REPEAT1 { content } => self.compute_reachable(content, set)?, + RuleDef::PREC_DYNAMIC { value: _, content } => self.compute_reachable(content, set)?, + RuleDef::PREC_LEFT { value: _, content } => self.compute_reachable(content, set)?, + RuleDef::PREC_RIGHT { value: _, content } => self.compute_reachable(content, set)?, + RuleDef::PREC { value: _, content } => self.compute_reachable(content, set)?, + RuleDef::TOKEN { content } => self.compute_reachable(content, set)?, + RuleDef::IMMEDIATE_TOKEN { content } => self.compute_reachable(content, set)?, + RuleDef::RESERVED { + context_name: _, + content, + } => self.compute_reachable(content, set)?, + } + + Some(()) + } +} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 1f33326..9956ef3 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,32 +1,63 @@ pub mod __private; pub mod error; pub mod extract; +pub mod grammar; pub mod rule; +pub use rule::Language; + use extract::ExtractContext; pub use extract::{Extract, WithLeaf}; use std::ops::Deref; pub use rust_sitter_macro::*; +pub use tree_sitter; -#[cfg(feature = "tree-sitter-standard")] -pub use tree_sitter_runtime_standard as tree_sitter; +use tree_sitter::Node; -#[cfg(feature = "tree-sitter-c2rust")] -pub use tree_sitter_runtime_c2rust as tree_sitter; +/// The result of a parse. Parses can return errors and potentially still produce a valid result +/// partial result. +pub struct ParseResult { + /// The parse result, if it managed to get one. 
This can `Some` even if there are errors. + pub result: Option, + /// All errors that were found during parsing. + pub errors: Vec, +} -use tree_sitter::Node; +impl ParseResult { + /// Only return the result if there are no errors. + pub fn into_result(self) -> Result> { + if self.errors.is_empty() { + // It shouldn't be possible to have an empty result with no parse errors. + self.result.ok_or_else(Vec::new) + } else { + Err(self.errors) + } + } +} + +impl std::fmt::Debug for ParseResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ParseResult") + .field("result", &self.result) + .field("errors", &self.errors) + .finish() + } +} + +pub struct NodeParseResult<'a, T> { + pub result: Result>, + pub errors: Vec>, +} -#[derive(Clone, Debug)] /// A wrapper around a value that also contains the span of the value in the source. +#[derive(Clone, Debug)] pub struct Spanned { /// The underlying parsed node. pub value: T, - /// The span of the node in the source. The first value is the inclusive start - /// of the span, and the second value is the exclusive end of the span. - pub byte_span: Span, - pub line_span: (Point, Point), + /// The position where the node is located. + pub position: Position, } impl Deref for Spanned { @@ -37,26 +68,27 @@ impl Deref for Spanned { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct Span { - pub start_byte: usize, - pub end_byte: usize, - // Do we need point? I don't think so in reality, because end tools can do the conversion, - // which tends to be the pattern in other parser tools. +/// Position in a file, used by errors and `Spanned`. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Position { + /// Byte range. + pub bytes: core::ops::Range, + /// row + column start point. + pub start: Point, + /// row + column end point. 
+ pub end: Point, } -impl Span { - pub fn new(start_byte: usize, end_byte: usize) -> Self { - Self { - start_byte, - end_byte, - } +impl Position { + fn new(bytes: core::ops::Range, (start, end): (Point, Point)) -> Self { + Self { bytes, start, end } } -} -impl From<(usize, usize)> for Span { - fn from((start, end): (usize, usize)) -> Self { - Self::new(start, end) + fn from_node(node: Node<'_>) -> Self { + let bytes = node.byte_range(); + let start = Point::from_tree_sitter(node.start_position()); + let end = Point::from_tree_sitter(node.end_position()); + Self { bytes, start, end } } } @@ -79,29 +111,19 @@ impl Point { impl, U> Extract> for Spanned { type LeafFn<'a> = T::LeafFn<'a>; - fn extract<'a>( + fn extract<'a, 'tree>( ctx: &mut ExtractContext<'_>, - node: Option, + node: Option>, source: &[u8], leaf_fn: Option>, - ) -> extract::Result> { + ) -> extract::Result<'tree, Spanned> { Ok(Spanned { value: T::extract(ctx, node, source, leaf_fn)?, - byte_span: node - .map(|n| (n.start_byte(), n.end_byte())) - .unwrap_or((ctx.last_idx, ctx.last_idx)) - .into(), - line_span: node - .map(|n| { - ( - Point::from_tree_sitter(n.start_position()), - Point::from_tree_sitter(n.end_position()), - ) - }) - .unwrap_or(( - Point::from_tree_sitter(ctx.last_pt), - Point::from_tree_sitter(ctx.last_pt), - )), + position: node.map(Position::from_node).unwrap_or_else(|| Position { + bytes: ctx.last_idx..ctx.last_idx, + start: Point::from_tree_sitter(ctx.last_pt), + end: Point::from_tree_sitter(ctx.last_pt), + }), }) } } diff --git a/runtime/src/rule.rs b/runtime/src/rule.rs index 2681da7..88f6df4 100644 --- a/runtime/src/rule.rs +++ b/runtime/src/rule.rs @@ -1,14 +1,42 @@ +use tree_sitter::Node; -pub trait Rule { - // TODO: Consider using serde_json::Value instead. Or just a serialized actual type - // representing the different constructs... 
+use crate::{Extract, NodeParseResult, ParseResult, extract::ExtractContext}; + +pub trait Rule: Extract { + // TODO: Use the grammar::RuleDef and grammar::Grammar + // For this to work as expected we need a #[derive(Language)], or at least a `Language` trait + // which then has the `parse` function and the `generate_grammar() -> grammar::Grammar` + // implementation instead of just producing an ast. + // Since we aren't using any of this yet though, we will leave this alone. fn produce_ast() -> String; // Maybe Cow instead. fn rule_name() -> &'static str; + + /// Extracts directly from a node. + fn extract_node<'a>(n: Node<'a>, source: &[u8]) -> NodeParseResult<'a, Output> + where + Self: Sized, + { + let mut ctx = ExtractContext { + last_pt: n.start_position(), + last_idx: n.start_byte(), + node_kind: n.kind(), + // TODO: ??? + field_name: "", + }; + // Extract the errors, and try to parse anyway. + let mut errors = vec![]; + if n.has_error() { + crate::error::collect_node_errors(n, |e| errors.push(e)); + } + let result = Self::extract(&mut ctx, Some(n), source, None); + NodeParseResult { result, errors } + } } -// ...like this. 
-// pub enum TreeSitterType { -// Choice(TreeSitterChoice), -// Seq(TreeSitterSeq), -// } +pub trait Language: Sized { + fn produce_grammar() -> String; + + fn language() -> tree_sitter::Language; + fn parse(input: &str) -> ParseResult; +} diff --git a/tool/Cargo.toml b/tool/Cargo.toml index e093d7e..dd0008f 100644 --- a/tool/Cargo.toml +++ b/tool/Cargo.toml @@ -10,15 +10,6 @@ edition = "2024" keywords = ["parsing", "codegen"] categories = ["development-tools"] -[features] -default = ["build_parsers"] -build_parsers = [ - "dep:tempfile", - "dep:tree-sitter", - "dep:tree-sitter-generate", - "dep:cc", -] - [dependencies] syn = { version = "2", features = ["full", "extra-traits"] } syn-inline-mod = "0.6" @@ -26,10 +17,10 @@ serde = { version = "1", features = ["derive"] } serde_json = { version = "1", features = ["preserve_order"] } rust-sitter-common = { path = "../common" } -tempfile = { version = "3", optional = true } -tree-sitter = { version = "0.26", optional = true } -tree-sitter-generate = { version = "0.26", optional = true } -cc = { version = "1", optional = true } +tempfile = "3" +tree-sitter.workspace = true +tree-sitter-generate.workspace = true +cc = "1" [dev-dependencies] insta = "1" diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 397adb2..f6767d9 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -1,14 +1,11 @@ // TODO: Switch on which version we are using specifically. const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 25, 6)); -#[cfg(feature = "build_parsers")] use std::io::Write; -use std::path::Path; +use std::path::{Path, PathBuf}; -#[cfg(feature = "build_parsers")] use tree_sitter_generate::generate_parser_for_grammar; -#[cfg(feature = "build_parsers")] /// Using the `cc` crate, generates and compiles a C parser with Tree Sitter /// for every Rust Sitter grammar found in the given module and recursive /// submodules. @@ -16,17 +13,35 @@ pub fn build_parser

(root_file: &P) where P: AsRef + ?Sized, { - let root_file = syn_inline_mod::parse_and_inline_modules(root_file.as_ref()); - match rust_sitter_common::expansion::generate_grammar(root_file.items) { - Err(e) => panic!("{e}"), - Ok(None) => {} - Ok(Some(grammar)) => { - let out_dir = std::env::var("OUT_DIR").unwrap(); - // TODO: We want to generate better errors here as well. However, it isn't really - // possible to generate it until we can produce a full grammar, which we also can't do - // if we derive on Rule. - if let Err(e) = generate_parser(&grammar, &out_dir) { - panic!("{e}"); + ParserBuilder::default().build(root_file); +} + +#[derive(Default)] +pub struct ParserBuilder { + pub output: Option, +} + +impl ParserBuilder { + pub fn output(mut self, output: impl Into) -> Self { + self.output = Some(output.into()); + self + } + + pub fn build

(self, root_file: &P) + where + P: AsRef + ?Sized, + { + let root_file = syn_inline_mod::parse_and_inline_modules(root_file.as_ref()); + match rust_sitter_common::expansion::generate_grammar(root_file.items) { + Err(e) => panic!("{e}"), + Ok(None) => {} + Ok(Some(grammar)) => { + // TODO: We want to generate better errors here as well. However, it isn't really + // possible to generate it until we can produce a full grammar, which we also can't do + // if we derive on Rule. + if let Err(e) = generate_parser(&grammar, self.output.as_deref()) { + panic!("{e}"); + } } } } @@ -34,9 +49,10 @@ where // TODO: Rewrite this function to support specifying the out dir and target manually, to allow // generating the parser to a local folder for easier integration with external text editors. -fn generate_parser(grammar: &serde_json::Value, _out_dir: &str) -> Result<(), String> { +fn generate_parser(grammar: &serde_json::Value, out_dir: Option<&Path>) -> Result<(), String> { + let grammar_string = grammar.to_string(); let (grammar_name, grammar_c) = - match generate_parser_for_grammar(&grammar.to_string(), GENERATED_SEMANTIC_VERSION) { + match generate_parser_for_grammar(&grammar_string, GENERATED_SEMANTIC_VERSION) { Ok(o) => o, Err(e) => { // Doing it this way produces a clean error from tree-sitter on failure. 
@@ -48,7 +64,12 @@ fn generate_parser(grammar: &serde_json::Value, _out_dir: &str) -> Result<(), St .tempdir() .unwrap(); - let dir = tempfile.path(); + let dir = if let Some(out) = out_dir { + out + } else { + tempfile.path() + }; + let sysroot_dir = write_grammar_and_c_to_dir(&grammar_name, grammar, &grammar_c, dir); // let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}",)); // if grammar_dir.is_dir() { // std::fs::remove_dir_all(&grammar_dir).expect("Couldn't clear old artifacts"); @@ -59,6 +80,26 @@ fn generate_parser(grammar: &serde_json::Value, _out_dir: &str) -> Result<(), St // .expect("Couldn't create grammar JSON directory"); // grammar_dir + let mut c_config = cc::Build::new(); + c_config.std("c11").include(dir).include(&sysroot_dir); + c_config + .flag_if_supported("-Wno-unused-label") + .flag_if_supported("-Wno-unused-parameter") + .flag_if_supported("-Wno-unused-but-set-variable") + .flag_if_supported("-Wno-trigraphs") + .flag_if_supported("-Wno-everything"); + c_config.file(dir.join("parser.c")); + + c_config.compile(&grammar_name); + Ok(()) +} + +fn write_grammar_and_c_to_dir( + grammar_name: &str, + grammar: &serde_json::Value, + grammar_c: &str, + dir: &Path, +) -> PathBuf { let grammar_file = dir.join("parser.c"); let mut f = std::fs::File::create(grammar_file).unwrap(); @@ -74,7 +115,7 @@ fn generate_parser(grammar: &serde_json::Value, _out_dir: &str) -> Result<(), St drop(grammar_json_file); let header_dir = dir.join("tree_sitter"); - std::fs::create_dir(&header_dir).unwrap(); + std::fs::create_dir_all(&header_dir).unwrap(); let mut parser_file = std::fs::File::create(header_dir.join("parser.h")).unwrap(); parser_file .write_all(tree_sitter::PARSER_HEADER.as_bytes()) @@ -109,18 +150,7 @@ fn generate_parser(grammar: &serde_json::Value, _out_dir: &str) -> Result<(), St drop(stdbool); } - let mut c_config = cc::Build::new(); - c_config.std("c11").include(dir).include(&sysroot_dir); - c_config - 
.flag_if_supported("-Wno-unused-label") - .flag_if_supported("-Wno-unused-parameter") - .flag_if_supported("-Wno-unused-but-set-variable") - .flag_if_supported("-Wno-trigraphs") - .flag_if_supported("-Wno-everything"); - c_config.file(dir.join("parser.c")); - - c_config.compile(&grammar_name); - Ok(()) + sysroot_dir } #[cfg(test)] @@ -132,7 +162,9 @@ mod tests { use tree_sitter_generate::generate_parser_for_grammar; fn generate_grammar(item: ItemMod) -> serde_json::Value { let (_, items) = item.content.unwrap(); - rust_sitter_common::expansion::generate_grammar(items).unwrap().unwrap() + rust_sitter_common::expansion::generate_grammar(items) + .unwrap() + .unwrap() } #[test] diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap index bb1a6a9..4141495 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"Program","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Program_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_vec_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Program_0_vec_contents"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"typ
e":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"_ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"_ExpressionStatement__semicolon"}}]},"_IfStatement__if":{"type":"STRING","value":"if"},"_IfStatement__lparen":{"type":"STRING","value":"("},"_IfStatement__rparen":{"type":"STRING","value":")"},"_IfStatement__lbrace":{"type":"STRING","value":"{"},"_IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"_IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"_IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen"
,"content":{"type":"SYMBOL","name":"_IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"_IfStatementElse__else":{"type":"STRING","value":"else"},"_IfStatementElse__lbrace":{"type":"STRING","value":"{"},"_IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"_IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]}},"extras":[]} 
+{"name":"Program","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"List_Program_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"type":"SEQ","members":[{"type":
"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"_ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"_ExpressionStatement__semicolon"}}]},"_IfStatement__if":{"type":"STRING","value":"if"},"_IfStatement__lparen":{"type":"STRING","value":"("},"_IfStatement__rparen":{"type":"STRING","value":")"},"_IfStatement__lbrace":{"type":"STRING","value":"{"},"_IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"_IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"_IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","content":{"type":"SYMBOL",
"name":"_IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"_IfStatementElse__else":{"type":"STRING","value":"else"},"_IfStatementElse__lbrace":{"type":"STRING","value":"{"},"_IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"_IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap index 7da7603..efec7f5 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number_value":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Number_value"}}]},"Expr_Numbers_0_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"Expr_Numbers_0_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Numbers_0_vec_contents"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} +{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number_value":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Number_value"}}]},"List_Expr_Numbers_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Expr_Numbers_0_element","content":{"type":"SYMBOL","name":"Number"}}},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Expr_Numbers_0"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap index 4d442de..21afd91 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap index 881805c..1721ac8 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git 
a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap index 881805c..1721ac8 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap index 3f7c1b1..4dd0b89 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"NumberList_numbers_vec_contents":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_vec_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"NumberList_numbers_vec_contents"}}]}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} diff --git a/tool/target/rust-analyzer/metadata/sysroot/Cargo.lock b/tool/target/rust-analyzer/metadata/sysroot/Cargo.lock new file mode 100644 index 0000000..b125f01 --- /dev/null +++ 
b/tool/target/rust-analyzer/metadata/sysroot/Cargo.lock @@ -0,0 +1,511 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "compiler_builtins", + "gimli", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-core", +] + +[[package]] +name = "alloc" +version = "0.0.0" +dependencies = [ + "compiler_builtins", + "core", +] + +[[package]] +name = "alloctests" +version = "0.0.0" +dependencies = [ + "rand", + "rand_xorshift", +] + +[[package]] +name = "cc" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aeb932158bd710538c73702db6945cb68a8fb08c519e6e12706b94263b36db8" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-core", +] + +[[package]] +name = "compiler_builtins" +version = "0.1.158" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "164cdc689e4c6d69417f77a5f48be240c291e84fbef0b1281755dc754b19c809" +dependencies = [ + "cc", + "rustc-std-workspace-core", +] + +[[package]] +name = "core" +version = "0.0.0" + +[[package]] +name = "coretests" +version = "0.0.0" +dependencies = [ + "rand", + "rand_xorshift", +] + +[[package]] +name = "dlmalloc" +version = "0.2.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cff88b751e7a276c4ab0e222c3f355190adc6dde9ce39c851db39da34990df7" +dependencies = [ + "cfg-if", + "compiler_builtins", + "libc", + "rustc-std-workspace-core", + "windows-sys", +] + +[[package]] +name = "fortanix-sgx-abi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57cafc2274c10fab234f176b25903ce17e690fca7597090d50880e047a0389c5" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-core", +] + +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "rustc-std-workspace-core", + "rustc-std-workspace-std", + "unicode-width", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "hashbrown" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "hermit-abi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "libc" +version = "0.2.172" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +dependencies = [ + "rustc-std-workspace-core", +] + +[[package]] 
+name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-core", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +dependencies = [ + "adler2", + "compiler_builtins", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "compiler_builtins", + "memchr", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "panic_abort" +version = "0.0.0" +dependencies = [ + "alloc", + "cfg-if", + "compiler_builtins", + "core", + "libc", +] + +[[package]] +name = "panic_unwind" +version = "0.0.0" +dependencies = [ + "alloc", + "cfg-if", + "compiler_builtins", + "core", + "libc", + "unwind", +] + +[[package]] +name = "proc_macro" +version = "0.0.0" +dependencies = [ + "core", + "rustc-literal-escaper", + "std", +] + +[[package]] +name = "profiler_builtins" +version = "0.0.0" +dependencies = [ + "cc", +] + +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-core", +] + +[[package]] +name = "r-efi-alloc" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e43c53ff1a01d423d1cb762fd991de07d32965ff0ca2e4f80444ac7804198203" +dependencies = [ + "compiler_builtins", + "r-efi", + "rustc-std-workspace-core", +] + +[[package]] +name = "rand" +version = 
"0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-core", +] + +[[package]] +name = "rustc-literal-escaper" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0041b6238913c41fe704213a4a9329e2f685a156d1781998128b4149c230ad04" +dependencies = [ + "rustc-std-workspace-std", +] + +[[package]] +name = "rustc-std-workspace-alloc" +version = "1.99.0" +dependencies = [ + "alloc", +] + +[[package]] +name = "rustc-std-workspace-core" +version = "1.99.0" +dependencies = [ + "core", +] + +[[package]] +name = "rustc-std-workspace-std" +version = "1.99.0" +dependencies = [ + "std", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "std" +version = "0.0.0" +dependencies = [ + "addr2line", + "alloc", + "cfg-if", + "compiler_builtins", + "core", + "dlmalloc", + "fortanix-sgx-abi", + "hashbrown", + "hermit-abi", + "libc", + "miniz_oxide", + "object", + "panic_abort", + "panic_unwind", + "r-efi", + "r-efi-alloc", + "rand", + "rand_xorshift", 
+ "rustc-demangle", + "std_detect", + "unwind", + "wasi", + "windows-targets 0.0.0", +] + +[[package]] +name = "std_detect" +version = "0.1.5" +dependencies = [ + "cfg-if", + "compiler_builtins", + "libc", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "sysroot" +version = "0.0.0" +dependencies = [ + "proc_macro", + "profiler_builtins", + "std", + "test", +] + +[[package]] +name = "test" +version = "0.0.0" +dependencies = [ + "core", + "getopts", + "libc", + "std", +] + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-core", + "rustc-std-workspace-std", +] + +[[package]] +name = "unwind" +version = "0.0.0" +dependencies = [ + "cfg-if", + "compiler_builtins", + "core", + "libc", + "unwinding", +] + +[[package]] +name = "unwinding" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8393f2782b6060a807337ff353780c1ca15206f9ba2424df18cb6e733bd7b345" +dependencies = [ + "compiler_builtins", + "gimli", + "rustc-std-workspace-core", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +dependencies = [ + "compiler_builtins", + "rustc-std-workspace-alloc", + "rustc-std-workspace-core", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.0.0" + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[patch.unused]] +name = "tree-sitter" +version = "0.26.0" + +[[patch.unused]] +name = "tree-sitter-generate" +version = "0.26.0" diff --git a/tool/target/rust-analyzer/metadata/workspace/Cargo.lock b/tool/target/rust-analyzer/metadata/workspace/Cargo.lock new file mode 100644 index 0000000..94d3e25 --- /dev/null +++ b/tool/target/rust-analyzer/metadata/workspace/Cargo.lock @@ -0,0 +1,1129 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" + +[[package]] +name = "bitflags" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "cc" +version = "1.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2352e5597e9c544d5e6d9c95190d5d27738ade584fa8db0a16e130e5c2b5296e" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" + +[[package]] +name = "codemap" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b9e769b5c8c8283982a987c6e948e540254f1058d5a74b8794914d4ef5fc2a24" + +[[package]] +name = "codemap-diagnostic" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc20770be05b566a963bf91505e60412c4a2d016d1ef95c5512823bb085a8122" +dependencies = [ + "codemap", + "termcolor", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "windows-sys 0.59.0", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +dependencies = [ + "libc", + "windows-sys 0.60.2", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "icu_collections" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +dependencies = [ + "displaydoc", + "potential_utf", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" + 
+[[package]] +name = "icu_properties" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "potential_utf", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" + +[[package]] +name = "icu_provider" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +dependencies = [ + "displaydoc", + "icu_locale_core", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "indoc" +version = "2.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" + +[[package]] +name = "insta" +version = "1.43.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "154934ea70c58054b556dd430b99a98c2a7ff5309ac9891597e339b5c28f4371" +dependencies = [ + "console", + "once_cell", + "similar", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.175" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" + +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + +[[package]] +name = "litemap" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" + +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + +[[package]] +name = "memchr" +version = "2.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" + +[[package]] +name = "minicov" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +dependencies = [ + "cc", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "potential_utf" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +dependencies = [ + "zerovec", +] + +[[package]] +name = "proc-macro2" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] 
+name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rust-sitter" +version = "0.5.0" +dependencies = [ + "insta", + "rust-sitter-macro", + "serde", + "serde_json", + "tempfile", + "tree-sitter", +] + +[[package]] +name = "rust-sitter-common" +version = "0.5.0" +dependencies = [ + "itertools", + "proc-macro2", + "quote", + "serde_json", + "syn", +] + +[[package]] +name = "rust-sitter-example" +version = "0.5.0" +dependencies = [ + "codemap", + "codemap-diagnostic", + "insta", + "rust-sitter", + "rust-sitter-tool", + "wasm-bindgen-test", +] + +[[package]] +name = "rust-sitter-macro" +version = "0.5.0" +dependencies = [ + "insta", + "proc-macro2", + "quote", + "rust-sitter-common", + "syn", + "tempfile", +] + +[[package]] +name = "rust-sitter-tool" +version = "0.5.0" +dependencies = [ + "cc", + "insta", + "rust-sitter-common", + "serde", + "serde_json", + "syn", + "syn-inline-mod", + "tempfile", + "tree-sitter", + "tree-sitter-generate", +] + +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustix" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.60.2", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 
+dependencies = [ + "winapi-util", +] + +[[package]] +name = "semver" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.142" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + +[[package]] +name = "smallbitvec" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d31d263dd118560e1a492922182ab6ca6dc1d03a3bf54e7699993f31a4150e3f" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "syn" +version = "2.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bc3fcb250e53458e712715cf74285c1f889686520d79294a9ef3bd7aa1fc619" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn-inline-mod" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fa6dca1fdb7b2ed46dd534a326725419d4fb10f23d8c85a8b2860e5eb25d0f9" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinystr" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +dependencies = [ + "displaydoc", + "zerovec", +] + +[[package]] +name = "topological-sort" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" + +[[package]] +name = "tree-sitter" +version = "0.26.0" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-generate" +version = "0.26.0" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "indoc", + "log", + "regex", + "regex-syntax", + "rustc-hash", + "semver", + "serde", + "serde_json", + "smallbitvec", + "thiserror", + "topological-sort", + "tree-sitter", + "url", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.4" + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "url" +version = "2.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-bindgen-test" +version = "0.3.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3" +dependencies = [ + "js-sys", + "minicov", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "wasm-bindgen-test-macro" +version = "0.3.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.3", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = 
"windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + +[[package]] +name = "writeable" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" + +[[package]] +name = "yoke" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerofrom" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerotrie" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] From d1c3370a42826f22353e6fbb700ee23bf3fdcbee Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 18 Aug 2025 09:25:09 -0500 Subject: [PATCH 33/50] Remove extraneous println --- runtime/src/error.rs | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 0c1a601..db2d875 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -151,7 +151,6 @@ impl Iterator for ErrorLookahead<'_> { if let Some(reachable) = &self.reachable { if !reachable.contains(sym_name) { - eprintln!("Symbol is not reachable: {sym_name}"); continue; } } @@ -161,35 +160,6 @@ impl Iterator for ErrorLookahead<'_> { } } -// impl std::fmt::Display for ParseError { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// writeln!(f, "Failure to parse node:")?; -// let (start_point, end_point) = self.point_range(); -// let (error_start_point, error_end_point) = self.error_point_range(); -// write!( -// f, -// "\t{}:{} - {}:{}", -// start_point.line, start_point.column, end_point.line, end_point.column, -// )?; -// // if let Some(parent) = &self.parent_context { -// // writeln!(f)?; -// // writeln!(f, "\t(parent node: {})", parent.kind)?; -// // } -// if let Some(lookahead) = self.lookahead() { -// let mut first = true; -// write!(f, " Expected one of: ")?; 
-// for lk in lookahead { -// if !first { -// write!(f, " | ")?; -// } -// write!(f, "{lk}")?; -// first = false; -// } -// } -// Ok(()) -// } -// } - #[derive(Debug)] pub struct ExtractError<'a> { inner: Vec>, From f78ffbfbe358bc7af5a173a2deedfbcbc794c563 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 18 Aug 2025 15:51:00 -0500 Subject: [PATCH 34/50] Prevent panicking in partial extraction --- Cargo.lock | 27 ++++++------ macro/src/expansion.rs | 9 +++- ...t_sitter_macro__tests__enum_prec_left.snap | 4 +- ...t_sitter_macro__tests__enum_recursive.snap | 4 +- ...macro__tests__enum_transformed_fields.snap | 4 +- ...r_macro__tests__enum_with_named_field.snap | 4 +- ...macro__tests__enum_with_unamed_vector.snap | 8 +++- ...r_macro__tests__grammar_unboxed_field.snap | 8 +++- ...t_sitter_macro__tests__spanned_in_vec.snap | 12 ++++-- ...ust_sitter_macro__tests__struct_extra.snap | 8 +++- ..._sitter_macro__tests__struct_optional.snap | 8 +++- ...st_sitter_macro__tests__struct_repeat.snap | 12 ++++-- runtime/src/error.rs | 42 +++++++++++++++++-- runtime/src/lib.rs | 19 +++++++-- 14 files changed, 130 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 94d3e25..6d47607 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "bitflags" -version = "2.9.1" +version = "2.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" [[package]] name = "bumpalo" @@ -31,9 +31,9 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "cc" -version = "1.2.32" +version = "1.2.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2352e5597e9c544d5e6d9c95190d5d27738ade584fa8db0a16e130e5c2b5296e" +checksum = 
"3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" dependencies = [ "shlex", ] @@ -372,9 +372,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.97" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61789d7719defeb74ea5fe81f2fdfdbd28a803847077cecce2ff14e1472f6f1" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] @@ -600,9 +600,9 @@ checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" [[package]] name = "syn" -version = "2.0.105" +version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bc3fcb250e53458e712715cf74285c1f889686520d79294a9ef3bd7aa1fc619" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", @@ -654,18 +654,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.14" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b0949c3a6c842cbde3f1686d6eea5a010516deb7085f79db747562d4102f41e" +checksum = "80d76d3f064b981389ecb4b6b7f45a0bf9fdac1d5b9204c7bd6714fecc302850" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.14" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc5b44b4ab9c2fdd0e0512e6bece8388e214c0749f5862b114cc5b7a25daf227" +checksum = "44d29feb33e986b6ea906bd9c3559a856983f92371b3eaa5e83782a351623de0" dependencies = [ "proc-macro2", "quote", @@ -691,6 +691,7 @@ checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" [[package]] name = "tree-sitter" version = "0.26.0" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#0a7c89a71b4dc35cf6a2f3cddc091f78e4c78af2" dependencies = [ "cc", "regex", @@ -703,6 +704,7 @@ dependencies = [ [[package]] name = "tree-sitter-generate" version = "0.26.0" 
+source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#0a7c89a71b4dc35cf6a2f3cddc091f78e4c78af2" dependencies = [ "anyhow", "heck", @@ -725,6 +727,7 @@ dependencies = [ [[package]] name = "tree-sitter-language" version = "0.1.4" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#0a7c89a71b4dc35cf6a2f3cddc091f78e4c78af2" [[package]] name = "unicode-ident" diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index fc05ebf..3018a2f 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -54,7 +54,10 @@ pub fn expand_rule(input: DeriveInput) -> Result { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(#ident)) + })?; + #extract_expr } } @@ -104,7 +107,9 @@ pub fn expand_rule(input: DeriveInput) -> Result { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(#enum_name)) + })?; let mut cursor = node.walk(); assert!(cursor.goto_first_child(), "Could not find a child corresponding to any enum branch"); diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index 58b6235..3b5456c 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -29,7 +29,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) + })?; let mut cursor = node.walk(); assert!( cursor.goto_first_child(), diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap 
b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 5859c6e..6833423 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -29,7 +29,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) + })?; let mut cursor = node.walk(); assert!( cursor.goto_first_child(), diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index 56d6cd9..ef307c9 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -30,7 +30,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) + })?; let mut cursor = node.walk(); assert!( cursor.goto_first_child(), diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 290236e..a094431 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -29,7 +29,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expr)) + })?; let mut cursor = node.walk(); assert!( cursor.goto_first_child(), diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap 
b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index a38948d..c8d0f80 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -12,7 +12,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Number { value: { @@ -58,7 +60,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expr)) + })?; let mut cursor = node.walk(); assert!( cursor.goto_first_child(), diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index d974c9f..e1f6a09 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -29,7 +29,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Language)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Language { e: { @@ -58,7 +60,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) + })?; let mut cursor = node.walk(); assert!( cursor.goto_first_child(), diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap 
b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index 8ceae2e..cb50b43 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -30,7 +30,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(NumberList)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(NumberList { numbers: { @@ -59,7 +61,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Number { v: { @@ -86,7 +90,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Whitespace)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Whitespace { _whitespace: { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index a4f5bbb..f17919b 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -29,7 +29,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("No node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) + })?; let mut cursor = node.walk(); assert!( cursor.goto_first_child(), @@ -76,7 +78,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = 
node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Whitespace)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Whitespace { _whitespace: { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 4432b52..fa14e29 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -29,7 +29,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Language)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Language { v: { @@ -63,7 +65,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Number { v: { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index c022119..28624ba 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -29,7 +29,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(NumberList)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(NumberList { numbers: { @@ -58,7 +60,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node 
= node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Number { v: { @@ -85,7 +89,9 @@ mod grammar { source: &[u8], _leaf_fn: Option>, ) -> Result> { - let node = node.expect("no node found"); + let node = node.ok_or_else(|| { + ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Whitespace)) + })?; ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { Ok(Whitespace { _whitespace: { diff --git a/runtime/src/error.rs b/runtime/src/error.rs index db2d875..6b69bc3 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -27,7 +27,7 @@ pub enum ParseErrorReason { type_name: &'static str, }, /// Parsed OK, but failed to extract to the given type. - TypeConversion(Box), + TypeConversion(Box), } /// A low level error which just wraps the error node and exposes many fields around it. @@ -120,6 +120,40 @@ impl<'a> NodeError<'a> { } } +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}:{} to {}:{}, {}", + self.error_position.start.line, + self.error_position.start.column, + self.error_position.end.line, + self.error_position.end.column, + self.reason + ) + } +} + +impl std::fmt::Display for ParseErrorReason { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ParseErrorReason::Missing => f.write_str("missing node"), + ParseErrorReason::Error => f.write_str("parse error"), + ParseErrorReason::FailedExtract { field } => { + write!(f, "failed extraction of field: {field}") + } + ParseErrorReason::MissingNode { + node_kind, + type_name, + } => write!( + f, + "missing node in extraction of type: {type_name}, {node_kind}" + ), + ParseErrorReason::TypeConversion(error) => write!(f, "type conversion: {error}"), + } + } +} + struct ErrorLookahead<'a> { it: 
tree_sitter::LookaheadIterator, language: tree_sitter::Language, @@ -201,7 +235,7 @@ impl<'a> ExtractError<'a> { pub(crate) fn type_conversion( n: tree_sitter::Node<'_>, - e: impl std::error::Error + Send + 'static, + e: impl std::error::Error + Send + Sync + 'static, ) -> Self { let position = crate::Position::from_node(n); Self { @@ -255,7 +289,7 @@ impl<'a> ExtractError<'a> { } } - pub(crate) fn missing_node(ctx: &ExtractContext<'_>, type_name: &'static str) -> Self { + pub fn missing_node(ctx: &ExtractContext<'_>, type_name: &'static str) -> Self { let position = crate::Position { // TODO: This should be fixed to actually have the full range from the outer node. bytes: ctx.last_idx..ctx.last_idx, @@ -295,7 +329,7 @@ pub enum ExtractErrorReason<'a> { type_name: &'static str, }, /// Parsed OK, but failed to extract to the given type. - TypeConversion(Box), + TypeConversion(Box), } impl<'a> IntoIterator for ExtractError<'a> { diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 9956ef3..732e14f 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -8,6 +8,7 @@ pub use rule::Language; use extract::ExtractContext; pub use extract::{Extract, WithLeaf}; +use serde::{Deserialize, Serialize}; use std::ops::Deref; @@ -52,7 +53,7 @@ pub struct NodeParseResult<'a, T> { } /// A wrapper around a value that also contains the span of the value in the source. -#[derive(Clone, Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Spanned { /// The underlying parsed node. pub value: T, @@ -69,7 +70,7 @@ impl Deref for Spanned { } /// Position in a file, used by errors and `Spanned`. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct Position { /// Byte range. 
pub bytes: core::ops::Range, @@ -92,9 +93,21 @@ impl Position { } } +impl PartialOrd for Position { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Position { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + (self.bytes.start, self.bytes.end).cmp(&(other.bytes.start, other.bytes.end)) + } +} + /// A line and column point in a source parse. These are 1 based to correspond with a text editor /// line and column. Note, this is a divergence from tree-sitter, which uses a zero-based `Point`. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] pub struct Point { pub line: usize, pub column: usize, From f498805d8eb32c16cdaad25a7c08be181b1ef556 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Wed, 20 Aug 2025 09:28:10 -0500 Subject: [PATCH 35/50] Refactor to not have `#[extra]` on individual rules, but `#[extras(...)]` on the `#[language]` rule. --- Cargo.lock | 30 ++--- README.md | 18 +-- common/src/expansion.rs | 94 +++++++++----- example/src/arithmetic.rs | 11 +- example/src/repetitions.rs | 10 +- ...e__arithmetic__tests__failed_parses-2.snap | 1 - ...e__arithmetic__tests__failed_parses-3.snap | 1 - ...e__arithmetic__tests__failed_parses-4.snap | 40 ++++-- ...ple__arithmetic__tests__failed_parses.snap | 1 - ...xample__words__tests__words_grammar-2.snap | 1 - ...xample__words__tests__words_grammar-3.snap | 1 - example/src/words.rs | 10 +- macro/src/expansion.rs | 2 +- macro/src/lib.rs | 4 +- ...t_sitter_macro__tests__enum_prec_left.snap | 2 +- ...t_sitter_macro__tests__enum_recursive.snap | 2 +- ...macro__tests__enum_transformed_fields.snap | 2 +- ...r_macro__tests__enum_with_named_field.snap | 2 +- ...macro__tests__enum_with_unamed_vector.snap | 2 +- ...r_macro__tests__grammar_unboxed_field.snap | 2 +- ...ust_sitter_macro__tests__struct_extra.snap | 2 +- runtime/Cargo.toml | 1 + runtime/src/__private.rs | 1 + 
runtime/src/error.rs | 90 +++++++++++-- runtime/src/grammar.rs | 7 +- tool/src/lib.rs | 118 +++++++----------- ...st_sitter_tool__tests__grammar_repeat.snap | 2 +- ...t_sitter_tool__tests__grammar_repeat1.snap | 2 +- ...l__tests__grammar_repeat_no_delimiter.snap | 2 +- ...tter_tool__tests__grammar_with_extras.snap | 2 +- .../rust_sitter_tool__tests__immediate.snap | 2 +- ...st_sitter_tool__tests__spanned_in_vec.snap | 2 +- 32 files changed, 279 insertions(+), 188 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d47607..07126d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -40,9 +40,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "codemap" @@ -265,6 +265,7 @@ checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" dependencies = [ "equivalent", "hashbrown", + "serde", ] [[package]] @@ -427,6 +428,7 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" name = "rust-sitter" version = "0.5.0" dependencies = [ + "indexmap", "insta", "rust-sitter-macro", "serde", @@ -551,9 +553,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.142" +version = "1.0.143" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ "indexmap", "itoa", @@ -632,15 +634,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.20.0" +version = "3.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +checksum = 
"15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e" dependencies = [ "fastrand", "getrandom", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -654,18 +656,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d76d3f064b981389ecb4b6b7f45a0bf9fdac1d5b9204c7bd6714fecc302850" +checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d29feb33e986b6ea906bd9c3559a856983f92371b3eaa5e83782a351623de0" +checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" dependencies = [ "proc-macro2", "quote", @@ -878,11 +880,11 @@ dependencies = [ [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] diff --git a/README.md b/README.md index 0a9de28..491e168 100644 --- a/README.md +++ b/README.md @@ -135,16 +135,20 @@ struct Code { } ```` -### `#[extra]` -This annotation marks a node as extra which instructs tree-sitter that it can appear anywhere within the -grammar. This is useful for handling whitespace/newlines/comments. +### `#[extras(...)]` +This annotation can be used on the `#[language]` rule to specify a list of extras. These extras are specified +using the same DSL as `#[leaf(...)]` and `#[text(...)]`. These rules are inserted to the `extras` array in the +grammar. 
```rust #[derive(Rule)] -#[extra] -#[leaf(re(r"\s"))] -// Structs and fields that start with `_` are hidden from the output grammar. -struct _Whitespace; +#[language] +#[extras( + re(r"\s") // allows whitespace in the grammar. +)] +struct Code { + ... +} ``` ## Field Annotations diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 9a6921b..bd21e28 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -9,7 +9,7 @@ use syn::{parse::Parse, punctuated::Punctuated, spanned::Spanned}; pub struct RuleDerive { pub ident: syn::Ident, pub attrs: Vec, - pub extras: Extras, + pub extras: RuleParams, pub data: syn::Data, } @@ -38,7 +38,7 @@ impl RuleDerive { // Used by the proc macro directly. pub fn from_derive_input_known(d: DeriveInput) -> Result { - let extras = Extras::new(&d.attrs)?; + let extras = RuleParams::new(&d.attrs)?; Ok(Self { ident: d.ident, attrs: d.attrs, @@ -118,6 +118,7 @@ impl ExpansionState { Ok(()) } } + fn accumulate_error(&mut self, err: Error) -> Error { if let Some(inner) = &mut self.error { inner.combine(err.clone()); @@ -126,6 +127,7 @@ impl ExpansionState { } err } + fn verify_seen(&self) -> Result<()> { if let Some(e) = self .rules_map @@ -141,6 +143,7 @@ impl ExpansionState { Ok(()) } } + // TODO: This could be made a lot simpler by eventually having actual types for this. That // could also make it easier to generate traits which produce grammars instead. 
fn check_seen_value(&self, value: &Value) -> Result<()> { @@ -177,6 +180,7 @@ impl ExpansionState { Ok(()) } + fn set_language(&mut self, ident: &Ident) -> Result<()> { if let Some(existing) = &self.language_rule { return Err(self.accumulate_error(Error::new( @@ -193,6 +197,7 @@ impl ExpansionState { self.language_rule = Some(ident.clone()); Ok(()) } + fn set_word(&mut self, ident: String) -> Result<()> { if let Some(existing) = &self.word_rule { return Err(self.accumulate_error(Error::new( @@ -203,12 +208,6 @@ impl ExpansionState { self.word_rule = Some(ident); Ok(()) } - fn push_extra(&mut self, ident: &Ident) { - self.extras.push(json!({ - "type": "SYMBOL", - "name": ident.to_string(), - })); - } } fn process_item(item: Item, ctx: &mut ExpansionState) -> Result<()> { @@ -241,12 +240,29 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { if input.extras.language { ctx.set_language(&input.ident)?; } + if let Some(extras) = &input.extras.extras { + if !input.extras.language { + return Err(Error::new( + extras.span(), + "Cannot specify extras without #[language]", + )); + } + let (extras, errs): (Vec<_>, Vec<_>) = extras + .iter() + .map(|input| input.evaluate()) + .partition_result(); + let err = errs.into_iter().reduce(|mut acc, n| { + acc.combine(n); + acc + }); + if let Some(err) = err { + return Err(err); + } + ctx.extras = extras; + } // if input.extras.word { // ctx.set_word(&input.ident); // } - if input.extras.extra { - ctx.push_extra(&input.ident); - } let ident = input.ident; @@ -287,8 +303,7 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { "members": members }); - let precs = input.extras; - let rule = precs.apply(rule)?; + let rule = input.extras.apply(rule)?; ctx.rules_map.insert(ident.to_string(), rule); } @@ -299,7 +314,7 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { } #[derive(Debug)] -pub struct Extras { +pub struct RuleParams { pub prec_param: 
Option, pub prec_left_param: Option, pub prec_right_param: Option, @@ -307,11 +322,11 @@ pub struct Extras { pub immediate: bool, pub token: bool, pub language: bool, - pub extra: bool, + pub extras: Option>, pub word: bool, } -impl Extras { +impl RuleParams { fn new(attrs: &[Attribute]) -> Result { let prec_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "prec")); @@ -359,7 +374,12 @@ impl Extras { )); } - let extra = attrs.iter().any(|attr| sitter_attr_matches(attr, "extra")); + let extras = attrs + .iter() + .find(|a| sitter_attr_matches(a, "extras")) + .map(|a| a.parse_args_with(Punctuated::::parse_terminated)) + .transpose()?; + let language = attrs.iter().any(|a| sitter_attr_matches(a, "language")); let word = attrs.iter().any(|a| sitter_attr_matches(a, "word")); @@ -370,7 +390,7 @@ impl Extras { prec_dynamic_param, immediate: immediate.is_some(), token: token.is_some(), - extra, + extras, word, language, }) @@ -466,8 +486,8 @@ fn gen_field( leaf_type: Option, attrs: Vec, ctx: &mut ExpansionState, -) -> Result<(Value, bool)> { - let precs = Extras::new(&attrs)?; +) -> Result<(Value, bool, bool)> { + let precs = RuleParams::new(&attrs)?; if precs.word { // TODO: We don't want to allow this, but because we generate a dummy `_unit` field @@ -495,7 +515,7 @@ fn gen_field( if let Some(text) = text_attr { let input: TsInput = text.parse_args()?; - return Ok((precs.apply(input.evaluate()?)?, false)); + return Ok((precs.apply(input.evaluate()?)?, false, true)); } let leaf_input = leaf_attr.map(|a| a.parse_args::()).transpose()?; @@ -510,7 +530,7 @@ fn gen_field( "Empty types must have a leaf or text attribute", )); }; - return Ok((precs.apply(leaf_input.evaluate()?)?, false)); + return Ok((precs.apply(leaf_input.evaluate()?)?, false, false)); } }; @@ -532,6 +552,7 @@ fn gen_field( "name": path }), is_option, + false, )) } else { let symbol_name = match filter_inner_type(&leaf_type, &skip_over) { @@ -545,10 +566,11 @@ fn gen_field( "name": symbol_name, }))?, 
false, + false, )) } } else if is_vec { - let (field_json, field_optional) = gen_field( + let (field_json, field_optional, _is_text) = gen_field( path.clone(), Some(inner_type_vec), leaf_attr.iter().cloned().cloned().collect(), @@ -638,10 +660,12 @@ fn gen_field( "name": contents_ident, }), !repeat_non_empty, + false, )) } else { // is_option - let (field_json, field_optional) = gen_field(path, Some(inner_type_option), attrs, ctx)?; + let (field_json, field_optional, _is_text) = + gen_field(path, Some(inner_type_option), attrs, ctx)?; if field_optional { return Err(Error::new( @@ -650,7 +674,7 @@ fn gen_field( )); } - Ok((precs.apply(field_json)?, true)) + Ok((precs.apply(field_json)?, true, false)) } } @@ -672,14 +696,18 @@ fn gen_struct_or_variant( } else { format!("{path}_{ident_str}") }; - let (field_contents, is_option) = + let (field_contents, is_option, is_text) = gen_field(path, Some(field.ty.clone()), field.attrs.clone(), ctx)?; - let core = json!({ - "type": "FIELD", - "name": ident_str, - "content": field_contents - }); + let core = if !is_text { + json!({ + "type": "FIELD", + "name": ident_str, + "content": field_contents + }) + } else { + field_contents + }; let r = if is_option { json!({ @@ -728,7 +756,7 @@ fn gen_struct_or_variant( let base_rule = match fields { Fields::Unit => { - let (field_contents, _is_option) = + let (field_contents, _is_option, _is_text) = gen_field(path.clone(), None, attrs.to_owned(), ctx)?; field_contents } @@ -738,7 +766,7 @@ fn gen_struct_or_variant( }), }; - let precs = Extras::new(attrs)?; + let precs = RuleParams::new(attrs)?; ctx.rules_map.insert(path, precs.apply(base_rule)?); Ok(()) diff --git a/example/src/arithmetic.rs b/example/src/arithmetic.rs index 9fd70d1..ab72fbc 100644 --- a/example/src/arithmetic.rs +++ b/example/src/arithmetic.rs @@ -2,6 +2,10 @@ pub mod grammar { use rust_sitter::Rule; #[derive(PartialEq, Eq, Debug, Rule)] #[language] + #[extras( + // whitespace + re(r"\s") + )] pub enum Expression { 
Number(#[leaf(pattern(r"\d+"))] i32), #[prec_left(1)] @@ -9,13 +13,6 @@ pub mod grammar { #[prec_left(2)] Mul(Box, #[leaf("*")] (), Box), } - - #[derive(Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } #[cfg(test)] diff --git a/example/src/repetitions.rs b/example/src/repetitions.rs index e81ba6a..21e8cfb 100644 --- a/example/src/repetitions.rs +++ b/example/src/repetitions.rs @@ -3,19 +3,15 @@ pub mod grammar { #[derive(Debug, Rule)] #[language] + #[extras( + re(r"\s") + )] #[allow(dead_code)] pub struct NumberList { #[sep_by1(",")] #[leaf(pattern(r"\d+"))] numbers: Spanned>>, } - - #[derive(Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } // TODO: Currently not allowed, needs to be fixed. diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap index 37328c6..b80a7de 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap @@ -30,7 +30,6 @@ ParseResult { }, lookaheads: [ "Expression_Number_0", - "_Whitespace__whitespace", ], reason: Missing, }, diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap index 4b624e1..b402002 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap @@ -34,7 +34,6 @@ ParseResult { }, lookaheads: [ "Expression_Number_0", - "_Whitespace__whitespace", "source_file", "Expression_Number", "Expression_Sub", diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap 
b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap index a2a64af..6c273bf 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap @@ -3,18 +3,14 @@ source: example/src/arithmetic.rs expression: "grammar::Expression::parse(\"1a\")" --- ParseResult { - result: Some( - Number( - 1, - ), - ), + result: None, errors: [ ParseError { node_position: Position { - bytes: 1..2, + bytes: 0..2, start: Point { line: 1, - column: 2, + column: 1, }, end: Point { line: 1, @@ -33,11 +29,39 @@ ParseResult { }, }, lookaheads: [ - "_Whitespace__whitespace", "Expression_Sub_1", "Expression_Mul_1", ], reason: Error, }, + ParseError { + node_position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, + }, + error_position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, + }, + lookaheads: [], + reason: MissingEnum { + node_kind: "source_file", + enum_name: "ERROR", + }, + }, ], } diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap index ef78a64..a1433c4 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap @@ -33,7 +33,6 @@ ParseResult { }, }, lookaheads: [ - "_Whitespace__whitespace", "Expression_Sub_1", "Expression_Mul_1", ], diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap index c0e95b9..4a5d976 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap +++ 
b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap @@ -30,7 +30,6 @@ ParseResult { }, lookaheads: [ "Words_keyword", - "_Whitespace__whitespace", "source_file", ], reason: Error, diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap index 9d94ce4..65b928e 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap @@ -30,7 +30,6 @@ ParseResult { }, lookaheads: [ "Words_keyword", - "_Whitespace__whitespace", "source_file", ], reason: Error, diff --git a/example/src/words.rs b/example/src/words.rs index 0b44564..a402d06 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -3,6 +3,9 @@ pub mod grammar { #[derive(Debug, Rule)] #[language] + #[extras( + re(r"\s") + )] #[allow(dead_code)] pub struct Words { #[leaf("if")] @@ -11,13 +14,6 @@ pub mod grammar { #[leaf(pattern(r"[a-z_]+"))] word: String, } - - #[derive(Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } #[cfg(test)] diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 3018a2f..aa5fa5f 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -118,7 +118,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { match node.kind() { #(#match_cases),*, k => if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 54eb22d..2464c2b 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -37,7 +37,9 @@ use expansion::*; prec_right, prec_dynamic, token, - extra, + // TODO: This will instead be on a derive(Language) as well as others like conflicts, + // externals, inline, word, supertypes, etc. 
to fill out the full grammar specification. + extras, with, with_node, transform, diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index 3b5456c..cd877e2 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -78,7 +78,7 @@ mod grammar { } k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 6833423..aa98f7c 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -73,7 +73,7 @@ mod grammar { } k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index ef307c9..fec40ce 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -55,7 +55,7 @@ mod grammar { } k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index a094431..9771655 100644 --- 
a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -73,7 +73,7 @@ mod grammar { } k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index c8d0f80..84cb5b8 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -85,7 +85,7 @@ mod grammar { } k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index e1f6a09..f2f1135 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -85,7 +85,7 @@ mod grammar { } k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index f17919b..d18c632 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -54,7 +54,7 @@ mod grammar { } k => { if !cursor.goto_next_sibling() { - panic!("Could not find a child corresponding to any enum branch: {k}") + 
return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); } } } diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 8b6d9b0..1286be7 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -19,6 +19,7 @@ rust-sitter-macro = { path = "../macro" } # This one could be optional. serde_json = "1" serde = { version = "1", features = ["derive"] } +indexmap = { version = "2", features = ["serde"] } [dev-dependencies] insta = "1.39" diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 63e3e54..dc9fe5a 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -127,6 +127,7 @@ pub fn parse>( ) -> crate::ParseResult { let mut parser = tree_sitter::Parser::new(); parser.set_language(&language()).unwrap(); + // parser.set_logger(Some(Box::new(|_t, m| eprintln!("parser::{m}")))); let tree = parser.parse(input, None).expect("Failed to parse"); let root_node = tree.root_node(); diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 6b69bc3..3ce47b5 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -26,6 +26,10 @@ pub enum ParseErrorReason { node_kind: String, type_name: &'static str, }, + MissingEnum { + node_kind: String, + enum_name: &'static str, + }, /// Parsed OK, but failed to extract to the given type. 
TypeConversion(Box), } @@ -40,7 +44,10 @@ impl<'a> NodeError<'a> { pub fn to_parse_error(&self) -> ParseError { ParseError { node_position: Position::new(self.node_byte_range(), self.point_range()), - error_position: Position::new(self.error_byte_range(), self.error_point_range()), + error_position: Position::new( + self.first_error_byte_range(), + self.first_error_point_range(), + ), lookaheads: self.lookahead().map(|l| l.collect()).unwrap_or_default(), reason: if self.node.is_missing() { ParseErrorReason::Missing @@ -71,6 +78,28 @@ impl<'a> NodeError<'a> { (Point::from_tree_sitter(start), Point::from_tree_sitter(end)) } + pub fn first_error_point_range(&self) -> (Point, Point) { + match self.node.error_child(0) { + None => self.error_point_range(), + Some(c) => { + let start = c.start_position(); + let end = c.end_position(); + (Point::from_tree_sitter(start), Point::from_tree_sitter(end)) + } + } + } + + pub fn first_error_byte_range(&self) -> Range { + match self.node.error_child(0) { + None => self.error_byte_range(), + Some(c) => c.byte_range(), + } + } + + pub fn is_missing(&self) -> bool { + self.node.is_missing() + } + pub fn lookahead( &self, // grammar: Option<&'a crate::grammar::Grammar>, @@ -149,6 +178,13 @@ impl std::fmt::Display for ParseErrorReason { f, "missing node in extraction of type: {type_name}, {node_kind}" ), + ParseErrorReason::MissingEnum { + node_kind, + enum_name, + } => write!( + f, + "missing enum in extraction of type: {enum_name}, {node_kind}" + ), ParseErrorReason::TypeConversion(error) => write!(f, "type conversion: {error}"), } } @@ -183,10 +219,10 @@ impl Iterator for ErrorLookahead<'_> { let sym_name = self.it.current_symbol_name(); - if let Some(reachable) = &self.reachable { - if !reachable.contains(sym_name) { - continue; - } + if let Some(reachable) = &self.reachable + && !reachable.contains(sym_name) + { + continue; } return Some(sym_name); @@ -284,6 +320,21 @@ impl<'a> ExtractError<'a> { lookaheads: vec![], } } + 
ExtractErrorReason::MissingEnum { + node_kind, + enum_name, + } => { + let reason = ParseErrorReason::MissingEnum { + node_kind, + enum_name, + }; + ParseError { + node_position: inner.position.clone(), + error_position: inner.position, + reason, + lookaheads: vec![], + } + } }; errors.push(err); } @@ -307,11 +358,29 @@ impl<'a> ExtractError<'a> { } } + pub fn missing_enum(ctx: &ExtractContext<'_>, enum_name: &'static str) -> Self { + let position = crate::Position { + // TODO: This should be fixed to actually have the full range from the outer node. + bytes: ctx.last_idx..ctx.last_idx, + start: Point::from_tree_sitter(ctx.last_pt), + end: Point::from_tree_sitter(ctx.last_pt), + }; + Self { + inner: vec![ExtractErrorInner { + position, + reason: ExtractErrorReason::MissingEnum { + node_kind: ctx.node_kind.to_owned(), + enum_name, + }, + }], + } + } + pub fn position(&self) -> &Position { &self.inner[0].position } - pub fn reason(&self) -> &ExtractErrorReason { + pub fn reason(&self) -> &ExtractErrorReason<'_> { &self.inner[0].reason } } @@ -328,6 +397,10 @@ pub enum ExtractErrorReason<'a> { node_kind: String, type_name: &'static str, }, + MissingEnum { + node_kind: String, + enum_name: &'static str, + }, /// Parsed OK, but failed to extract to the given type. TypeConversion(Box), } @@ -377,9 +450,6 @@ where let mut cursor = node.walk(); node.children(&mut cursor) .for_each(|c| collect_node_errors_(c, f)); - return; - } else { - return; - }; + } } } diff --git a/runtime/src/grammar.rs b/runtime/src/grammar.rs index 2f5a50d..b3bb220 100644 --- a/runtime/src/grammar.rs +++ b/runtime/src/grammar.rs @@ -1,6 +1,7 @@ //! Grammar related functions. 
-use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; +use indexmap::IndexMap; use serde::{Deserialize, Serialize}; // NOTE: This could be useful for generating the grammar in the first place instead of just @@ -11,9 +12,7 @@ use serde::{Deserialize, Serialize}; pub struct Grammar { pub name: String, pub word: Option, - // NOTE: Use `indexmap` because we need to preserve order. - // https://docs.rs/indexmap/2.10.0/indexmap/map/struct.IndexMap.html - pub rules: HashMap, + pub rules: IndexMap, pub extras: Vec, } diff --git a/tool/src/lib.rs b/tool/src/lib.rs index f6767d9..720df0e 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -1,5 +1,5 @@ // TODO: Switch on which version we are using specifically. -const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 25, 6)); +const GENERATED_SEMANTIC_VERSION: Option<(u8, u8, u8)> = Some((0, 26, 0)); use std::io::Write; use std::path::{Path, PathBuf}; @@ -69,7 +69,7 @@ fn generate_parser(grammar: &serde_json::Value, out_dir: Option<&Path>) -> Resul } else { tempfile.path() }; - let sysroot_dir = write_grammar_and_c_to_dir(&grammar_name, grammar, &grammar_c, dir); + let _sysroot_dir = write_grammar_and_c_to_dir(&grammar_name, grammar, &grammar_c, dir); // let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}",)); // if grammar_dir.is_dir() { // std::fs::remove_dir_all(&grammar_dir).expect("Couldn't clear old artifacts"); @@ -81,7 +81,7 @@ fn generate_parser(grammar: &serde_json::Value, out_dir: Option<&Path>) -> Resul // grammar_dir let mut c_config = cc::Build::new(); - c_config.std("c11").include(dir).include(&sysroot_dir); + c_config.std("c11").include(dir); c_config .flag_if_supported("-Wno-unused-label") .flag_if_supported("-Wno-unused-parameter") @@ -123,32 +123,32 @@ fn write_grammar_and_c_to_dir( drop(parser_file); let sysroot_dir = dir.join("sysroot"); - if std::env::var("TARGET").unwrap().starts_with("wasm32") { - 
std::fs::create_dir(&sysroot_dir).unwrap(); - let mut stdint = std::fs::File::create(sysroot_dir.join("stdint.h")).unwrap(); - stdint - .write_all(include_bytes!("wasm-sysroot/stdint.h")) - .unwrap(); - drop(stdint); - - let mut stdlib = std::fs::File::create(sysroot_dir.join("stdlib.h")).unwrap(); - stdlib - .write_all(include_bytes!("wasm-sysroot/stdlib.h")) - .unwrap(); - drop(stdlib); - - let mut stdio = std::fs::File::create(sysroot_dir.join("stdio.h")).unwrap(); - stdio - .write_all(include_bytes!("wasm-sysroot/stdio.h")) - .unwrap(); - drop(stdio); - - let mut stdbool = std::fs::File::create(sysroot_dir.join("stdbool.h")).unwrap(); - stdbool - .write_all(include_bytes!("wasm-sysroot/stdbool.h")) - .unwrap(); - drop(stdbool); - } + // if std::env::var("TARGET").unwrap().starts_with("wasm32") { + // std::fs::create_dir(&sysroot_dir).unwrap(); + // let mut stdint = std::fs::File::create(sysroot_dir.join("stdint.h")).unwrap(); + // stdint + // .write_all(include_bytes!("wasm-sysroot/stdint.h")) + // .unwrap(); + // drop(stdint); + + // let mut stdlib = std::fs::File::create(sysroot_dir.join("stdlib.h")).unwrap(); + // stdlib + // .write_all(include_bytes!("wasm-sysroot/stdlib.h")) + // .unwrap(); + // drop(stdlib); + + // let mut stdio = std::fs::File::create(sysroot_dir.join("stdio.h")).unwrap(); + // stdio + // .write_all(include_bytes!("wasm-sysroot/stdio.h")) + // .unwrap(); + // drop(stdio); + + // let mut stdbool = std::fs::File::create(sysroot_dir.join("stdbool.h")).unwrap(); + // stdbool + // .write_all(include_bytes!("wasm-sysroot/stdbool.h")) + // .unwrap(); + // drop(stdbool); + // } sysroot_dir } @@ -378,19 +378,15 @@ mod tests { mod grammar { #[derive(rust_sitter::Rule)] #[language] + #[extras( + re(r"\s") + )] pub enum Expression { Number( #[leaf(re(r"\d+"))] i32 ), } - - #[derive(rust_sitter::Rule)] - #[extra] - struct Whitespace { - #[leaf(re(r"\s"))] - _whitespace: (), - } } } { m @@ -438,6 +434,9 @@ mod tests { pub mod grammar { 
#[derive(rust_sitter::Rule)] #[language] + #[extras( + re(r"\s") + )] pub struct NumberList { #[sep_by(",")] numbers: Vec, @@ -448,13 +447,6 @@ mod tests { #[leaf(re(r"\d+"))] v: i32, } - - #[derive(Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } } { m @@ -473,6 +465,9 @@ mod tests { pub mod grammar { #[derive(rust_sitter::Rule)] #[language] + #[extras( + re(r"\s") + )] pub struct NumberList { numbers: Vec, } @@ -482,13 +477,6 @@ mod tests { #[leaf(re(r"\d+"))] v: i32, } - - #[derive(rust_sitter::Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } } { m @@ -507,6 +495,9 @@ mod tests { pub mod grammar { #[derive(rust_sitter::Rule)] #[language] + #[extras( + re(r"\s") + )] pub struct NumberList { #[repeat(non_empty = true)] #[delimited(",")] @@ -518,13 +509,6 @@ mod tests { #[leaf(re(r"\d+"))] v: i32, } - - #[derive(rust_sitter::Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } } { m @@ -606,17 +590,13 @@ mod tests { #[derive(rust_sitter::Rule)] #[language] + #[extras( + re(r"\s") + )] pub struct NumberList { #[leaf(re(r"\d+"))] numbers: Vec>, } - - #[derive(rust_sitter::Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } } { m @@ -635,19 +615,15 @@ mod tests { mod grammar { #[derive(rust_sitter::Rule)] #[language] + #[extras( + re(r"\s") + )] pub struct StringFragment( #[immediate] #[prec(1)] #[leaf(pattern(r#"[^"\\]+"#))] () ); - - #[derive(rust_sitter::Rule)] - #[extra] - struct Whitespace { - #[leaf(pattern(r"\s"))] - _whitespace: (), - } } } { m diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap index 21afd91..6b7cc59 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: 
grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap index 1721ac8..e6e68f3 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap index 1721ac8..e6e68f3 100644 --- 
a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git 
a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap index 48e12e9..37b6d7d 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap index b8e7f07..e5d9a19 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"StringFragment","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"StringFragment","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap index 4dd0b89..c6cf4f3 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"_Whitespace__whitespace":{"type":"PATTERN","value":"\\s"},"Whitespace":{"type":"SEQ","members":[{"type":"FIELD","name":"_whitespace","content":{"type":"SYMBOL","name":"_Whitespace__whitespace"}}]}},"extras":[{"type":"SYMBOL","name":"Whitespace"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} From 84eb394ce14a4213ffbd0fd8a6a86c8477975cbd Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 21 Aug 2025 08:47:17 -0500 Subject: [PATCH 36/50] Experimental changes to flatten the grammar --- Cargo.lock | 1 + common/src/expansion.rs | 38 +---- common/src/lib.rs | 34 +++- ..._arithmetic__tests__failed_parses.snap.new | 39 +++++ ...ionals__tests__optional_grammar-3.snap.new | 29 ++++ ...ptionals__tests__optional_grammar.snap.new | 29 ++++ 
...ons__tests__repetitions_grammar-2.snap.new | 26 +++ ...tions__tests__repetitions_grammar.snap.new | 36 ++++ ...mple__words__tests__words_grammar.snap.new | 36 ++++ example/src/words.rs | 2 +- macro/src/expansion.rs | 4 +- macro/src/lib.rs | 1 - runtime/Cargo.toml | 1 + runtime/src/__private.rs | 13 +- runtime/src/error.rs | 158 ++++++++++++------ runtime/src/extract.rs | 5 +- runtime/src/lib.rs | 10 ++ 17 files changed, 372 insertions(+), 90 deletions(-) create mode 100644 example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new create mode 100644 example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new create mode 100644 example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new create mode 100644 example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new create mode 100644 example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new create mode 100644 example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new diff --git a/Cargo.lock b/Cargo.lock index 07126d2..0a8bc57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -430,6 +430,7 @@ version = "0.5.0" dependencies = [ "indexmap", "insta", + "log", "rust-sitter-macro", "serde", "serde_json", diff --git a/common/src/expansion.rs b/common/src/expansion.rs index bd21e28..2c0a7d7 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -319,8 +319,6 @@ pub struct RuleParams { pub prec_left_param: Option, pub prec_right_param: Option, pub prec_dynamic_param: Option, - pub immediate: bool, - pub token: bool, pub language: bool, pub extras: Option>, pub word: bool, @@ -358,15 +356,6 @@ impl RuleParams { .map(|a| a.parse_args_with(Expr::parse)) .transpose()?; - let token = attrs.iter().find(|attr| sitter_attr_matches(attr, "token")); - let immediate = attrs - .iter() - .find(|attr| sitter_attr_matches(attr, "immediate")); - - if let 
(Some(im), Some(_tok)) = (&immediate, &token) { - return Err(Error::new(im.span(), "Cannot be immediate and token")); - } - if let (Some(prec_left), Some(_prec_right)) = (prec_left_attr, prec_right_attr) { return Err(Error::new( prec_left.span(), @@ -388,8 +377,6 @@ impl RuleParams { prec_left_param, prec_right_param, prec_dynamic_param, - immediate: immediate.is_some(), - token: token.is_some(), extras, word, language, @@ -402,8 +389,6 @@ impl RuleParams { prec_left_param, prec_right_param, prec_dynamic_param, - immediate, - token, .. } = self; @@ -465,19 +450,7 @@ impl RuleParams { rule }; - if *immediate { - Ok(json!({ - "type": "IMMEDIATE_TOKEN", - "content": rule - })) - } else if *token { - Ok(json!({ - "type": "TOKEN", - "content": rule, - })) - } else { - Ok(rule) - } + Ok(rule) } } @@ -547,10 +520,11 @@ fn gen_field( .insert(path.clone(), precs.apply(input.evaluate()?)?); Ok(( - json!({ - "type": "SYMBOL", - "name": path - }), + precs.apply(input.evaluate()?)?, + // json!({ + // "type": "SYMBOL", + // "name": path + // }), is_option, false, )) diff --git a/common/src/lib.rs b/common/src/lib.rs index 4bea022..93ee3d2 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -2,7 +2,10 @@ use proc_macro2::Span; use quote::ToTokens; use std::collections::HashSet; use syn::{ - parse::{Parse, ParseStream}, punctuated::Punctuated, spanned::Spanned, * + parse::{Parse, ParseStream}, + punctuated::Punctuated, + spanned::Spanned, + *, }; pub mod expansion; @@ -162,6 +165,22 @@ impl TsInput { "value": get_str(get_arg(args, 0, 1)?)?, }) } + "token" => { + let inner = Self::new(get_arg(args, 0, 1)?); + let content = inner.evaluate()?; + json!({ + "type": "TOKEN", + "content": content + }) + } + "immediate" => { + let inner = Self::new(get_arg(args, 0, 1)?); + let content = inner.evaluate()?; + json!({ + "type": "IMMEDIATE_TOKEN", + "content": content + }) + } // nodes can be double wrapped in fields, although I'm not sure what happens // when you ask the cursor for the 
field name? May not be possible to handle // that in this case. @@ -178,14 +197,23 @@ impl TsInput { } } } - Expr::Path(ExprPath { attrs: _, qself: _, path }) => { + Expr::Path(ExprPath { + attrs: _, + qself: _, + path, + }) => { let ident = path.require_ident()?; json!({ "type": "SYMBOL", "name": ident.to_string(), }) } - k => return Err(syn::Error::new(k.span(), format!("Unexpected input type: {k:?}"))), + k => { + return Err(syn::Error::new( + k.span(), + format!("Unexpected input type: {k:?}"), + )); + } }; Ok(json) } diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new new file mode 100644 index 0000000..45e4df2 --- /dev/null +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new @@ -0,0 +1,39 @@ +--- +source: example/src/arithmetic.rs +assertion_line: 101 +expression: "grammar::Expression::parse(\"1 + 2\")" +--- +ParseResult { + result: None, + errors: [ + ParseError { + node_position: Position { + bytes: 0..3, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 4, + }, + }, + error_position: Position { + bytes: 2..3, + start: Point { + line: 1, + column: 3, + }, + end: Point { + line: 1, + column: 4, + }, + }, + lookaheads: [ + "-", + "*", + ], + reason: Error, + }, + ], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new new file mode 100644 index 0000000..63ccd42 --- /dev/null +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new @@ -0,0 +1,29 @@ +--- +source: example/src/optionals.rs +assertion_line: 37 +expression: "grammar::Language::parse(\"1_\")" +--- +ParseResult { + result: Some( + Language { + v: None, + _s: (), + t: Spanned { + value: None, + position: Position { + bytes: 
0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, + }, + }, + _d: None, + }, + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new new file mode 100644 index 0000000..109621c --- /dev/null +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new @@ -0,0 +1,29 @@ +--- +source: example/src/optionals.rs +assertion_line: 35 +expression: "grammar::Language::parse(\"_\")" +--- +ParseResult { + result: Some( + Language { + v: None, + _s: (), + t: Spanned { + value: None, + position: Position { + bytes: 1..1, + start: Point { + line: 1, + column: 2, + }, + end: Point { + line: 1, + column: 2, + }, + }, + }, + _d: None, + }, + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new new file mode 100644 index 0000000..af761a2 --- /dev/null +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new @@ -0,0 +1,26 @@ +--- +source: example/src/repetitions.rs +assertion_line: 68 +expression: "grammar::NumberList::parse(\"1\")" +--- +ParseResult { + result: Some( + NumberList { + numbers: Spanned { + value: [], + position: Position { + bytes: 0..1, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 2, + }, + }, + }, + }, + ), + errors: [], +} diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new new file mode 100644 index 0000000..07619b1 --- /dev/null +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new @@ -0,0 +1,36 @@ +--- 
+source: example/src/repetitions.rs +assertion_line: 67 +expression: "grammar::NumberList::parse(\"\")" +--- +ParseResult { + result: None, + errors: [ + ParseError { + node_position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, + }, + error_position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, + }, + lookaheads: [], + reason: Error, + }, + ], +} diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new new file mode 100644 index 0000000..ce705e0 --- /dev/null +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new @@ -0,0 +1,36 @@ +--- +source: example/src/words.rs +assertion_line: 26 +expression: "grammar::Words::parse(\"if\")" +--- +ParseResult { + result: None, + errors: [ + ParseError { + node_position: Position { + bytes: 0..2, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 3, + }, + }, + error_position: Position { + bytes: 0..2, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 3, + }, + }, + lookaheads: [], + reason: Error, + }, + ], +} diff --git a/example/src/words.rs b/example/src/words.rs index a402d06..4d44f56 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -10,7 +10,7 @@ pub mod grammar { pub struct Words { #[leaf("if")] keyword: (), - #[word] + // #[word] #[leaf(pattern(r"[a-z_]+"))] word: String, } diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index aa5fa5f..28086cc 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -112,7 +112,9 @@ pub fn expand_rule(input: DeriveInput) -> Result { })?; let mut cursor = node.walk(); - assert!(cursor.goto_first_child(), "Could not find a child corresponding to any enum branch"); + if !cursor.goto_first_child() { + 
return Err(::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(#enum_name))); + } loop { let node = cursor.node(); match node.kind() { diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 2464c2b..96209c4 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -36,7 +36,6 @@ use expansion::*; prec_left, prec_right, prec_dynamic, - token, // TODO: This will instead be on a derive(Language) as well as others like conflicts, // externals, inline, word, supertypes, etc. to fill out the full grammar specification. extras, diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 1286be7..d0d0352 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -16,6 +16,7 @@ path = "src/lib.rs" [dependencies] tree-sitter.workspace = true rust-sitter-macro = { path = "../macro" } +log = "0.4" # This one could be optional. serde_json = "1" serde = { version = "1", features = ["derive"] } diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index dc9fe5a..abd18c0 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -15,6 +15,7 @@ pub fn extract_struct_or_variant( ) -> Result { let mut parent_cursor = node.walk(); let mut state = ExtractStructState { + // cursor: Some(parent_cursor), cursor: if parent_cursor.goto_first_child() { Some(parent_cursor) } else { @@ -127,10 +128,14 @@ pub fn parse>( ) -> crate::ParseResult { let mut parser = tree_sitter::Parser::new(); parser.set_language(&language()).unwrap(); - // parser.set_logger(Some(Box::new(|_t, m| eprintln!("parser::{m}")))); + if matches!(std::env::var("RUST_SITTER_PARSER_LOG").as_deref(), Ok("1")) { + parser.set_logger(Some(Box::new(|_t, m| log::debug!("parser::{m}")))); + } let tree = parser.parse(input, None).expect("Failed to parse"); let root_node = tree.root_node(); + println!("{root_node}"); + let mut errors = vec![]; if root_node.has_error() { crate::error::collect_parsing_errors(&root_node, &mut errors); @@ -143,9 +148,11 @@ pub fn parse>( }; let result = >::extract(&mut 
ctx, Some(root_node), input.as_bytes(), None); + #[allow(clippy::manual_ok_err)] let result = match result { - Err(e) => { - e.accumulate_parse_errors(&mut errors); + Err(_e) => { + // These are actually not really useful yet. + // e.accumulate_parse_errors(&mut errors); None } Ok(o) => Some(o), diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 3ce47b5..f9537b5 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -1,3 +1,4 @@ +use log::{trace, debug}; use std::{collections::HashSet, ops::Range}; use crate::{Point, Position, extract::ExtractContext}; @@ -17,7 +18,7 @@ pub struct ParseError { #[derive(Debug)] pub enum ParseErrorReason { - Missing, + Missing(&'static str), Error, FailedExtract { field: String, @@ -34,6 +35,53 @@ pub enum ParseErrorReason { TypeConversion(Box), } +impl ParseError { + pub fn is_missing(&self) -> bool { + matches!(&self.reason, ParseErrorReason::Missing(_)) + } +} + +impl std::fmt::Display for ParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}:{} to {}:{}, {}", + self.error_position.start.line, + self.error_position.start.column, + self.error_position.end.line, + self.error_position.end.column, + self.reason + ) + } +} + +impl std::fmt::Display for ParseErrorReason { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ParseErrorReason::Missing(kind) => write!(f, "missing {kind}"), + ParseErrorReason::Error => f.write_str("parse error"), + ParseErrorReason::FailedExtract { field } => { + write!(f, "failed extraction of field: {field}") + } + ParseErrorReason::MissingNode { + node_kind, + type_name, + } => write!( + f, + "missing node in extraction of type: {type_name}, {node_kind}" + ), + ParseErrorReason::MissingEnum { + node_kind, + enum_name, + } => write!( + f, + "missing enum in extraction of type: {enum_name}, {node_kind}" + ), + ParseErrorReason::TypeConversion(error) => write!(f, "type conversion: {error}"), + } + } +} 
+ /// A low level error which just wraps the error node and exposes many fields around it. #[derive(Debug)] pub struct NodeError<'a> { @@ -42,15 +90,69 @@ pub struct NodeError<'a> { impl<'a> NodeError<'a> { pub fn to_parse_error(&self) -> ParseError { + // Handle missing shift. + let mut node_position = Position::new(self.node_byte_range(), self.point_range()); + let mut error_position = Position::new( + self.first_error_byte_range(), + self.first_error_point_range(), + ); + trace!("error node: {}", self.node); + trace!("error node: {:?}", self.node); + trace!("error node parent: {:?}", self.node.parent()); + if self.node.is_missing() + && let Some(parent) = self.node.parent() + { + debug!("attempting missing shift: {}", parent.to_sexp()); + // Find where the missing node is located in the parent, then shift it backwards by + // removing any extra nodes in its place. + // let mut c = parent.walk(); + // let idx = parent.children(&mut c) + // // defers to pointer equality, which is what we want in this case. + // .position(|n| n == self.node) + // .unwrap(); + // c.reset(self.node); + // Doesn't work, the cursor iterator doesn't work correctly. + // dbg!(self.node.prev_sibling()); + // while dbg!(c.goto_previous_sibling()) && c.node().is_extra() { + // debug!("shifting past extra: {}", c.node()); + // } + // Use parent node for node_position and this node for error_position. 
+ let mut node = self.node; + let mut has_shifted = false; + while let Some(n) = node.prev_sibling() { + node = n; + if !has_shifted { + has_shifted = node.is_extra(); + } + debug!("shifting past extra: {}", n); + if !node.is_extra() { + break; + } + } + + if has_shifted { + debug!("shifted to node: {}", node.kind()); + let range = node.byte_range(); + let range = range.end..range.end; + let new_err = Position::new( + range, + (node.start_position().into(), node.end_position().into()), + ); + let new_pos = Position::new( + parent.byte_range(), + (parent.start_position().into(), parent.end_position().into()), + ); + debug!("shifted position from {error_position:?} to {new_pos:?}"); + error_position = new_err; + node_position = new_pos; + } + } ParseError { - node_position: Position::new(self.node_byte_range(), self.point_range()), - error_position: Position::new( - self.first_error_byte_range(), - self.first_error_point_range(), - ), + node_position, + error_position, lookaheads: self.lookahead().map(|l| l.collect()).unwrap_or_default(), reason: if self.node.is_missing() { - ParseErrorReason::Missing + ParseErrorReason::Missing(self.node.kind()) } else { ParseErrorReason::Error }, @@ -149,47 +251,6 @@ impl<'a> NodeError<'a> { } } -impl std::fmt::Display for ParseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}:{} to {}:{}, {}", - self.error_position.start.line, - self.error_position.start.column, - self.error_position.end.line, - self.error_position.end.column, - self.reason - ) - } -} - -impl std::fmt::Display for ParseErrorReason { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - ParseErrorReason::Missing => f.write_str("missing node"), - ParseErrorReason::Error => f.write_str("parse error"), - ParseErrorReason::FailedExtract { field } => { - write!(f, "failed extraction of field: {field}") - } - ParseErrorReason::MissingNode { - node_kind, - type_name, - } => write!( - f, - 
"missing node in extraction of type: {type_name}, {node_kind}" - ), - ParseErrorReason::MissingEnum { - node_kind, - enum_name, - } => write!( - f, - "missing enum in extraction of type: {enum_name}, {node_kind}" - ), - ParseErrorReason::TypeConversion(error) => write!(f, "type conversion: {error}"), - } - } -} - struct ErrorLookahead<'a> { it: tree_sitter::LookaheadIterator, language: tree_sitter::Language, @@ -282,6 +343,7 @@ impl<'a> ExtractError<'a> { } } + #[allow(dead_code)] pub(crate) fn accumulate_parse_errors(self, errors: &mut Vec) { for inner in self.inner { let err = match inner.reason { diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index 0dd0028..098b86f 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -147,7 +147,10 @@ where source: &[u8], leaf_fn: Option>, ) -> Result<'tree, L> { - let node = node.expect("Expected a node"); + let node = match node { + Some(n) => n, + None => return Err(ExtractError::missing_node(ctx, "WithLeaf")), + }; // TODO: Consider if this should be fallible as well. Ok(leaf_fn.expect("No leaf function on WithLeaf").apply( source, diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 732e14f..2d189bc 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -91,6 +91,10 @@ impl Position { let end = Point::from_tree_sitter(node.end_position()); Self { bytes, start, end } } + + pub fn point_range(&self) -> (Point, Point) { + (self.start, self.end) + } } impl PartialOrd for Position { @@ -122,6 +126,12 @@ impl Point { } } +impl From for Point { + fn from(value: tree_sitter::Point) -> Self { + Self::from_tree_sitter(value) + } +} + impl, U> Extract> for Spanned { type LeafFn<'a> = T::LeafFn<'a>; fn extract<'a, 'tree>( From 8ac25569f1d0217cebac6d7a2821ecab68a2d5b4 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Sat, 23 Aug 2025 10:45:55 -0500 Subject: [PATCH 37/50] WIP: Rewrite to build a parse state table based on the grammar and use that to extract information. 
This allows for the grammar to be flatten significantly by not requiring every field be its own rule. --- Cargo.lock | 150 +++++++++++++- Cargo.toml | 2 +- common/Cargo.toml | 1 + common/src/expansion.rs | 311 +++++++++++++----------------- common/src/lib.rs | 81 +++----- example/Cargo.toml | 1 + example/src/arithmetic.rs | 89 ++++++++- example/src/main.rs | 9 +- example/src/optionals.rs | 3 +- macro/Cargo.toml | 3 +- macro/src/expansion.rs | 177 ++++++++++++++--- runtime/Cargo.toml | 2 +- runtime/src/__private.rs | 187 +++++++++++------- runtime/src/error.rs | 18 +- runtime/src/extract.rs | 275 +++++++++++++++----------- runtime/src/extract/field.rs | 177 +++++++++++++++++ runtime/src/lib.rs | 39 +++- runtime/src/rule.rs | 12 +- tool/src/lib.rs | 24 +-- types/Cargo.toml | 10 + {runtime => types}/src/grammar.rs | 53 ++++- types/src/lib.rs | 2 + 22 files changed, 1149 insertions(+), 477 deletions(-) create mode 100644 runtime/src/extract/field.rs create mode 100644 types/Cargo.toml rename {runtime => types}/src/grammar.rs (73%) create mode 100644 types/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 0a8bc57..da05c43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,56 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstream" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + 
"utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + [[package]] name = "anyhow" version = "1.0.99" @@ -60,6 +110,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "console" version = "0.15.11" @@ -95,6 +151,29 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -285,6 +364,12 @@ dependencies = [ "similar", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.14.0" @@ -300,6 +385,30 @@ version 
= "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "js-sys" version = "0.3.77" @@ -356,12 +465,33 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + [[package]] name = "percent-encoding" version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "potential_utf" version = "0.1.2" @@ -428,10 +558,10 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" name = "rust-sitter" version = 
"0.5.0" dependencies = [ - "indexmap", "insta", "log", "rust-sitter-macro", + "rust-sitter-types", "serde", "serde_json", "tempfile", @@ -445,6 +575,7 @@ dependencies = [ "itertools", "proc-macro2", "quote", + "rust-sitter-types", "serde_json", "syn", ] @@ -455,6 +586,7 @@ version = "0.5.0" dependencies = [ "codemap", "codemap-diagnostic", + "env_logger", "insta", "rust-sitter", "rust-sitter-tool", @@ -469,6 +601,7 @@ dependencies = [ "proc-macro2", "quote", "rust-sitter-common", + "rust-sitter-types", "syn", "tempfile", ] @@ -489,6 +622,15 @@ dependencies = [ "tree-sitter-generate", ] +[[package]] +name = "rust-sitter-types" +version = "0.5.0" +dependencies = [ + "indexmap", + "serde", + "serde_json", +] + [[package]] name = "rustc-hash" version = "2.1.1" @@ -756,6 +898,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index f19df91..8b3d587 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ members = [ "runtime", "tool", "example", - "common", + "common", "types", ] [workspace.package] version = "0.5.0" diff --git a/common/Cargo.toml b/common/Cargo.toml index 9dd92d9..7190317 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -14,6 +14,7 @@ categories = ["development-tools"] path = "src/lib.rs" [dependencies] +rust-sitter-types = { path = "../types" } syn = { version = "2", features = [ "full", "extra-traits" ] } proc-macro2 = "1" quote = "1" diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 2c0a7d7..f4eef3b 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; use super::*; use 
itertools::Itertools; -use serde_json::{Map, Value, json}; +use rust_sitter_types::grammar::Grammar; use syn::{parse::Parse, punctuated::Punctuated, spanned::Spanned}; #[derive(Debug)] @@ -49,10 +49,13 @@ impl RuleDerive { } /// Generate a single grammar per module. -pub fn generate_grammar(root_file: Vec) -> Result> { - let mut state = ExpansionState::default(); +pub fn generate_grammar(root_file: Vec) -> Result> { + let mut state = ExpansionState::new(); // for some reason, source_file must be the first key for things to work - state.rules_map.insert("source_file".to_string(), json!({})); + state + .grammar + .rules + .insert("source_file".to_string(), RuleDef::BLANK); if root_file .into_iter() @@ -85,32 +88,40 @@ pub fn generate_grammar(root_file: Vec) -> Result> { ) })? .to_string(); - state.rules_map.insert( + state.grammar.name = language.clone(); + state.grammar.rules.insert( "source_file".to_string(), - state.rules_map.get(&language).unwrap().clone(), + state.grammar.rules.get(&language).unwrap().clone(), ); - let word_rule = state.word_rule; - let rules_map = state.rules_map; - let extras_list = state.extras; - Ok(Some(json!({ - "name": language, - "word": word_rule, - "rules": rules_map, - "extras": extras_list - }))) + Ok(Some(state.grammar)) } -#[derive(Default)] pub struct ExpansionState { - rules_map: Map, - word_rule: Option, - language_rule: Option, - extras: Vec, + pub grammar: Grammar, + pub language_rule: Option, // Accumulated errors. 
- error: Option, + pub error: Option, +} + +impl Default for ExpansionState { + fn default() -> Self { + Self::new() + } } impl ExpansionState { + pub fn new() -> Self { + Self { + grammar: Grammar { + name: String::new(), + word: None, + rules: Default::default(), + extras: Default::default(), + }, + language_rule: None, + error: None, + } + } fn err(&mut self) -> Result<()> { if let Some(err) = self.error.take() { Err(err) @@ -130,7 +141,8 @@ impl ExpansionState { fn verify_seen(&self) -> Result<()> { if let Some(e) = self - .rules_map + .grammar + .rules .values() .flat_map(|v| self.check_seen_value(v).err()) .reduce(|mut acc, e| { @@ -144,40 +156,36 @@ impl ExpansionState { } } - // TODO: This could be made a lot simpler by eventually having actual types for this. That - // could also make it easier to generate traits which produce grammars instead. - fn check_seen_value(&self, value: &Value) -> Result<()> { + fn check_seen_value(&self, value: &RuleDef) -> Result<()> { // Each value is always a map. - let map = value.as_object().unwrap(); - if map.contains_key("members") { - let members = map["members"].as_array().unwrap(); - for member in members { - self.check_seen_value(member)?; - } - } else { - if map.is_empty() { - return Ok(()); - } - // type is always present, expect on the empty rule for source_file. - match map["type"].as_str().unwrap() { - "SYMBOL" => { - // Check if another top level rule exists, otherwise this is an error. 
- let name = map["name"].as_str().unwrap(); - if !self.rules_map.contains_key(name) { - return Err(Error::new( - Span::call_site(), - format!("Symbol found with no corresponding value: {name}"), - )); - } + match value { + RuleDef::SYMBOL { name } => { + if !self.grammar.rules.contains_key(name) { + return Err(Error::new( + Span::call_site(), + format!("Symbol found with no corresponding value: {name}"), + )); } - _ => { - if let Some(content) = map.get("content") { - self.check_seen_value(content)?; - } + } + RuleDef::CHOICE { members } | RuleDef::SEQ { members } => { + for member in members { + self.check_seen_value(member)?; } } + RuleDef::REPEAT { content } + | RuleDef::REPEAT1 { content } + | RuleDef::PREC_DYNAMIC { value: _, content } + | RuleDef::PREC_LEFT { value: _, content } + | RuleDef::PREC_RIGHT { value: _, content } + | RuleDef::PREC { value: _, content } + | RuleDef::TOKEN { content } + | RuleDef::IMMEDIATE_TOKEN { content } + | RuleDef::RESERVED { + context_name: _, + content, + } => self.check_seen_value(content)?, + _ => return Ok(()), } - Ok(()) } @@ -199,13 +207,13 @@ impl ExpansionState { } fn set_word(&mut self, ident: String) -> Result<()> { - if let Some(existing) = &self.word_rule { + if let Some(existing) = &self.grammar.word { return Err(self.accumulate_error(Error::new( Span::call_site(), format!("Word rule already defined as {existing}, found duplicate with {ident}"), ))); } - self.word_rule = Some(ident); + self.grammar.word = Some(ident); Ok(()) } } @@ -258,7 +266,7 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { if let Some(err) = err { return Err(err); } - ctx.extras = extras; + ctx.grammar.extras = extras; } // if input.extras.word { // ctx.set_word(&input.ident); @@ -268,7 +276,7 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { match input.data { Data::Struct(DataStruct { fields, .. 
}) => { - gen_struct_or_variant(ident.to_string(), &input.attrs, fields.clone(), ctx)?; + gen_struct_or_variant(ident.to_string(), &input.attrs, fields.clone(), false, ctx)?; } Data::Enum(DataEnum { variants, .. }) => { variants @@ -278,6 +286,7 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { format!("{}_{}", ident, v.ident), &v.attrs, v.fields.clone(), + true, ctx, ) .err() @@ -289,23 +298,17 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { .map(Err::<(), _>) .transpose()?; - let mut members: Vec = vec![]; + let mut members = vec![]; variants.iter().for_each(|v| { let variant_path = format!("{}_{}", ident, v.ident); - members.push(json!({ - "type": "SYMBOL", - "name": variant_path - })) + members.push(RuleDef::SYMBOL { name: variant_path }); }); - let rule = json!({ - "type": "CHOICE", - "members": members - }); + let rule = RuleDef::CHOICE { members }; let rule = input.extras.apply(rule)?; - ctx.rules_map.insert(ident.to_string(), rule); + ctx.grammar.rules.insert(ident.to_string(), rule); } Data::Union(_) => return Err(Error::new(ident.span(), "Union not supported")), } @@ -383,7 +386,7 @@ impl RuleParams { }) } - fn apply(&self, rule: serde_json::Value) -> Result { + fn apply(&self, rule: RuleDef) -> Result { let Self { prec_param, prec_left_param, @@ -394,11 +397,11 @@ impl RuleParams { let rule = if let Some(Expr::Lit(lit)) = prec_param { if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC", - "value": i.base10_parse::()?, - "content": rule - }) + let value = i.base10_parse::()?; + RuleDef::PREC { + value: value.into(), + content: Box::new(rule), + } } else { return Err(Error::new( lit.span(), @@ -407,39 +410,36 @@ impl RuleParams { } } else if let Some(Expr::Lit(lit)) = prec_left_param { let value = if let Lit::Int(i) = &lit.lit { - i.base10_parse::()? + i.base10_parse::()? 
} else { return Err(Error::new( lit.span(), "Expected integer literal for precedence", )); }; - json!({ - "type": "PREC_LEFT", - "value": value, - "content": rule - }) + RuleDef::PREC_LEFT { + value: value.into(), + content: Box::new(rule), + } } else if let Some(Expr::Lit(lit)) = prec_right_param { let value = if let Lit::Int(i) = &lit.lit { - i.base10_parse::()? + i.base10_parse::()? } else { return Err(Error::new( lit.span(), "Expected integer literal for precedence", )); }; - json!({ - "type": "PREC_RIGHT", - "value": value, - "content": rule - }) + RuleDef::PREC_RIGHT { + value: value.into(), + content: Box::new(rule), + } } else if let Some(Expr::Lit(lit)) = prec_dynamic_param { if let Lit::Int(i) = &lit.lit { - json!({ - "type": "PREC_DYNAMIC", - "value": i.base10_parse::()?, - "content": rule - }) + RuleDef::PREC_DYNAMIC { + value: i.base10_parse::()?, + content: Box::new(rule), + } } else { return Err(Error::new( lit.span(), @@ -459,7 +459,7 @@ fn gen_field( leaf_type: Option, attrs: Vec, ctx: &mut ExpansionState, -) -> Result<(Value, bool, bool)> { +) -> Result<(RuleDef, bool, bool)> { let precs = RuleParams::new(&attrs)?; if precs.word { @@ -516,18 +516,14 @@ fn gen_field( if !is_vec && !is_option { if let Some(input) = leaf_input { - ctx.rules_map - .insert(path.clone(), precs.apply(input.evaluate()?)?); - - Ok(( - precs.apply(input.evaluate()?)?, - // json!({ - // "type": "SYMBOL", - // "name": path - // }), - is_option, - false, - )) + let result = input.evaluate()?; + Ok((precs.apply(result)?, is_option, false)) + // if result.is_symbol() { + // Ok((precs.apply(result)?, is_option, false)) + // } else { + // ctx.grammar.rules.insert(path.clone(), precs.apply(result)?); + // Ok((RuleDef::SYMBOL { name: path }, is_option, false)) + // } } else { let symbol_name = match filter_inner_type(&leaf_type, &skip_over) { Type::Path(p) => p.path.require_ident()?.to_string(), @@ -535,10 +531,7 @@ fn gen_field( }; Ok(( - precs.apply(json!({ - "type": "SYMBOL", - 
"name": symbol_name, - }))?, + precs.apply(RuleDef::SYMBOL { name: symbol_name })?, false, false, )) @@ -574,65 +567,47 @@ fn gen_field( .map(|p| precs.apply(p.evaluate()?)) .transpose()?; - let field_rule_non_optional = json!({ - "type": "FIELD", - "name": format!("{path}_element"), - "content": field_json - }); + let field_rule_non_optional = RuleDef::FIELD { + name: format!("{path}_element"), + content: field_json.into(), + }; let field_rule = if field_optional { - json!({ - "type": "CHOICE", - "members": [ - { - "type": "BLANK" - }, - field_rule_non_optional, - ] - }) + RuleDef::optional(field_rule_non_optional) } else { field_rule_non_optional }; let vec_contents = if let Some(delimiter_json) = delimiter_json { - json!({ - "type": "SEQ", - "members": [ - field_rule, - { - "type": if field_optional { - "REPEAT1" - } else { - "REPEAT" - }, - "content": { - "type": "SEQ", - "members": [ - delimiter_json, - field_rule, - ] - } - } - ] - }) + let content = Box::new(RuleDef::SEQ { + members: vec![delimiter_json, field_rule.clone()], + }); + let delimiter_rule = if field_optional { + RuleDef::REPEAT1 { content } + } else { + RuleDef::REPEAT { content } + }; + RuleDef::SEQ { + members: vec![field_rule, delimiter_rule], + } } else { - json!({ - "type": "REPEAT1", - "content": field_rule - }) + RuleDef::REPEAT1 { + content: field_rule.into(), + } }; let vec_contents = precs.apply(vec_contents)?; let contents_ident = format!("List_{path}"); - ctx.rules_map.insert(contents_ident.clone(), vec_contents); + ctx.grammar + .rules + .insert(contents_ident.clone(), vec_contents); Ok(( // vec_contents, - json!({ - "type": "SYMBOL", - "name": contents_ident, - }), + RuleDef::SYMBOL { + name: contents_ident, + }, !repeat_non_empty, false, )) @@ -656,6 +631,7 @@ fn gen_struct_or_variant( path: String, attrs: &[Attribute], fields: Fields, + is_variant: bool, ctx: &mut ExpansionState, ) -> Result<()> { fn gen_field_optional( @@ -663,7 +639,7 @@ fn gen_struct_or_variant( field: &Field, 
ctx: &mut ExpansionState, ident_str: String, - ) -> Result { + ) -> Result { // Produce a cleaner grammar: fields with `_` are hidden fields. let path = if ident_str.starts_with("_") { format!("_{path}_{ident_str}") @@ -673,26 +649,13 @@ fn gen_struct_or_variant( let (field_contents, is_option, is_text) = gen_field(path, Some(field.ty.clone()), field.attrs.clone(), ctx)?; - let core = if !is_text { - json!({ - "type": "FIELD", - "name": ident_str, - "content": field_contents - }) - } else { - field_contents + let core = RuleDef::FIELD { + name: ident_str, + content: field_contents.into(), }; let r = if is_option { - json!({ - "type": "CHOICE", - "members": [ - { - "type": "BLANK" - }, - core - ] - }) + RuleDef::optional(core) } else { core }; @@ -732,16 +695,20 @@ fn gen_struct_or_variant( Fields::Unit => { let (field_contents, _is_option, _is_text) = gen_field(path.clone(), None, attrs.to_owned(), ctx)?; - field_contents + if is_variant { + RuleDef::FIELD { + name: "unit".to_owned(), + content: field_contents.into(), + } + } else { + field_contents + } } - _ => json!({ - "type": "SEQ", - "members": children - }), + _ => RuleDef::SEQ { members: children }, }; let precs = RuleParams::new(attrs)?; - ctx.rules_map.insert(path, precs.apply(base_rule)?); + ctx.grammar.rules.insert(path, precs.apply(base_rule)?); Ok(()) } diff --git a/common/src/lib.rs b/common/src/lib.rs index 93ee3d2..f85c5be 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,5 +1,6 @@ use proc_macro2::Span; use quote::ToTokens; +use rust_sitter_types::grammar::RuleDef; use std::collections::HashSet; use syn::{ parse::{Parse, ParseStream}, @@ -76,8 +77,7 @@ impl TsInput { fn new(expr: &Expr) -> Self { Self { expr: expr.clone() } } - pub fn evaluate(&self) -> Result { - use serde_json::json; + pub fn evaluate(&self) -> Result { fn get_str(e: &Expr) -> Result { let s = match e { Expr::Lit(ExprLit { @@ -96,14 +96,11 @@ impl TsInput { } Ok(p.get(i).unwrap()) } - let json = match &self.expr { + 
let def = match &self.expr { Expr::Lit(ExprLit { attrs: _, lit: Lit::Str(s), - }) => json!({ - "type": "STRING", - "value": s.value(), - }), + }) => RuleDef::STRING { value: s.value() }, Expr::Call(ExprCall { attrs: _, func, @@ -121,15 +118,8 @@ impl TsInput { match name.as_str() { "optional" => { let inner = Self::new(get_arg(args, 0, 1)?); - let mut members = vec![]; - members.push(inner.evaluate()?); - members.push(json!({ - "type": "BLANK", - })); - json!({ - "type": "CHOICE", - "members": members, - }) + let members = vec![inner.evaluate()?, RuleDef::BLANK]; + RuleDef::CHOICE { members } } "seq" => { let mut members = vec![]; @@ -137,10 +127,7 @@ impl TsInput { let ts = Self::new(arg); members.push(ts.evaluate()?); } - json!({ - "type": "SEQ", - "members": members, - }) + RuleDef::SEQ { members } } "choice" => { let mut members = vec![]; @@ -148,46 +135,33 @@ impl TsInput { let ts = Self::new(arg); members.push(ts.evaluate()?); } - json!({ - "type": "CHOICE", - "members": members, - }) - } - "re" | "pattern" => { - json!({ - "type": "PATTERN", - "value": get_str(get_arg(args, 0, 1)?)?, - }) - } - "text" => { - json!({ - "type": "STRING", - "value": get_str(get_arg(args, 0, 1)?)?, - }) + RuleDef::CHOICE { members } } + "re" | "pattern" => RuleDef::PATTERN { + value: get_str(get_arg(args, 0, 1)?)?, + flags: None, + }, + "text" => RuleDef::STRING { + value: get_str(get_arg(args, 0, 1)?)?, + }, "token" => { let inner = Self::new(get_arg(args, 0, 1)?); - let content = inner.evaluate()?; - json!({ - "type": "TOKEN", - "content": content - }) + let content = Box::new(inner.evaluate()?); + RuleDef::TOKEN { content } } "immediate" => { let inner = Self::new(get_arg(args, 0, 1)?); - let content = inner.evaluate()?; - json!({ - "type": "IMMEDIATE_TOKEN", - "content": content - }) + let content = Box::new(inner.evaluate()?); + RuleDef::IMMEDIATE_TOKEN { content } } // nodes can be double wrapped in fields, although I'm not sure what happens // when you ask the cursor for 
the field name? May not be possible to handle // that in this case. "field" => { - let _field_name = get_str(get_arg(args, 0, 2)?)?; - let _inner = get_arg(args, 1, 2)?; - todo!() + let name = get_str(get_arg(args, 0, 2)?)?; + let inner = Self::new(get_arg(args, 1, 2)?); + let content = Box::new(inner.evaluate()?); + RuleDef::FIELD { name, content } } k => { return Err(syn::Error::new( @@ -203,10 +177,9 @@ impl TsInput { path, }) => { let ident = path.require_ident()?; - json!({ - "type": "SYMBOL", - "name": ident.to_string(), - }) + RuleDef::SYMBOL { + name: ident.to_string(), + } } k => { return Err(syn::Error::new( @@ -215,7 +188,7 @@ impl TsInput { )); } }; - Ok(json) + Ok(def) } } diff --git a/example/Cargo.toml b/example/Cargo.toml index 5ca107a..f0e3e8d 100644 --- a/example/Cargo.toml +++ b/example/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" publish = false [dependencies] +env_logger = "0.11" rust-sitter = { path = "../runtime" } codemap = "0.1.3" codemap-diagnostic = "0.1.1" diff --git a/example/src/arithmetic.rs b/example/src/arithmetic.rs index ab72fbc..62a5bc0 100644 --- a/example/src/arithmetic.rs +++ b/example/src/arithmetic.rs @@ -12,7 +12,80 @@ pub mod grammar { Sub(Box, #[leaf("-")] (), Box), #[prec_left(2)] Mul(Box, #[leaf("*")] (), Box), + Let(LetExpression), + Complex(ComplexExpression), + Print(PrintExpression), } + + #[derive(PartialEq, Eq, Debug, Rule)] + pub struct LetExpression { + #[text("let")] + _let: (), + pub var: Ident, + #[text("=")] + _eq: (), + pub val: Box, + } + + #[derive(PartialEq, Eq, Debug, Rule)] + pub enum LogLevel { + #[leaf("info")] + Info, + #[leaf("debug")] + Debug, + #[leaf("trace")] + Trace, + Custom(CustomLevel), + } + + #[derive(PartialEq, Eq, Debug, Rule)] + pub enum Other { + #[leaf("info")] + Info, + #[leaf("debug")] + Debug, + #[leaf("trace")] + Trace, + } + + #[derive(PartialEq, Eq, Debug, Rule)] + pub struct CustomLevel { + #[text("custom")] + _custom: (), + #[text("::")] + _co: (), + pub value: Other, + } + + 
#[derive(PartialEq, Eq, Debug, Rule)] + pub struct ComplexExpression { + #[text("log")] + _log: (), + #[leaf("optional")] + optional: Option<()>, + pub level: LogLevel, + #[leaf(seq("(", optional(Expression), ")"))] + pub ex: Option<((), Option>, ())>, + // #[leaf(seq(LogLevel, "(", optional(Expression), ")"))] + // pub ident_ex: Option<(LogLevel, (), Option>, ())>, + #[leaf(";")] + _semi: Option<()>, + } + + #[derive(PartialEq, Eq, Debug, Rule)] + pub struct PrintExpression { + #[text("print")] + _print: (), + #[text("(")] + _lparen: (), + #[sep_by(",")] + inputs: Vec, + #[text(")")] + _rparen: (), + } + + #[derive(PartialEq, Eq, Debug, Rule)] + pub struct Ident(#[leaf(re(r"[a-zA-Z_][a-zA-Z_0-9]*"))] String); } #[cfg(test)] @@ -44,7 +117,9 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 - 2 - 3").into_result().unwrap(), + grammar::Expression::parse("1 - 2 - 3") + .into_result() + .unwrap(), Expression::Sub( Box::new(Expression::Sub( Box::new(Expression::Number(1)), @@ -57,7 +132,9 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 - 2 * 3").into_result().unwrap(), + grammar::Expression::parse("1 - 2 * 3") + .into_result() + .unwrap(), Expression::Sub( Box::new(Expression::Number(1)), (), @@ -70,7 +147,9 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 * 2 * 3").into_result().unwrap(), + grammar::Expression::parse("1 * 2 * 3") + .into_result() + .unwrap(), Expression::Mul( Box::new(Expression::Mul( Box::new(Expression::Number(1)), @@ -83,7 +162,9 @@ mod tests { ); assert_eq!( - grammar::Expression::parse("1 * 2 - 3").into_result().unwrap(), + grammar::Expression::parse("1 * 2 - 3") + .into_result() + .unwrap(), Expression::Sub( Box::new(Expression::Mul( Box::new(Expression::Number(1)), diff --git a/example/src/main.rs b/example/src/main.rs index d515e51..07d4d9b 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -6,9 +6,9 @@ use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, Spa use 
rust_sitter::error::ParseError; mod arithmetic; -mod optionals; -mod repetitions; -mod words; +// mod optionals; +// mod repetitions; +// mod words; fn convert_parse_error_to_diagnostics(file_span: &codemap::Span, error: &ParseError) -> Diagnostic { let mut message = format!("syntax error. reason: {:?}", error.reason); @@ -35,6 +35,7 @@ fn convert_parse_error_to_diagnostics(file_span: &codemap::Span, error: &ParseEr } fn main() { + env_logger::init(); let stdin = std::io::stdin(); loop { @@ -49,7 +50,7 @@ fn main() { } match arithmetic::grammar::Expression::parse(input).into_result() { - Ok(expr) => println!("{expr:?}"), + Ok(expr) => println!("{expr:#?}"), Err(errs) => { let mut codemap = CodeMap::new(); let file_span = codemap.add_file("".to_string(), input.to_string()); diff --git a/example/src/optionals.rs b/example/src/optionals.rs index 3d9c15f..6eccf04 100644 --- a/example/src/optionals.rs +++ b/example/src/optionals.rs @@ -20,7 +20,8 @@ mod grammar { #[derive(Debug, Rule)] pub struct Number { #[leaf(re(r"\d+"))] - #[with(|v| v.parse().unwrap())] + // TODO: We are replacing this entirely with a different defintion. 
+ // #[with(|v| v.parse().unwrap())] v: i32, } } diff --git a/macro/Cargo.toml b/macro/Cargo.toml index c65bbce..e40ac77 100644 --- a/macro/Cargo.toml +++ b/macro/Cargo.toml @@ -19,7 +19,8 @@ syn = { version = "2", features = [ "full", "extra-traits" ] } quote = "1" proc-macro2 = "1" rust-sitter-common = { path = "../common" } +rust-sitter-types = { path = "../types" } [dev-dependencies] -insta = "1.39" +insta = "1" tempfile = "3" diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 28086cc..b07d903 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use crate::errors::IteratorExt as _; use proc_macro2::Span; @@ -7,6 +7,7 @@ use rust_sitter_common::{ expansion::{ExpansionState, RuleDerive}, *, }; +use rust_sitter_types::grammar::{Grammar, RuleDef}; use syn::{spanned::Spanned, *}; pub enum ParamOrField { @@ -28,7 +29,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { // there at compile time, and allow us to cleanly represent them. This is a lot of extra // compilation time but it is the best we can do for now. Probably isn't noticable in general. let d = RuleDerive::from_derive_input_known(input.clone())?; - let mut ctx = ExpansionState::default(); + let mut ctx = ExpansionState::new(); rust_sitter_common::expansion::process_rule(d, &mut ctx)?; // TODO: Allow renaming it. @@ -40,31 +41,32 @@ pub fn expand_rule(input: DeriveInput) -> Result { let attrs = input.attrs; let (extract, rule) = match input.data { Data::Struct(DataStruct { fields, .. }) => { - let extract_expr = - gen_struct_or_variant(fields.clone(), None, ident.clone(), attrs.clone())?; + let extract_expr = gen_struct_or_variant( + fields.clone(), + None, + ident.clone(), + attrs.clone(), + &ctx.grammar, + )?; let extract_impl: Item = syn::parse_quote! 
{ - impl ::rust_sitter::Extract<#ident> for #ident { - type LeafFn<'a> = (); - + impl ::rust_sitter::Extract for #ident { #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, ) -> Result> { let node = node.ok_or_else(|| { ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(#ident)) })?; - #extract_expr } } }; let ident_str = ident.to_string(); let rule_impl: Item = syn::parse_quote! { - impl ::rust_sitter::rule::Rule<#ident> for #ident { + impl ::rust_sitter::rule::Rule for #ident { fn produce_ast() -> String { String::new() } @@ -87,6 +89,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { Some(v.ident.clone()), ident.clone(), v.attrs.clone(), + &ctx.grammar, )?; Ok(syn::parse_quote! { #variant_path => return #extract_expr @@ -97,15 +100,12 @@ pub fn expand_rule(input: DeriveInput) -> Result { let enum_name = &ident; let ident_str = enum_name.to_string(); let extract_impl: Item = syn::parse_quote! { - impl ::rust_sitter::Extract<#enum_name> for #enum_name { - type LeafFn<'a> = (); - + impl ::rust_sitter::Extract for #enum_name { #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + _ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, ) -> Result> { let node = node.ok_or_else(|| { ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(#enum_name)) @@ -129,7 +129,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { }; let rule_impl: Item = syn::parse_quote! 
{ - impl ::rust_sitter::rule::Rule<#enum_name> for #enum_name { + impl ::rust_sitter::rule::Rule for #enum_name { fn produce_ast() -> String { String::new() } @@ -180,7 +180,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { }) } -fn gen_field(ident_str: String, leaf: Field) -> Result { +fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result { let leaf_type = &leaf.ty; let leaf_attr = leaf @@ -256,8 +256,10 @@ fn gen_field(ident_str: String, leaf: Field) -> Result { None => (leaf_type.clone(), syn::parse_quote!(None)), }; + let extract_state = rule_def_to_extract(grammar)?; + Ok(syn::parse_quote!({ - ::rust_sitter::__private::extract_field::<#leaf_type,_>(state, source, #ident_str, #closure_expr) + ::rust_sitter::__private::extract_field::<#leaf_type>(state, #extract_state, source, #ident_str) })) } @@ -266,7 +268,16 @@ fn gen_struct_or_variant( variant_ident: Option, containing_type: Ident, container_attrs: Vec, + grammar: &Grammar, ) -> Result { + let path = match &variant_ident { + Some(v) => format!("{containing_type}_{v}"), + None => containing_type.to_string(), + }; + let rule = grammar + .rules + .get(&path) + .expect("Unexpected state, no grammar found"); let children_parsed = if fields == Fields::Unit { let expr = { let dummy_field = Field { @@ -278,10 +289,34 @@ fn gen_struct_or_variant( ty: Type::Verbatim(quote!(())), // unit type. }; - gen_field("unit".to_string(), dummy_field)? + gen_field("unit".to_owned(), dummy_field, rule)? }; vec![ParamOrField::Param(expr)] } else { + // Parse out the rule into its appropriate sub parts. + // All top-level rules at this level are guaranteed to be `SEQ` of `FIELD`s. If a field is + // optional, the optional part comes before the `FIELD` definition, although that may be + // unnecessary. However, we don't need to check the fields specifically, because they can be + // determined by the actual field names instead. 
+ let field_grammars: HashMap<_, _> = match rule.as_seq().expect("Must be a SEQ") { + RuleDef::SEQ { members } => fields + .iter() + .enumerate() + .zip(members) + .map(|((i, field), def)| { + let ident_str = field + .ident + .as_ref() + .map(|v| v.to_string()) + .unwrap_or(format!("{i}")); + (ident_str, def) + }) + .collect(), + _ => { + unreachable!() + } + }; + fields .iter() .enumerate() @@ -299,7 +334,10 @@ fn gen_struct_or_variant( .map(|v| v.to_string()) .unwrap_or(format!("{i}")); - gen_field(ident_str, field.clone())? + let grammar = field_grammars + .get(&ident_str) + .expect("Missing ident grammar"); + gen_field(ident_str, field.clone(), grammar)? }; let field = if let Some(field_name) = &field.ident { @@ -354,6 +392,97 @@ fn gen_struct_or_variant( }; Ok( - syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(node, move |state| #construct_expr)), + syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(stringify!(#construct_name), node, move |state| #construct_expr)), ) } + +fn rule_def_to_extract(def: &RuleDef) -> Result { + let mut states = vec![]; + // Handle if the top level rule is itself optional. + let optional = if let Some(def) = def.as_optional() { + // Don't propogate the optional to all of the inner states. + rule_def_add_state(def, false, &mut states); + true + } else { + rule_def_add_state(def, false, &mut states); + false + }; + let num_states = states.len() as u32; + let states = states.into_iter().enumerate().map(|(state, value)| { + let state = state as u32; + quote! { + #state => #value, + } + }); + Ok(quote! { + ::rust_sitter::extract::ExtractFieldContext::new(#num_states, #optional, |state| { + match state { + #(#states)* + #num_states => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + } + }) + }) +} + +fn rule_def_add_state(def: &RuleDef, optional: bool, states: &mut Vec) { + let s = match def { + RuleDef::SYMBOL { name } => { + quote! 
{ + ::rust_sitter::extract::ExtractFieldState::Str(#name, true, #optional) + } + } + RuleDef::STRING { value } => { + quote! { + ::rust_sitter::extract::ExtractFieldState::Str(#value, false, #optional) + } + } + RuleDef::BLANK => return, + // Not sure what we get here, let's just assume the string is enough though. + RuleDef::PATTERN { .. } => { + return; + } + RuleDef::CHOICE { members } => { + // Special handle the optional case. + if let Some(value) = def.as_optional() { + return rule_def_add_state(value, true, states); + } else { + // TODO: Need to figure out the optional case now, should only produce one state + // that can then enumerate on all the values. + // It could just return all of them as a set/array, _or_ the state function could + // take in the inputs and do the checking for us instead. + let strs = members.iter().map(|s| match s { + RuleDef::STRING { value } => quote! { (#value, false) }, + RuleDef::SYMBOL { name } => quote! { (#name, true) }, + _ => panic!("CHOICE cannot use {s:#?} currently"), + }); + quote! { + ::rust_sitter::extract::ExtractFieldState::Choice(&[#(#strs),*], #optional) + } + } + } + // TODO: Handle subfields appropriately? + RuleDef::FIELD { name: _, content } => { + return rule_def_add_state(content, optional, states); + } + RuleDef::SEQ { members } => { + return members + .iter() + .for_each(|def| rule_def_add_state(def, optional, states)); + } + RuleDef::PREC_DYNAMIC { value: _, content } + | RuleDef::PREC_LEFT { value: _, content } + | RuleDef::PREC_RIGHT { value: _, content } + | RuleDef::PREC { value: _, content } + | RuleDef::TOKEN { content } + | RuleDef::IMMEDIATE_TOKEN { content } => { + return rule_def_add_state(content, optional, states); + } + RuleDef::ALIAS { .. 
} => unreachable!("ALIAS not supported in this context"), + RuleDef::REPEAT { content: _ } => unreachable!("REPEAT not supported in this context"), + RuleDef::REPEAT1 { content: _ } => unreachable!("REPEAT1 not supported in this context"), + RuleDef::RESERVED { .. } => unreachable!("RESERVED not supported in this context"), + }; + + states.push(s); +} diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index d0d0352..ac43368 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -16,11 +16,11 @@ path = "src/lib.rs" [dependencies] tree-sitter.workspace = true rust-sitter-macro = { path = "../macro" } +rust-sitter-types = { path = "../types" } log = "0.4" # This one could be optional. serde_json = "1" serde = { version = "1", features = ["derive"] } -indexmap = { version = "2", features = ["serde"] } [dev-dependencies] insta = "1.39" diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index abd18c0..0fb3ddb 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -4,23 +4,22 @@ //! They need to be public so they can be accessed at all (\*cough\* macro hygiene), but //! they are not intended to actually be called in any other circumstance. 
-use crate::{ - Extract, - extract::{ExtractContext, ExtractError, Result}, -}; +use crate::{extract::{ExtractFieldContext, ExtractFieldIterator, Result}, Extract, ExtractContext}; +use log::{debug, trace}; -pub fn extract_struct_or_variant( - node: tree_sitter::Node, +pub fn extract_struct_or_variant<'tree, T>( + struct_name: &'static str, + node: tree_sitter::Node<'tree>, construct_expr: impl for<'t> Fn(&mut ExtractStructState<'t>) -> Result<'t, T>, -) -> Result { +) -> Result<'tree, T> { + debug!("extract_struct_or_variant node.kind={}", node.kind()); + trace!("extract_struct_or_variant node={}", node); let mut parent_cursor = node.walk(); + let has_children = parent_cursor.goto_first_child(); let mut state = ExtractStructState { - // cursor: Some(parent_cursor), - cursor: if parent_cursor.goto_first_child() { - Some(parent_cursor) - } else { - None - }, + struct_name, + cursor: Some(parent_cursor), + has_children, last_idx: node.start_byte(), last_pt: node.start_position(), // error: ExtractError::empty(), @@ -29,72 +28,123 @@ pub fn extract_struct_or_variant( } pub struct ExtractStructState<'tree> { + struct_name: &'static str, cursor: Option>, + has_children: bool, last_idx: usize, last_pt: tree_sitter::Point, // TODO: Use this. // error: ExtractError, } -pub fn extract_field<'tree, LT: Extract, T>( +pub fn extract_field<'tree, T: Extract>( state: &mut ExtractStructState<'tree>, + field_state: ExtractFieldContext, source: &[u8], - field_name: &str, - closure_ref: Option>, + field_name: &'static str, ) -> Result<'tree, T> { + debug!( + "extract_field struct_name={} field_name={field_name}", + state.struct_name + ); let mut ctx = ExtractContext { last_idx: state.last_idx, last_pt: state.last_pt, field_name, node_kind: "", }; - if let Some(cursor) = state.cursor.as_mut() { - loop { - let n = cursor.node(); - ctx.node_kind = n.kind(); - if n.is_error() { - // println!("Processing error... 
{}, {}", n.kind(), field_name); - // Try and parse it anyway, returning the result if we manage to get it. - if !cursor.goto_first_child() { - state.cursor = None; - ctx.last_idx = n.end_byte(); - ctx.last_pt = n.end_position(); - return Err(ExtractError::new(n, field_name.to_owned())); - } - let n = cursor.node(); - let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; - ctx.last_idx = n.end_byte(); - ctx.last_pt = n.end_position(); + if state.has_children { + if let Some(cursor) = state.cursor.as_mut() { + trace!("extract_field has_children: {}", cursor.node()); + let mut iter = ExtractFieldIterator { + cursor, + field_name, + ctx: field_state, + current: Default::default(), + }; - return Ok(out); - } else if let Some(name) = cursor.field_name() { - if name == field_name { - // TODO: Need to keep going if it fails. - let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; - - if !cursor.goto_next_sibling() { - state.cursor = None; - }; + // Start the iterator. + // Some iteration requires knowing if there is a valid starting state or not. + iter.advance_state()?; - ctx.last_idx = n.end_byte(); - ctx.last_pt = n.end_position(); - - return Ok(out); - } else { - return LT::extract(&mut ctx, None, source, closure_ref); - } - } else { - state.last_idx = n.end_byte(); - state.last_pt = n.end_position(); - } - - if !cursor.goto_next_sibling() { - return LT::extract(&mut ctx, None, source, closure_ref); - } + let result = T::extract_field(&mut ctx, &mut iter, source)?; + // if !iter.cursor.goto_next_sibling() { + // state.cursor = None; + // } + Ok(result) + } else { + // TODO: ??? 
+ T::extract(&mut ctx, None, source) + } + } else if let Some(cursor) = state.cursor.as_mut() { + let n = cursor.node(); + if !cursor.goto_next_sibling() { + state.cursor = None; } + T::extract(&mut ctx, Some(n), source) } else { - LT::extract(&mut ctx, None, source, closure_ref) + T::extract(&mut ctx, None, source) } + // if state.has_children { + // if let Some(cursor) = state.cursor.as_mut() { + // loop { + // let n = cursor.node(); + // ctx.node_kind = n.kind(); + // trace!( + // "extract_field checking node.kind={}, cursor.field_name={:?}", + // n.kind(), + // cursor.field_name() + // ); + // if n.is_error() { + // // println!("Processing error... {}, {}", n.kind(), field_name); + // // Try and parse it anyway, returning the result if we manage to get it. + // if !cursor.goto_first_child() { + // state.cursor = None; + // ctx.last_idx = n.end_byte(); + // ctx.last_pt = n.end_position(); + // return Err(ExtractError::new(n, field_name.to_owned())); + // } + // let n = cursor.node(); + // let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; + // ctx.last_idx = n.end_byte(); + // ctx.last_pt = n.end_position(); + + // return Ok(out); + // } else if let Some(name) = cursor.field_name() { + // if name == field_name { + // // TODO: Need to keep going if it fails. 
+ // let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; + + // if !cursor.goto_next_sibling() { + // state.cursor = None; + // }; + + // ctx.last_idx = n.end_byte(); + // ctx.last_pt = n.end_position(); + + // return Ok(out); + // } else { + // return LT::extract(&mut ctx, None, source, closure_ref); + // } + // } else { + // state.last_idx = n.end_byte(); + // state.last_pt = n.end_position(); + // } + + // if !cursor.goto_next_sibling() { + // return LT::extract(&mut ctx, None, source, closure_ref); + // } + // } + // } else { + // debug!("No cursor, attempting direct extract"); + // LT::extract(&mut ctx, None, source, closure_ref) + // } + // } else if let Some(cursor) = state.cursor.as_mut() { + // debug!("attempting direct node extraction"); + // LT::extract(&mut ctx, Some(cursor.node()), source, closure_ref) + // } else { + // Err(ExtractError::missing_node(&ctx, "unknown")) + // } } // TODO: Handle errors in this one too. @@ -102,7 +152,15 @@ pub fn skip_text<'tree>( state: &mut ExtractStructState<'tree>, field_name: &str, ) -> Result<'tree, ()> { + debug!( + "skip field: {field_name}, has cursor: {}", + state.cursor.is_some() + ); if let Some(cursor) = state.cursor.as_mut() { + debug!( + "skip field: expects: {field_name}, has: {:?}", + cursor.field_name() + ); loop { if let Some(name) = cursor.field_name() { if name == field_name { @@ -122,7 +180,7 @@ pub fn skip_text<'tree>( Ok(()) } -pub fn parse>( +pub fn parse( input: &str, language: impl Fn() -> tree_sitter::Language, ) -> crate::ParseResult { @@ -134,8 +192,6 @@ pub fn parse>( let tree = parser.parse(input, None).expect("Failed to parse"); let root_node = tree.root_node(); - println!("{root_node}"); - let mut errors = vec![]; if root_node.has_error() { crate::error::collect_parsing_errors(&root_node, &mut errors); @@ -144,15 +200,14 @@ pub fn parse>( last_pt: Default::default(), last_idx: 0, field_name: "root", - node_kind: "source_file", + node_kind: "", }; - let result = - 
>::extract(&mut ctx, Some(root_node), input.as_bytes(), None); + let result = ::extract(&mut ctx, Some(root_node), input.as_bytes()); #[allow(clippy::manual_ok_err)] let result = match result { - Err(_e) => { + Err(e) => { // These are actually not really useful yet. - // e.accumulate_parse_errors(&mut errors); + e.accumulate_parse_errors(&mut errors); None } Ok(o) => Some(o), diff --git a/runtime/src/error.rs b/runtime/src/error.rs index f9537b5..d7af557 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -1,7 +1,7 @@ use log::{trace, debug}; use std::{collections::HashSet, ops::Range}; -use crate::{Point, Position, extract::ExtractContext}; +use crate::{ExtractContext, Point, Position}; /// A high level parsing error with useful information extracted already. #[derive(Debug)] @@ -24,11 +24,11 @@ pub enum ParseErrorReason { field: String, }, MissingNode { - node_kind: String, + node_kind: &'static str, type_name: &'static str, }, MissingEnum { - node_kind: String, + node_kind: &'static str, enum_name: &'static str, }, /// Parsed OK, but failed to extract to the given type. @@ -402,7 +402,7 @@ impl<'a> ExtractError<'a> { } } - pub fn missing_node(ctx: &ExtractContext<'_>, type_name: &'static str) -> Self { + pub fn missing_node(ctx: &ExtractContext, type_name: &'static str) -> Self { let position = crate::Position { // TODO: This should be fixed to actually have the full range from the outer node. bytes: ctx.last_idx..ctx.last_idx, @@ -413,14 +413,14 @@ impl<'a> ExtractError<'a> { inner: vec![ExtractErrorInner { position, reason: ExtractErrorReason::MissingNode { - node_kind: ctx.node_kind.to_owned(), + node_kind: ctx.node_kind, type_name, }, }], } } - pub fn missing_enum(ctx: &ExtractContext<'_>, enum_name: &'static str) -> Self { + pub fn missing_enum(ctx: &ExtractContext, enum_name: &'static str) -> Self { let position = crate::Position { // TODO: This should be fixed to actually have the full range from the outer node. 
bytes: ctx.last_idx..ctx.last_idx, @@ -431,7 +431,7 @@ impl<'a> ExtractError<'a> { inner: vec![ExtractErrorInner { position, reason: ExtractErrorReason::MissingEnum { - node_kind: ctx.node_kind.to_owned(), + node_kind: ctx.node_kind, enum_name, }, }], @@ -456,11 +456,11 @@ pub enum ExtractErrorReason<'a> { node: tree_sitter::Node<'a>, }, MissingNode { - node_kind: String, + node_kind: &'static str, type_name: &'static str, }, MissingEnum { - node_kind: String, + node_kind: &'static str, enum_name: &'static str, }, /// Parsed OK, but failed to extract to the given type. diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index 098b86f..1b9dbad 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -1,43 +1,92 @@ use super::Node; +pub mod field; +pub use field::{ExtractFieldState, ExtractFieldContext, ExtractFieldIterator}; +pub use crate::error::ExtractError; +pub type Result<'a, T> = std::result::Result>; + +pub trait Extractor { + fn do_extract<'tree>( + self, + ctx: &mut ExtractContext, + node: Option>, + source: &[u8], + ) -> Result<'tree, E>; +} /// Defines the logic used to convert a node in a Tree Sitter tree to /// the corresponding Rust type. -pub trait Extract { - type LeafFn<'a>: Clone; - fn extract<'a, 'tree>( - ctx: &mut ExtractContext<'_>, +pub trait Extract: Sized { + fn extract<'tree>( + ctx: &mut ExtractContext, node: Option>, source: &[u8], - leaf_fn: Option>, - ) -> Result<'tree, Output>; + ) -> Result<'tree, Self>; + + fn extract_field<'cursor, 'tree>( + ctx: &mut ExtractContext, + it: &mut ExtractFieldIterator<'cursor, 'tree>, + source: &[u8], + ) -> Result<'tree, Self> { + let node = it.next_node()?; + assert!(it.current_node().is_none()); + Self::extract(ctx, node, source) + } } -pub struct ExtractContext<'a> { +pub struct ExtractContext { pub last_idx: usize, pub last_pt: tree_sitter::Point, - pub field_name: &'a str, - pub node_kind: &'a str, + pub field_name: &'static str, + // TODO: Remove this, clean it up. 
+ pub node_kind: &'static str, +} + +pub struct RuleExtractor {} + +impl Extractor for RuleExtractor { + fn do_extract<'tree>( + self, + ctx: &mut ExtractContext, + node: Option>, + source: &[u8], + ) -> Result<'tree, E> { + E::extract(ctx, node, source) + } +} + +pub struct WithLeafExtractor { + _e: std::marker::PhantomData, + base: B, + f: F, +} + +impl WithLeafExtractor { + pub fn new(base: B, f: F) -> WithLeafExtractor { + WithLeafExtractor { + _e: std::marker::PhantomData, + base, + f, + } + } +} + +impl Extractor for WithLeafExtractor +where + B: Extractor, + E: Extract, + O: Extract, + F: FnOnce(E) -> O, +{ + fn do_extract<'tree>( + self, + ctx: &mut ExtractContext, + node: Option>, + source: &[u8], + ) -> Result<'tree, O> { + Ok((self.f)(self.base.do_extract(ctx, node, source)?)) + } } -// #[derive(Default)] -// pub struct ExtractState { -// pub last_idx: usize, -// pub last_pt: tree_sitter::Point, -// pub error: Option, -// } -// -// impl ExtractState { -// pub fn error(&mut self, err: ExtractError) -> &mut Self { -// if let Some(existing) = &mut self.error { -// existing.merge(err); -// } else { -// self.error = Some(err); -// } -// self -// } -// } -pub use crate::error::ExtractError; -pub type Result<'a, T> = std::result::Result>; #[derive(Debug, Clone, Copy)] pub struct NodeExt<'a> { @@ -129,37 +178,37 @@ impl StrOrNode for fn(&NodeExt<'_>) -> L { // // handler_fn!(T1, T2); -/// Map for `#[with(...)]` -pub struct WithLeaf { - _phantom: std::marker::PhantomData, - _f: std::marker::PhantomData, -} - -impl Extract for WithLeaf -where - F: StrOrNode + Clone, -{ - type LeafFn<'a> = F; - - fn extract<'a, 'tree>( - ctx: &mut ExtractContext<'_>, - node: Option>, - source: &[u8], - leaf_fn: Option>, - ) -> Result<'tree, L> { - let node = match node { - Some(n) => n, - None => return Err(ExtractError::missing_node(ctx, "WithLeaf")), - }; - // TODO: Consider if this should be fallible as well. 
- Ok(leaf_fn.expect("No leaf function on WithLeaf").apply( - source, - node, - ctx.last_idx, - ctx.last_pt, - )) - } -} +// /// Map for `#[with(...)]` +// pub struct WithLeaf { +// _phantom: std::marker::PhantomData, +// _f: std::marker::PhantomData, +// } +// +// impl Extract for WithLeaf +// where +// F: StrOrNode + Clone, +// { +// type LeafFn<'a> = F; +// +// fn extract<'a, 'tree>( +// ctx: &mut ExtractContext<'_, 'tree>, +// node: Option>, +// source: &[u8], +// leaf_fn: Option>, +// ) -> Result<'tree, L> { +// let node = match node { +// Some(n) => n, +// None => return Err(ExtractError::missing_node(ctx, "WithLeaf")), +// }; +// // TODO: Consider if this should be fallible as well. +// Ok(leaf_fn.expect("No leaf function on WithLeaf").apply( +// source, +// node, +// ctx.last_idx, +// ctx.last_pt, +// )) +// } +// } // #[derive(Clone)] // pub struct MappedExtract { @@ -194,52 +243,62 @@ where // Common implementations for various types. -impl Extract<()> for () { - type LeafFn<'a> = (); +impl Extract for () { fn extract<'a, 'tree>( - _ctx: &mut ExtractContext<'_>, + _ctx: &mut ExtractContext, _node: Option>, _source: &[u8], - _leaf_fn: Option>, ) -> Result<'tree, ()> { - // TODO: Do we need to handle this here? Does `extract` itself need to expect an error? 
Ok(()) } } -impl, U> Extract> for Option { - type LeafFn<'a> = T::LeafFn<'a>; +impl Extract for Option { fn extract<'a, 'tree>( - ctx: &mut ExtractContext<'_>, + ctx: &mut ExtractContext, node: Option>, source: &[u8], - leaf_fn: Option>, - ) -> Result<'tree, Option> { - node.map(|n| T::extract(ctx, Some(n), source, leaf_fn)) - .transpose() + ) -> Result<'tree, Option> { + node.map(|n| T::extract(ctx, Some(n), source)).transpose() + } + + fn extract_field<'cursor, 'tree>( + ctx: &mut ExtractContext, + it: &mut ExtractFieldIterator<'cursor, 'tree>, + source: &[u8], + ) -> Result<'tree, Self> { + if it.current_node().is_some() { + Ok(Some(T::extract_field(ctx, it, source)?)) + } else { + Ok(None) + } } } -impl, U> Extract> for Box { - type LeafFn<'a> = T::LeafFn<'a>; +impl Extract for Box { fn extract<'a, 'tree>( - ctx: &mut ExtractContext<'_>, + ctx: &mut ExtractContext, node: Option>, source: &[u8], - leaf_fn: Option>, - ) -> Result<'tree, Box> { - Ok(Box::new(T::extract(ctx, node, source, leaf_fn)?)) + ) -> Result<'tree, Box> { + Ok(Box::new(T::extract(ctx, node, source)?)) + } + + fn extract_field<'cursor, 'tree>( + ctx: &mut ExtractContext, + it: &mut ExtractFieldIterator<'cursor, 'tree>, + source: &[u8], + ) -> Result<'tree, Self> { + Ok(Box::new(T::extract_field(ctx, it, source)?)) } } -impl, U> Extract> for Vec { - type LeafFn<'a> = T::LeafFn<'a>; +impl Extract for Vec { fn extract<'a, 'tree>( - ctx: &mut ExtractContext<'_>, + ctx: &mut ExtractContext, node: Option>, source: &[u8], - leaf_fn: Option>, - ) -> Result<'tree, Vec> { + ) -> Result<'tree, Vec> { let node = match node { Some(node) => node, None => return Ok(vec![]), @@ -256,7 +315,7 @@ impl, U> Extract> for Vec { // TODO: Do some error handling here instead. // For now we just ignore it. 
} else if cursor.field_name().is_some() { - match T::extract(ctx, Some(n), source, leaf_fn.clone()) { + match T::extract(ctx, Some(n), source) { Ok(t) => out.push(t), Err(e) => error.merge(e), } @@ -275,23 +334,17 @@ impl, U> Extract> for Vec { macro_rules! extract_from_str { ($t:ty) => { - impl Extract<$t> for $t { - type LeafFn<'a> = (); - fn extract<'a, 'tree>( - _ctx: &mut ExtractContext<'_>, + impl Extract for $t { + fn extract<'tree>( + _ctx: &mut ExtractContext, node: Option>, source: &[u8], - _leaf_fn: Option>, ) -> Result<'tree, Self> { let node = match node { Some(n) => n, None => { - return Err(ExtractError::missing_node(_ctx, stringify!($t))); - // panic!( - // "No node found in parsing extract: {} - for field: {}", - // stringify!($t), - // _ctx.field_name - // ); + panic!("Better error"); + // return Err(ExtractError::missing_node(ctx, stringify!($t))); } }; let text = node.utf8_text(source).expect("No text found for node"); @@ -320,23 +373,25 @@ extract_from_str!(String); macro_rules! extract_for_tuple { ($($t:ident),*) => { - impl<$($t: Extract<$t>),*> Extract<($($t),*)> for ($($t),*) { - type LeafFn<'a> = (); - fn extract<'a, 'tree>( - ctx: &mut ExtractContext<'_>, - node: Option>, - source: &[u8], - _leaf_fn: Option>, - ) -> Result<'tree, Self> { - let node = node.ok_or_else(|| ExtractError::missing_node(ctx, stringify!($($t),*)))?; - let mut c = node.walk(); - let mut it = node.children(&mut c); - Ok(( - $( - $t::extract(ctx, it.next(), source, None)? + impl<$($t: Extract),*> Extract for ($($t),*) { + fn extract<'tree>( + _ctx: &mut ExtractContext, + _node: Option>, + _source: &[u8], + ) -> Result<'tree, Self> { + panic!("Cannot be implemented on tuples") + } + + fn extract_field<'cursor, 'tree>(ctx: &mut ExtractContext, it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8]) -> Result<'tree, Self> { + // NOTE: Nested tuples are not supported as it stands. 
+ log::debug!("extract_field on tuple"); + Ok(( + $( + $t::extract(ctx, it.next_node()?, source)? ),* - )) - } + )) + } + } }; diff --git a/runtime/src/extract/field.rs b/runtime/src/extract/field.rs new file mode 100644 index 0000000..c4d5d88 --- /dev/null +++ b/runtime/src/extract/field.rs @@ -0,0 +1,177 @@ +use super::Result; +use log::debug; + +pub struct ExtractFieldIterator<'cursor, 'tree: 'cursor> { + pub(crate) cursor: &'cursor mut tree_sitter::TreeCursor<'tree>, + pub(crate) field_name: &'static str, + pub(crate) ctx: ExtractFieldContext, + pub(crate) current: NodeIterState<'tree>, +} + +pub struct ExtractFieldContext { + state_fn: fn(u32) -> ExtractFieldState, + state: u32, + num_states: u32, + optional: bool, +} + +impl ExtractFieldContext { + pub fn new(num_states: u32, optional: bool, state_fn: fn(u32) -> ExtractFieldState) -> Self { + Self { + state_fn, + state: 0, + num_states, + optional, + } + } +} + +#[derive(Debug)] +pub enum ExtractFieldState { + // expected string, is_named, is_optional + Str(&'static str, bool, bool), + // Current implementation only really supports doing this with a list of strings. + Choice(&'static [(&'static str, bool)], bool), + Complete, + // State went too far. 
+ Overflow, +} + +impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { + fn advance_node(&mut self) { + loop { + if self.cursor.node().is_extra() { + if !self.cursor.goto_next_sibling() { + return; + } + continue; + } + return; + } + } + + fn handle_optional_err(&mut self, error: &str) -> Result<'tree, ()> { + if self.ctx.optional && self.ctx.state == 1 { + debug!("advance_state: optional, outputting None"); + self.ctx.state = self.ctx.num_states + 1; + self.current = NodeIterState::Complete; + Ok(()) + } else { + todo!("{}", error); + } + } + pub fn advance_state(&mut self) -> Result<'tree, ()> { + if self.current == NodeIterState::Complete { + debug!("advance_state: verifying completion"); + self.finalize()?; + return Ok(()); + } + self.advance_node(); + let n = self.cursor.node(); + debug!( + "advance_state: field_name={}, state={}, num_states={}, optional={}, node={}, node.kind={}", + self.field_name, + self.ctx.state, + self.ctx.num_states, + self.ctx.optional, + n, + n.kind() + ); + debug!( + "advance_state: cursor.field_name()={:?}", + self.cursor.field_name() + ); + + let state = (self.ctx.state_fn)(self.ctx.state); + self.ctx.state += 1; + debug!("advance_state: got state={:?}", state); + match state { + ExtractFieldState::Str(expected, named, optional) => { + if self.cursor.field_name() != Some(self.field_name) { + debug!("advance_state: field names didn't match"); + // Check if we have an optional overall. + self.handle_optional_err("error fields didn't match")?; + return Ok(()); + } + if n.kind() == expected && n.is_named() == named { + debug!("advance_state: state matched, advancing iteration"); + // advance the cursor and return the current node. 
+ self.cursor.goto_next_sibling(); + self.current = NodeIterState::Node(Some(n)); + Ok(()) + } else if optional { + debug!("advance_state: state didn't match, but optional, skipping"); + self.current = NodeIterState::Node(None); + Ok(()) + } else { + self.handle_optional_err("error state didn't match")?; + Ok(()) + } + } + ExtractFieldState::Choice(values, optional) => { + if self.cursor.field_name() != Some(self.field_name) { + debug!("advance_state: field names didn't match"); + self.handle_optional_err("error fields didn't match")?; + return Ok(()); + } + for (value, named) in values { + if n.kind() == *value && n.is_named() == *named { + // Found one. + self.cursor.goto_next_sibling(); + self.current = NodeIterState::Node(Some(n)); + return Ok(()); + } + } + if optional { + self.current = NodeIterState::Node(None); + Ok(()) + } else { + self.handle_optional_err("error none of the values matched")?; + Ok(()) + } + } + ExtractFieldState::Complete => { + debug!("advance_state: got complete state"); + self.current = NodeIterState::Complete; + Ok(()) + } + ExtractFieldState::Overflow => { + self.handle_optional_err("error state overflowed")?; + Ok(()) + } + } + } + + pub fn next_node(&mut self) -> Result<'tree, Option>> { + let node = self.current_node(); + self.advance_state()?; + Ok(node) + } + + pub fn current_node(&self) -> Option> { + match self.current { + NodeIterState::Node(n) => { + debug!("current_node: {:?}", n.map(|n| n.kind())); + n + } + NodeIterState::Complete => None, + // TODO: Should error? 
+ NodeIterState::Start => None, + } + } + + pub fn finalize(&self) -> Result<'tree, ()> { + if self.ctx.state != self.ctx.num_states + 1 { + todo!("error state didn't finalize") + } + Ok(()) + } +} + +#[derive(Default, Clone, Copy, PartialEq)] +pub(crate) enum NodeIterState<'tree> { + Node(Option>), + #[default] + Start, + Complete, +} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 2d189bc..a51a1c8 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -1,13 +1,12 @@ pub mod __private; pub mod error; pub mod extract; -pub mod grammar; pub mod rule; +pub use rust_sitter_types::grammar; pub use rule::Language; -use extract::ExtractContext; -pub use extract::{Extract, WithLeaf}; +pub use extract::{Extract, ExtractContext, WithLeafExtractor}; use serde::{Deserialize, Serialize}; use std::ops::Deref; @@ -132,16 +131,14 @@ impl From for Point { } } -impl, U> Extract> for Spanned { - type LeafFn<'a> = T::LeafFn<'a>; +impl Extract for Spanned { fn extract<'a, 'tree>( - ctx: &mut ExtractContext<'_>, + ctx: &mut ExtractContext, node: Option>, source: &[u8], - leaf_fn: Option>, - ) -> extract::Result<'tree, Spanned> { + ) -> extract::Result<'tree, Spanned> { Ok(Spanned { - value: T::extract(ctx, node, source, leaf_fn)?, + value: T::extract(ctx, node, source)?, position: node.map(Position::from_node).unwrap_or_else(|| Position { bytes: ctx.last_idx..ctx.last_idx, start: Point::from_tree_sitter(ctx.last_pt), @@ -149,4 +146,28 @@ impl, U> Extract> for Spanned { }), }) } + + fn extract_field<'cursor, 'tree>( + ctx: &mut ExtractContext, + it: &mut extract::ExtractFieldIterator<'cursor, 'tree>, + source: &[u8], + ) -> extract::Result<'tree, Self> { + // TODO: Figure this out correctly. We need to extend the span over all of the consumed + // nodes when we do this. 
+ let start_byte = ctx.last_idx; + let start = ctx.last_pt; + let value = T::extract_field(ctx, it, source)?; + // We need to make sure these get updated; maybe in this case it should just be in the + // iterator instead of in here. + let end_byte = ctx.last_idx; + let end = ctx.last_pt; + Ok(Spanned { + value, + position: Position { + bytes: start_byte..end_byte, // TODO: This is incorrect, needs to be fixed. + start: Point::from_tree_sitter(start), + end: Point::from_tree_sitter(end), + }, + }) + } } diff --git a/runtime/src/rule.rs b/runtime/src/rule.rs index 88f6df4..46ba6f8 100644 --- a/runtime/src/rule.rs +++ b/runtime/src/rule.rs @@ -2,7 +2,7 @@ use tree_sitter::Node; use crate::{Extract, NodeParseResult, ParseResult, extract::ExtractContext}; -pub trait Rule: Extract { +pub trait Rule: Extract { // TODO: Use the grammar::RuleDef and grammar::Grammar // For this to work as expected we need a #[derive(Language)], or at least a `Language` trait // which then has the `parse` function and the `generate_grammar() -> grammar::Grammar` @@ -13,23 +13,23 @@ pub trait Rule: Extract { fn rule_name() -> &'static str; /// Extracts directly from a node. - fn extract_node<'a>(n: Node<'a>, source: &[u8]) -> NodeParseResult<'a, Output> + fn extract_node<'a>(n: Node<'a>, source: &[u8]) -> NodeParseResult<'a, Self> where Self: Sized, { let mut ctx = ExtractContext { last_pt: n.start_position(), last_idx: n.start_byte(), - node_kind: n.kind(), - // TODO: ??? - field_name: "", + field_name: Self::rule_name(), + node_kind: "", }; // Extract the errors, and try to parse anyway. let mut errors = vec![]; if n.has_error() { crate::error::collect_node_errors(n, |e| errors.push(e)); } - let result = Self::extract(&mut ctx, Some(n), source, None); + // TODO: Review this!!! 
+ let result = Self::extract(&mut ctx, Some(n), source); NodeParseResult { result, errors } } } diff --git a/tool/src/lib.rs b/tool/src/lib.rs index 720df0e..eddcfef 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -36,6 +36,7 @@ impl ParserBuilder { Err(e) => panic!("{e}"), Ok(None) => {} Ok(Some(grammar)) => { + let grammar = serde_json::to_value(grammar).unwrap(); // TODO: We want to generate better errors here as well. However, it isn't really // possible to generate it until we can produce a full grammar, which we also can't do // if we derive on Rule. @@ -70,15 +71,11 @@ fn generate_parser(grammar: &serde_json::Value, out_dir: Option<&Path>) -> Resul tempfile.path() }; let _sysroot_dir = write_grammar_and_c_to_dir(&grammar_name, grammar, &grammar_c, dir); - // let grammar_dir = Path::new(out_dir.as_str()).join(format!("grammar_{grammar_name}",)); - // if grammar_dir.is_dir() { - // std::fs::remove_dir_all(&grammar_dir).expect("Couldn't clear old artifacts"); - // } - // std::fs::DirBuilder::new() - // .recursive(true) - // .create(grammar_dir.clone()) - // .expect("Couldn't create grammar JSON directory"); - // grammar_dir + // Check if we have an additional output directory. 
+ if let Ok(output) = std::env::var("RUST_SITTER_PARSER_OUTPUT") { + let output: &Path = output.as_ref(); + write_grammar_and_c_to_dir(&grammar_name, grammar, &grammar_c, output); + } let mut c_config = cc::Build::new(); c_config.std("c11").include(dir); @@ -162,9 +159,12 @@ mod tests { use tree_sitter_generate::generate_parser_for_grammar; fn generate_grammar(item: ItemMod) -> serde_json::Value { let (_, items) = item.content.unwrap(); - rust_sitter_common::expansion::generate_grammar(items) - .unwrap() - .unwrap() + serde_json::to_value( + rust_sitter_common::expansion::generate_grammar(items) + .unwrap() + .unwrap(), + ) + .unwrap() } #[test] diff --git a/types/Cargo.toml b/types/Cargo.toml new file mode 100644 index 0000000..948004d --- /dev/null +++ b/types/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "rust-sitter-types" +edition = "2024" +version.workspace = true +authors.workspace = true + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" +indexmap = { version = "2", features = ["serde"] } diff --git a/runtime/src/grammar.rs b/types/src/grammar.rs similarity index 73% rename from runtime/src/grammar.rs rename to types/src/grammar.rs index b3bb220..3b2a881 100644 --- a/runtime/src/grammar.rs +++ b/types/src/grammar.rs @@ -16,7 +16,7 @@ pub struct Grammar { pub extras: Vec, } -#[derive(Deserialize, Serialize)] +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(tag = "type")] #[allow(non_camel_case_types)] #[allow(clippy::upper_case_acronyms)] @@ -32,6 +32,7 @@ pub enum RuleDef { }, PATTERN { value: String, + #[serde(skip_serializing_if = "Option::is_none")] flags: Option, }, SYMBOL { @@ -81,13 +82,61 @@ pub enum RuleDef { }, } -#[derive(Deserialize, Serialize)] +impl RuleDef { + pub fn is_symbol(&self) -> bool { + matches!(self, RuleDef::SYMBOL { .. 
}) + } + + pub fn is_blank(&self) -> bool { + matches!(self, RuleDef::BLANK) + } + + pub fn optional(rule: RuleDef) -> RuleDef { + RuleDef::CHOICE { + members: vec![RuleDef::BLANK, rule], + } + } + + pub fn as_optional(&self) -> Option<&RuleDef> { + match self { + Self::CHOICE { members } => match members.as_slice() { + &[ref rule, RuleDef::BLANK] | &[RuleDef::BLANK, ref rule] => Some(rule), + _ => None, + }, + Self::PREC { value: _, content } + | Self::PREC_LEFT { value: _, content } + | Self::PREC_RIGHT { value: _, content } + | Self::PREC_DYNAMIC { value: _, content } => content.as_optional(), + _ => None, + } + } + + /// Pull out a sequence, including through precedence unwrapping. + pub fn as_seq(&self) -> Option<&RuleDef> { + match self { + Self::SEQ { .. } => Some(self), + Self::PREC { value: _, content } + | Self::PREC_LEFT { value: _, content } + | Self::PREC_RIGHT { value: _, content } + | Self::PREC_DYNAMIC { value: _, content } => content.as_seq(), + _ => None, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(untagged)] pub enum PrecedenceValue { Integer(i32), Name(String), } +impl From for PrecedenceValue { + fn from(value: i32) -> Self { + Self::Integer(value) + } +} + impl Grammar { /// Starting from `rule_name`, find all symbols (named or anonymous) which can be reached. pub fn reachable_set<'a>(&'a self, rule_name: &str) -> Option> { diff --git a/types/src/lib.rs b/types/src/lib.rs new file mode 100644 index 0000000..ea680a6 --- /dev/null +++ b/types/src/lib.rs @@ -0,0 +1,2 @@ + +pub mod grammar; From dc4cbd18403fcdc255ced84db1c76af573285938 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Sun, 24 Aug 2025 13:48:59 -0500 Subject: [PATCH 38/50] Re-implement WithLeaf correctly. 
--- macro/src/expansion.rs | 59 +++---- macro/src/lib.rs | 330 +-------------------------------------- runtime/src/__private.rs | 86 ++-------- runtime/src/extract.rs | 303 +++++++++++++++-------------------- runtime/src/lib.rs | 14 +- runtime/src/rule.rs | 5 +- 6 files changed, 178 insertions(+), 619 deletions(-) diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index b07d903..ff65a6d 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -51,11 +51,14 @@ pub fn expand_rule(input: DeriveInput) -> Result { let extract_impl: Item = syn::parse_quote! { impl ::rust_sitter::Extract for #ident { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] fn extract<'tree>( ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], + _l: Self::LeafFn, ) -> Result> { let node = node.ok_or_else(|| { ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(#ident)) @@ -101,11 +104,14 @@ pub fn expand_rule(input: DeriveInput) -> Result { let ident_str = enum_name.to_string(); let extract_impl: Item = syn::parse_quote! 
{ impl ::rust_sitter::Extract for #enum_name { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] fn extract<'tree>( _ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], + _l: Self::LeafFn, ) -> Result> { let node = node.ok_or_else(|| { ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(#enum_name)) @@ -188,15 +194,10 @@ fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result .iter() .find(|attr| sitter_attr_matches(attr, "leaf")); - let transform = leaf.attrs.iter().find_map(|attr| { - if sitter_attr_matches(attr, "transform") || sitter_attr_matches(attr, "with") { - Some((false, attr)) - } else if sitter_attr_matches(attr, "with_node") { - Some((true, attr)) - } else { - None - } - }); + let transform = leaf + .attrs + .iter() + .find(|attr| sitter_attr_matches(attr, "transform") || sitter_attr_matches(attr, "with")); if transform.is_some() && leaf_attr.is_none() { return Err(Error::new(leaf.span(), "Cannot transform non-leaf nodes")); @@ -222,45 +223,33 @@ fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result })); } - // NOTE (JAB, 2025-07-17): We want to use this eventually in the extract generation, so it - // makes sense to parse it here. Additionally, we get compile time errors at this level instead - // of at the parser generation phase. let leaf_input = leaf_attr.map(|a| a.parse_args::()).transpose()?; // But for now, we just evaluate it to make sure it works correctly. if let Some(leaf_input) = leaf_input { leaf_input.evaluate()?; } - let (leaf_type, closure_expr): (Type, Expr) = match transform { - Some((is_node, closure)) => { + let extractor: Expr = + parse_quote! { ::rust_sitter::extract::BaseExtractor::default() }; + + let (leaf_type, leaf_fn): (Type, Expr) = match transform { + Some(closure) => { let closure = closure.parse_args::()?; let mut non_leaf = HashSet::new(); - // Major hackery... 
- if !is_node { - non_leaf.insert("Spanned"); - non_leaf.insert("Box"); - non_leaf.insert("Option"); - non_leaf.insert("Vec"); - } - let wrapped_leaf_type = wrap_leaf_type(leaf_type, &non_leaf); - let input_type: syn::Type = if is_node { - syn::parse_quote!(&::rust_sitter::extract::NodeExt<'_>) - } else { - syn::parse_quote!(&str) - }; - ( - wrapped_leaf_type, - syn::parse_quote!(Some((#closure) as fn(#input_type) -> #leaf_type)), - ) + non_leaf.insert("Spanned"); + non_leaf.insert("Box"); + non_leaf.insert("Option"); + let ty = wrap_leaf_type(leaf_type, &non_leaf); + (ty, closure) } - None => (leaf_type.clone(), syn::parse_quote!(None)), + None => (leaf_type.clone(), parse_quote! { () }), }; let extract_state = rule_def_to_extract(grammar)?; - Ok(syn::parse_quote!({ - ::rust_sitter::__private::extract_field::<#leaf_type>(state, #extract_state, source, #ident_str) - })) + Ok(parse_quote! { + ::rust_sitter::__private::extract_field::<#leaf_type, _>(#extractor, #leaf_fn, state, #extract_state, source, #ident_str) + }) } fn gen_struct_or_variant( diff --git a/macro/src/lib.rs b/macro/src/lib.rs index 96209c4..833a666 100644 --- a/macro/src/lib.rs +++ b/macro/src/lib.rs @@ -10,6 +10,7 @@ use expansion::*; // struct Function { // name: String, // inputs: Vec, +// } // grammar! { // rule: seq("function", $.ident, "(", repeat($.input), ")") -> |id, inputs| Function { name, // inputs: inputs.into() }; @@ -19,7 +20,7 @@ use expansion::*; // // } // #[proc_macro] -// pub fn grammar2(input: proc_macro::TokenStream) -> proc_macro::TokenStream { +// pub fn grammar(input: proc_macro::TokenStream) -> proc_macro::TokenStream { // grammar::parse_grammar_macro(input) // } @@ -40,7 +41,6 @@ use expansion::*; // externals, inline, word, supertypes, etc. to fill out the full grammar specification. extras, with, - with_node, transform, sep_by, // Helper! 
@@ -56,332 +56,6 @@ pub fn derive_rule(input: proc_macro::TokenStream) -> proc_macro::TokenStream { .into() } -// /// Mark a module to be analyzed for a Rust Sitter grammar. Takes a single, unnamed argument, which -// /// specifies the name of the grammar. This name must be unique across all Rust Sitter grammars within -// /// a compilation unit. -// #[proc_macro_attribute] -// pub fn grammar( -// attr: proc_macro::TokenStream, -// input: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// let attr_tokens: proc_macro2::TokenStream = attr.into(); -// let module: ItemMod = parse_macro_input!(input); -// let expanded = derive_rule(syn::parse_quote! { -// #[rust_sitter::grammar[#attr_tokens]] -// #module -// }) -// .map(ToTokens::into_token_stream) -// .unwrap_or_else(syn::Error::into_compile_error); -// proc_macro::TokenStream::from(expanded) -// } - -// #[proc_macro_attribute] -// /// Marks the top level AST node where parsing should start. -// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::language] -// /// pub struct Code { -// /// ... -// /// } -// /// ``` -// pub fn language( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// This annotation marks a node as extra, which can safely be skipped while parsing. -// /// This is useful for handling whitespace/newlines/comments. -// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::extra] -// /// struct Whitespace { -// /// #[rust_sitter::leaf(re(r"\s"))] -// /// _whitespace: (), -// /// } -// /// ``` -// pub fn extra( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Defines a field which matches a specific token in the source string. 
-// /// The token can be defined by passing one of two arguments -// /// - `text`: a string literal that will be exactly matched -// /// - `pattern`: a regular expression that will be matched against the source string -// /// -// /// If the resulting token needs to be converted into a richer type at runtime, -// /// such as a number, then the `transform` argument can be used to specify a function -// /// that will be called with the token's text. -// /// -// /// The attribute can also be applied to a struct or enum variant with no fields. -// /// -// /// ## Examples -// /// -// /// Using the `leaf` attribute on a field: -// /// ```ignore -// /// Number( -// /// #[rust_sitter::leaf(re(r"\d+"))] -// /// u32 -// /// ) -// /// ``` -// /// -// /// Using the attribute on a unit struct or unit enum variant: -// /// ```ignore -// /// #[rust_sitter::leaf("9")] -// /// struct BigDigit; -// /// -// /// enum SmallDigit { -// /// #[rust_sitter::leaf("0")] -// /// Zero, -// /// #[rust_sitter::leaf("1")] -// /// One, -// /// } -// /// ``` -// /// -// pub fn leaf( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Defines text in the grammar that should be parsed but not explicitly used. No explicit rule is -// /// created and these segments are inlined. -// /// -// /// ## Example -// /// ```ignore -// /// struct Function { -// /// #[text("function")] -// /// _function: (), -// /// name: Ident, -// /// #[text("(")] -// /// _lparen: (), -// /// // ... -// /// } -// /// ``` -// pub fn text( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Defines a field that does not correspond to anything in the input string, -// /// such as some metadata. Takes a single, unnamed argument, which is the value -// /// used to populate the field at runtime. 
-// /// -// /// ## Example -// /// ```ignore -// /// struct MyNode { -// /// ..., -// /// #[rust_sitter::skip(false)] -// /// node_visited: bool -// /// } -// /// ``` -// pub fn skip( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// /// Applies a custom transformation for parsing the input text of a `leaf` node. -// /// Without using `with` the default extractor is applied. -// /// -// /// ## Example -// /// ```ignore -// /// struct CustomInt( -// /// #[leaf(re(r"\d+"))] -// /// #[with(plus_one)] -// /// i32 -// /// ); -// /// -// /// fn plus_one(s: &str) -> i32 { -// /// s.parse::().unwrap() + 1 -// /// } -// /// ``` -// #[proc_macro_attribute] -// pub fn with( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// /// Alias for `with`. -// #[proc_macro_attribute] -// pub fn transform( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Defines a precedence level for a non-terminal that has no associativity. -// /// -// /// This annotation takes a single, unnamed parameter, which specifies the precedence level. -// /// This is used to resolve conflicts with other non-terminals, so that the one with the higher -// /// precedence will bind more tightly (appear lower in the parse tree). -// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::prec(1)] -// /// PriorityExpr(Box, Box) -// /// ``` -// pub fn prec( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Defines a precedence level for a non-terminal that should be left-associative. -// /// For example, with subtraction we expect 1 - 2 - 3 to be parsed as (1 - 2) - 3, -// /// which corresponds to a left-associativity. 
-// /// -// /// This annotation takes a single, unnamed parameter, which specifies the precedence level. -// /// This is used to resolve conflicts with other non-terminals, so that the one with the higher -// /// precedence will bind more tightly (appear lower in the parse tree). -// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::prec_left(1)] -// /// Subtract(Box, Box) -// /// ``` -// pub fn prec_left( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Defines a precedence level for a non-terminal that should be right-associative. -// /// For example, with cons we could have 1 :: 2 :: 3 to be parsed as 1 :: (2 :: 3), -// /// which corresponds to a right-associativity. -// /// -// /// This annotation takes a single, unnamed parameter, which specifies the precedence level. -// /// This is used to resolve conflicts with other non-terminals, so that the one with the higher -// /// precedence will bind more tightly (appear lower in the parse tree). -// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::prec_right(1)] -// /// Cons(Box, Box) -// /// ``` -// pub fn prec_right( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// This macro is similar to [`prec`], but the given numerical precedence is applied at runtime instead -// /// of at parser generation time. This is only necessary when handling a conflict dynamically using -// /// [`conflicts`], and when there is a genuine ambiguity: multiple rules correctly -// /// match a given piece of code. In that event, Rust-sitter compares the total dynamic precedence -// /// associated with each rule, and selects the one with the highest total. -// /// -// /// This is similar to dynamic precedence directives in Bison grammars. 
-// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::prec_dynamic(1)] -// /// Cons(Box, Box) -// /// ``` -// pub fn prec_dynamic( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Usually, whitespace is optional before each token. This attribute means that the token will only match if there is no whitespace. -// /// -// /// ## Example -// /// ```ignore -// /// struct StringFragment( -// /// #[rust_sitter::immediate] -// /// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] -// /// () -// /// ); -// /// ``` -// pub fn immediate( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// Allows the leaf node sequence to be created as a single token. -// /// -// /// ## Example -// /// ```ignore -// /// struct StringFragment( -// /// #[rust_sitter::token] -// /// #[rust_sitter::leaf(pattern(r"[^"\\]+"))] -// /// () -// /// ); -// /// ``` -// pub fn token( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// On `Vec<_>` typed fields, specifies a non-terminal that should be parsed in between the elements. -// /// The [`rust_sitter::repeat`] annotation can be used on the field as well. -// /// -// /// This annotation takes a single, unnamed argument, which specifies a field type to parse. This can -// /// either be a reference to another type, or can be defined as a `leaf` field. Generally, the argument -// /// is parsed using the same rules as an unnamed field of an enum variant. 
-// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::delimited(",")] -// /// numbers: Vec -// /// ``` -// pub fn delimited( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } -// -// #[proc_macro_attribute] -// /// On `Vec<_>` typed fields, specifies additional config for how the repeated elements should -// /// be parsed. In particular, this annotation takes the following named arguments: -// /// - `non_empty` - if this argument is `true`, then there must be at least one element parsed -// /// -// /// ## Example -// /// ```ignore -// /// #[rust_sitter::repeat(non_empty = true)] -// /// numbers: Vec -// /// ``` -// pub fn repeat( -// _attr: proc_macro::TokenStream, -// item: proc_macro::TokenStream, -// ) -> proc_macro::TokenStream { -// item -// } - #[cfg(test)] mod tests { use std::fs::File; diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 0fb3ddb..efcf8b1 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -4,7 +4,10 @@ //! They need to be public so they can be accessed at all (\*cough\* macro hygiene), but //! they are not intended to actually be called in any other circumstance. 
-use crate::{extract::{ExtractFieldContext, ExtractFieldIterator, Result}, Extract, ExtractContext}; +use crate::{ + Extract, ExtractContext, Extractor, + extract::{ExtractFieldContext, ExtractFieldIterator, Result}, +}; use log::{debug, trace}; pub fn extract_struct_or_variant<'tree, T>( @@ -37,12 +40,14 @@ pub struct ExtractStructState<'tree> { // error: ExtractError, } -pub fn extract_field<'tree, T: Extract>( +pub fn extract_field<'tree, T: Extract, E: Extractor>( + extractor: E, + leaf_fn: T::LeafFn, state: &mut ExtractStructState<'tree>, field_state: ExtractFieldContext, source: &[u8], field_name: &'static str, -) -> Result<'tree, T> { +) -> Result<'tree, T::Output> { debug!( "extract_field struct_name={} field_name={field_name}", state.struct_name @@ -67,84 +72,21 @@ pub fn extract_field<'tree, T: Extract>( // Some iteration requires knowing if there is a valid starting state or not. iter.advance_state()?; - let result = T::extract_field(&mut ctx, &mut iter, source)?; - // if !iter.cursor.goto_next_sibling() { - // state.cursor = None; - // } + let result = extractor.do_extract_field(&mut ctx, &mut iter, source, leaf_fn)?; Ok(result) } else { // TODO: ??? - T::extract(&mut ctx, None, source) + extractor.do_extract(&mut ctx, None, source, leaf_fn) } } else if let Some(cursor) = state.cursor.as_mut() { let n = cursor.node(); if !cursor.goto_next_sibling() { state.cursor = None; } - T::extract(&mut ctx, Some(n), source) + extractor.do_extract(&mut ctx, Some(n), source, leaf_fn) } else { - T::extract(&mut ctx, None, source) + extractor.do_extract(&mut ctx, None, source, leaf_fn) } - // if state.has_children { - // if let Some(cursor) = state.cursor.as_mut() { - // loop { - // let n = cursor.node(); - // ctx.node_kind = n.kind(); - // trace!( - // "extract_field checking node.kind={}, cursor.field_name={:?}", - // n.kind(), - // cursor.field_name() - // ); - // if n.is_error() { - // // println!("Processing error... 
{}, {}", n.kind(), field_name); - // // Try and parse it anyway, returning the result if we manage to get it. - // if !cursor.goto_first_child() { - // state.cursor = None; - // ctx.last_idx = n.end_byte(); - // ctx.last_pt = n.end_position(); - // return Err(ExtractError::new(n, field_name.to_owned())); - // } - // let n = cursor.node(); - // let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; - // ctx.last_idx = n.end_byte(); - // ctx.last_pt = n.end_position(); - - // return Ok(out); - // } else if let Some(name) = cursor.field_name() { - // if name == field_name { - // // TODO: Need to keep going if it fails. - // let out = LT::extract(&mut ctx, Some(n), source, closure_ref)?; - - // if !cursor.goto_next_sibling() { - // state.cursor = None; - // }; - - // ctx.last_idx = n.end_byte(); - // ctx.last_pt = n.end_position(); - - // return Ok(out); - // } else { - // return LT::extract(&mut ctx, None, source, closure_ref); - // } - // } else { - // state.last_idx = n.end_byte(); - // state.last_pt = n.end_position(); - // } - - // if !cursor.goto_next_sibling() { - // return LT::extract(&mut ctx, None, source, closure_ref); - // } - // } - // } else { - // debug!("No cursor, attempting direct extract"); - // LT::extract(&mut ctx, None, source, closure_ref) - // } - // } else if let Some(cursor) = state.cursor.as_mut() { - // debug!("attempting direct node extraction"); - // LT::extract(&mut ctx, Some(cursor.node()), source, closure_ref) - // } else { - // Err(ExtractError::missing_node(&ctx, "unknown")) - // } } // TODO: Handle errors in this one too. 
@@ -180,7 +122,7 @@ pub fn skip_text<'tree>( Ok(()) } -pub fn parse( +pub fn parse>( input: &str, language: impl Fn() -> tree_sitter::Language, ) -> crate::ParseResult { @@ -202,7 +144,7 @@ pub fn parse( field_name: "root", node_kind: "", }; - let result = ::extract(&mut ctx, Some(root_node), input.as_bytes()); + let result = ::extract(&mut ctx, Some(root_node), input.as_bytes(), ()); #[allow(clippy::manual_ok_err)] let result = match result { Err(e) => { diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index 1b9dbad..c7b5112 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -1,35 +1,59 @@ use super::Node; pub mod field; -pub use field::{ExtractFieldState, ExtractFieldContext, ExtractFieldIterator}; pub use crate::error::ExtractError; +pub use field::{ExtractFieldContext, ExtractFieldIterator, ExtractFieldState}; + pub type Result<'a, T> = std::result::Result>; +/// Structs which can perform extractions. This allows an extractor to carry additional state +/// around the extraction (see for example, `WithLeafExtractor`). pub trait Extractor { fn do_extract<'tree>( self, ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> Result<'tree, E>; + leaf_fn: E::LeafFn, + ) -> Result<'tree, E::Output>; + + fn do_extract_field<'cursor, 'tree>( + self, + ctx: &mut ExtractContext, + it: &mut ExtractFieldIterator<'cursor, 'tree>, + source: &[u8], + leaf_fn: E::LeafFn, + ) -> Result<'tree, E::Output>; + + fn map(self, next: F) -> MapExtractor + where + F: FnOnce(E) -> O, + Self: Sized, + { + MapExtractor::new(self, next) + } } /// Defines the logic used to convert a node in a Tree Sitter tree to /// the corresponding Rust type. 
pub trait Extract: Sized { + type LeafFn; + type Output; fn extract<'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> Result<'tree, Self>; + leaf_fn: Self::LeafFn, + ) -> Result<'tree, Self::Output>; fn extract_field<'cursor, 'tree>( ctx: &mut ExtractContext, it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8], - ) -> Result<'tree, Self> { + leaf_fn: Self::LeafFn, + ) -> Result<'tree, Self::Output> { let node = it.next_node()?; assert!(it.current_node().is_none()); - Self::extract(ctx, node, source) + Self::extract(ctx, node, source, leaf_fn) } } @@ -41,28 +65,42 @@ pub struct ExtractContext { pub node_kind: &'static str, } -pub struct RuleExtractor {} +/// Default extractor which simply delegates to the `Extract` implementation. +#[derive(Default)] +pub struct BaseExtractor {} -impl Extractor for RuleExtractor { +impl Extractor for BaseExtractor { fn do_extract<'tree>( self, ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> Result<'tree, E> { - E::extract(ctx, node, source) + leaf_fn: E::LeafFn, + ) -> Result<'tree, E::Output> { + E::extract(ctx, node, source, leaf_fn) + } + + fn do_extract_field<'cursor, 'tree>( + self, + ctx: &mut ExtractContext, + it: &mut ExtractFieldIterator<'cursor, 'tree>, + source: &[u8], + leaf_fn: E::LeafFn, + ) -> Result<'tree, E::Output> { + E::extract_field(ctx, it, source, leaf_fn) } } -pub struct WithLeafExtractor { +/// Transforms leaf nodes from one output type to another. 
+pub struct MapExtractor { _e: std::marker::PhantomData, base: B, f: F, } -impl WithLeafExtractor { - pub fn new(base: B, f: F) -> WithLeafExtractor { - WithLeafExtractor { +impl MapExtractor { + pub fn new(base: B, f: F) -> MapExtractor { + MapExtractor { _e: std::marker::PhantomData, base, f, @@ -70,205 +108,99 @@ impl WithLeafExtractor { } } -impl Extractor for WithLeafExtractor +impl Extractor for MapExtractor where B: Extractor, E: Extract, - O: Extract, - F: FnOnce(E) -> O, + O: Extract, + F: FnOnce(E::Output) -> O::Output, { fn do_extract<'tree>( self, ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> Result<'tree, O> { - Ok((self.f)(self.base.do_extract(ctx, node, source)?)) + leaf_fn: O::LeafFn, + ) -> Result<'tree, O::Output> { + Ok((self.f)(self.base.do_extract(ctx, node, source, leaf_fn)?)) } -} - - -#[derive(Debug, Clone, Copy)] -pub struct NodeExt<'a> { - pub node: Node<'a>, - pub source: &'a [u8], - pub last_idx: usize, - pub last_pt: tree_sitter::Point, -} -pub trait StrOrNode { - type Output; - fn apply( + fn do_extract_field<'cursor, 'tree>( self, + ctx: &mut ExtractContext, + it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8], - node: Node<'_>, - last_idx: usize, - last_pt: tree_sitter::Point, - ) -> Self::Output; + leaf_fn: E::LeafFn, + ) -> Result<'tree, O::Output> { + todo!() + } } -impl StrOrNode for fn(&str) -> L { - type Output = L; - fn apply( - self, - source: &[u8], - node: Node<'_>, - _last_idx: usize, - _last_pt: tree_sitter::Point, - ) -> L { - let text = node.utf8_text(source).expect("Could not get text"); - self(text) - } +/// Map for `#[with(...)]` +pub struct WithLeaf { + _phantom: std::marker::PhantomData, + _f: std::marker::PhantomData, } -impl StrOrNode for fn(&NodeExt<'_>) -> L { +impl Extract for WithLeaf +where + F: FnOnce(&str) -> L, +{ + type LeafFn = F; type Output = L; - fn apply( - self, + + fn extract<'a, 'tree>( + ctx: &mut ExtractContext, + node: Option>, source: &[u8], - node: Node<'_>, - 
last_idx: usize, - last_pt: tree_sitter::Point, - ) -> L { - let node = NodeExt { - node, - source, - last_idx, - last_pt, + leaf_fn: Self::LeafFn, + ) -> Result<'tree, L> { + let node = match node { + Some(n) => n, + None => return Err(ExtractError::missing_node(ctx, "WithLeaf")), }; - self(&node) + let text = node.utf8_text(source).unwrap(); + Ok(leaf_fn(text)) } } -// pub trait Handler { -// fn extract( -// self, -// node: Option, -// source: &[u8], -// last_idx: usize, -// last_pt: tree_sitter::Point, -// ) -> Output; -// } -// -// macro_rules! handler_fn { -// ($($t:ident),*) => { -// impl),*> Handler<($($t),*), O> for F -// where F: FnOnce($($t),*) -> O, -// { -// fn extract( -// self, -// node: Option, -// source: &[u8], -// last_idx: usize, -// last_pt: tree_sitter::Point, -// ) -> O { -// let node = node.expect("No node found"); -// let mut c = node.walk(); -// let mut it = node.children(&mut c); -// self( -// $( -// $t::extract(it.next(), source, last_idx, last_pt, None) -// ),* -// ) -// } -// } -// -// }; -// } -// -// handler_fn!(T1, T2); - -// /// Map for `#[with(...)]` -// pub struct WithLeaf { -// _phantom: std::marker::PhantomData, -// _f: std::marker::PhantomData, -// } -// -// impl Extract for WithLeaf -// where -// F: StrOrNode + Clone, -// { -// type LeafFn<'a> = F; -// -// fn extract<'a, 'tree>( -// ctx: &mut ExtractContext<'_, 'tree>, -// node: Option>, -// source: &[u8], -// leaf_fn: Option>, -// ) -> Result<'tree, L> { -// let node = match node { -// Some(n) => n, -// None => return Err(ExtractError::missing_node(ctx, "WithLeaf")), -// }; -// // TODO: Consider if this should be fallible as well. 
-// Ok(leaf_fn.expect("No leaf function on WithLeaf").apply( -// source, -// node, -// ctx.last_idx, -// ctx.last_pt, -// )) -// } -// } - -// #[derive(Clone)] -// pub struct MappedExtract { -// _type: std::marker::PhantomData, -// _prev: std::marker::PhantomData, -// _curr: std::marker::PhantomData, -// } -// -// #[derive(Clone)] -// pub struct MappedLeaf { -// prev: Option

, -// curr: F, -// } -// -// impl Extract for MappedExtract -// where -// F: Extract, -// { -// type LeafFn<'a> = MappedLeaf, &'a dyn Fn(L0) -> L1>; -// fn extract<'a>( -// node: Option, -// source: &[u8], -// last_idx: usize, -// last_pt: tree_sitter::Point, -// leaf_fn: Option>, -// ) -> L1 { -// let mapped = leaf_fn.unwrap(); -// let prev = F::extract(node, source, last_idx, last_pt, mapped.prev); -// (mapped.curr)(prev) -// } -// } - // Common implementations for various types. impl Extract for () { + type LeafFn = (); + type Output = (); fn extract<'a, 'tree>( _ctx: &mut ExtractContext, _node: Option>, _source: &[u8], + _l: (), ) -> Result<'tree, ()> { Ok(()) } } -impl Extract for Option { +impl Extract for Option +{ + type LeafFn = T::LeafFn; + type Output = Option; fn extract<'a, 'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> Result<'tree, Option> { - node.map(|n| T::extract(ctx, Some(n), source)).transpose() + l: T::LeafFn, + ) -> Result<'tree, Option> { + node.map(|n| T::extract(ctx, Some(n), source, l)) + .transpose() } fn extract_field<'cursor, 'tree>( ctx: &mut ExtractContext, it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8], - ) -> Result<'tree, Self> { + l: T::LeafFn, + ) -> Result<'tree, Option> { if it.current_node().is_some() { - Ok(Some(T::extract_field(ctx, it, source)?)) + Ok(Some(T::extract_field(ctx, it, source, l)?)) } else { Ok(None) } @@ -276,29 +208,39 @@ impl Extract for Option { } impl Extract for Box { + type LeafFn = T::LeafFn; + type Output = Box; fn extract<'a, 'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> Result<'tree, Box> { - Ok(Box::new(T::extract(ctx, node, source)?)) + l: Self::LeafFn, + ) -> Result<'tree, Self::Output> { + Ok(Box::new(T::extract(ctx, node, source, l)?)) } fn extract_field<'cursor, 'tree>( ctx: &mut ExtractContext, it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8], - ) -> Result<'tree, Self> { - Ok(Box::new(T::extract_field(ctx, it, 
source)?)) + l: Self::LeafFn, + ) -> Result<'tree, Self::Output> { + Ok(Box::new(T::extract_field(ctx, it, source, l)?)) } } -impl Extract for Vec { +impl Extract for Vec +where + T::LeafFn: Clone, +{ + type LeafFn = T::LeafFn; + type Output = Vec; fn extract<'a, 'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> Result<'tree, Vec> { + l: Self::LeafFn, + ) -> Result<'tree, Self::Output> { let node = match node { Some(node) => node, None => return Ok(vec![]), @@ -315,7 +257,7 @@ impl Extract for Vec { // TODO: Do some error handling here instead. // For now we just ignore it. } else if cursor.field_name().is_some() { - match T::extract(ctx, Some(n), source) { + match T::extract(ctx, Some(n), source, l.clone()) { Ok(t) => out.push(t), Err(e) => error.merge(e), } @@ -335,10 +277,13 @@ impl Extract for Vec { macro_rules! extract_from_str { ($t:ty) => { impl Extract for $t { + type LeafFn = (); + type Output = $t; fn extract<'tree>( _ctx: &mut ExtractContext, node: Option>, source: &[u8], + _l: (), ) -> Result<'tree, Self> { let node = match node { Some(n) => n, @@ -373,21 +318,27 @@ extract_from_str!(String); macro_rules! extract_for_tuple { ($($t:ident),*) => { - impl<$($t: Extract),*> Extract for ($($t),*) { + impl<$($t: Extract),*> Extract for ($($t),*) + where + $(<$t as Extract>::LeafFn: Default),* + { + type LeafFn = (); + type Output = Self; fn extract<'tree>( _ctx: &mut ExtractContext, _node: Option>, _source: &[u8], + _l: (), ) -> Result<'tree, Self> { panic!("Cannot be implemented on tuples") } - fn extract_field<'cursor, 'tree>(ctx: &mut ExtractContext, it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8]) -> Result<'tree, Self> { + fn extract_field<'cursor, 'tree>(ctx: &mut ExtractContext, it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8], _l: ()) -> Result<'tree, Self> { // NOTE: Nested tuples are not supported as it stands. log::debug!("extract_field on tuple"); Ok(( $( - $t::extract(ctx, it.next_node()?, source)? 
+ $t::extract(ctx, it.next_node()?, source, Default::default())? ),* )) } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index a51a1c8..4356f58 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -6,7 +6,7 @@ pub use rust_sitter_types::grammar; pub use rule::Language; -pub use extract::{Extract, ExtractContext, WithLeafExtractor}; +pub use extract::{Extract, ExtractContext, Extractor}; use serde::{Deserialize, Serialize}; use std::ops::Deref; @@ -132,13 +132,16 @@ impl From for Point { } impl Extract for Spanned { + type LeafFn = T::LeafFn; + type Output = Spanned; fn extract<'a, 'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], - ) -> extract::Result<'tree, Spanned> { + l: Self::LeafFn, + ) -> extract::Result<'tree, Self::Output> { Ok(Spanned { - value: T::extract(ctx, node, source)?, + value: T::extract(ctx, node, source, l)?, position: node.map(Position::from_node).unwrap_or_else(|| Position { bytes: ctx.last_idx..ctx.last_idx, start: Point::from_tree_sitter(ctx.last_pt), @@ -151,12 +154,13 @@ impl Extract for Spanned { ctx: &mut ExtractContext, it: &mut extract::ExtractFieldIterator<'cursor, 'tree>, source: &[u8], - ) -> extract::Result<'tree, Self> { + l: Self::LeafFn, + ) -> extract::Result<'tree, Self::Output> { // TODO: Figure this out correctly. We need to extend the span over all of the consumed // nodes when we do this. let start_byte = ctx.last_idx; let start = ctx.last_pt; - let value = T::extract_field(ctx, it, source)?; + let value = T::extract_field(ctx, it, source, l)?; // We need to make sure these get updated; maybe in this case it should just be in the // iterator instead of in here. 
let end_byte = ctx.last_idx; diff --git a/runtime/src/rule.rs b/runtime/src/rule.rs index 46ba6f8..bff7bed 100644 --- a/runtime/src/rule.rs +++ b/runtime/src/rule.rs @@ -2,7 +2,7 @@ use tree_sitter::Node; use crate::{Extract, NodeParseResult, ParseResult, extract::ExtractContext}; -pub trait Rule: Extract { +pub trait Rule: Extract { // TODO: Use the grammar::RuleDef and grammar::Grammar // For this to work as expected we need a #[derive(Language)], or at least a `Language` trait // which then has the `parse` function and the `generate_grammar() -> grammar::Grammar` @@ -28,8 +28,7 @@ pub trait Rule: Extract { if n.has_error() { crate::error::collect_node_errors(n, |e| errors.push(e)); } - // TODO: Review this!!! - let result = Self::extract(&mut ctx, Some(n), source); + let result = Self::extract(&mut ctx, Some(n), source, ()); NodeParseResult { result, errors } } } From 9cff7bc2300cf172f04874df9fdba83244c92e52 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 25 Aug 2025 09:59:35 -0500 Subject: [PATCH 39/50] Finish implementing and fixing tests --- Cargo.lock | 40 ++-- common/src/expansion.rs | 32 +-- common/src/lib.rs | 4 +- example/src/main.rs | 54 +++-- example/src/optionals.rs | 11 +- example/src/repetitions.rs | 21 +- ...e__arithmetic__tests__failed_parses-2.snap | 42 +--- ...e__arithmetic__tests__failed_parses-3.snap | 11 +- ...e__arithmetic__tests__failed_parses-4.snap | 13 +- ...ple__arithmetic__tests__failed_parses.snap | 6 +- ..._arithmetic__tests__failed_parses.snap.new | 39 --- ..._optionals__tests__optional_grammar-2.snap | 6 +- ..._optionals__tests__optional_grammar-3.snap | 10 +- ...ionals__tests__optional_grammar-3.snap.new | 29 --- ..._optionals__tests__optional_grammar-4.snap | 10 +- ..._optionals__tests__optional_grammar-5.snap | 4 +- ..._optionals__tests__optional_grammar-6.snap | 10 +- ..._optionals__tests__optional_grammar-8.snap | 6 +- ...e__optionals__tests__optional_grammar.snap | 4 +- ...ptionals__tests__optional_grammar.snap.new | 
29 --- ...ons__tests__repetitions_grammar-2.snap.new | 26 -- ...tions__tests__repetitions_grammar.snap.new | 36 --- ...xample__words__tests__words_grammar-2.snap | 19 +- ...xample__words__tests__words_grammar-3.snap | 19 +- ..._example__words__tests__words_grammar.snap | 17 +- ...mple__words__tests__words_grammar.snap.new | 36 --- example/src/words.rs | 12 +- macro/src/expansion.rs | 16 +- ...t_sitter_macro__tests__enum_prec_left.snap | 104 +++++--- ...t_sitter_macro__tests__enum_recursive.snap | 85 ++++--- ...macro__tests__enum_transformed_fields.snap | 46 ++-- ...r_macro__tests__enum_with_named_field.snap | 93 ++++--- ...macro__tests__enum_with_unamed_vector.snap | 108 ++++++--- ...r_macro__tests__grammar_unboxed_field.snap | 100 +++++--- ...t_sitter_macro__tests__spanned_in_vec.snap | 145 +++++++---- ...ust_sitter_macro__tests__struct_extra.snap | 92 ++++--- ..._sitter_macro__tests__struct_optional.snap | 115 ++++++--- ...st_sitter_macro__tests__struct_repeat.snap | 145 +++++++---- runtime/src/__private.rs | 16 +- runtime/src/error.rs | 226 ++++++++---------- runtime/src/extract.rs | 17 +- runtime/src/extract/field.rs | 84 +++++-- runtime/src/lib.rs | 22 +- runtime/src/rule.rs | 6 +- ...l__tests__enum_conflicts_prec_dynamic.snap | 2 +- ...st_sitter_tool__tests__enum_prec_left.snap | 2 +- ...st_sitter_tool__tests__enum_recursive.snap | 2 +- ..._tool__tests__enum_transformed_fields.snap | 2 +- ...er_tool__tests__enum_with_named_field.snap | 2 +- ..._tool__tests__enum_with_unamed_vector.snap | 2 +- ...st_sitter_tool__tests__grammar_repeat.snap | 2 +- ...t_sitter_tool__tests__grammar_repeat1.snap | 2 +- ...l__tests__grammar_repeat_no_delimiter.snap | 2 +- ...er_tool__tests__grammar_unboxed_field.snap | 2 +- ...tter_tool__tests__grammar_with_extras.snap | 2 +- .../rust_sitter_tool__tests__immediate.snap | 2 +- ...st_sitter_tool__tests__spanned_in_vec.snap | 2 +- ...t_sitter_tool__tests__struct_optional.snap | 2 +- types/src/lib.rs | 1 - 59 files changed, 1062 
insertions(+), 933 deletions(-) delete mode 100644 example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new delete mode 100644 example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new delete mode 100644 example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new delete mode 100644 example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new delete mode 100644 example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new delete mode 100644 example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new diff --git a/Cargo.lock b/Cargo.lock index da05c43..7ce757e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,9 +69,9 @@ checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "bitflags" -version = "2.9.2" +version = "2.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a65b545ab31d687cff52899d4890855fec459eb6afe0da6417b8a18da87aa29" +checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" [[package]] name = "bumpalo" @@ -81,9 +81,9 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "cc" -version = "1.2.33" +version = "1.2.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ee0f8803222ba5a7e2777dd72ca451868909b1ac410621b676adf07280e9b5f" +checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" dependencies = [ "shlex", ] @@ -198,9 +198,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" 
dependencies = [ "percent-encoding", ] @@ -317,9 +317,9 @@ dependencies = [ [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -338,9 +338,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.10.0" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" dependencies = [ "equivalent", "hashbrown", @@ -473,9 +473,9 @@ checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "portable-atomic" @@ -527,9 +527,9 @@ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "regex" -version = "1.11.1" +version = "1.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" dependencies = [ "aho-corasick", "memchr", @@ -539,9 +539,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" dependencies = [ 
"aho-corasick", "memchr", @@ -550,9 +550,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" [[package]] name = "rust-sitter" @@ -882,9 +882,9 @@ checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", diff --git a/common/src/expansion.rs b/common/src/expansion.rs index f4eef3b..91a222a 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -459,7 +459,7 @@ fn gen_field( leaf_type: Option, attrs: Vec, ctx: &mut ExpansionState, -) -> Result<(RuleDef, bool, bool)> { +) -> Result<(RuleDef, bool)> { let precs = RuleParams::new(&attrs)?; if precs.word { @@ -488,7 +488,7 @@ fn gen_field( if let Some(text) = text_attr { let input: TsInput = text.parse_args()?; - return Ok((precs.apply(input.evaluate()?)?, false, true)); + return Ok((precs.apply(input.evaluate()?)?, false)); } let leaf_input = leaf_attr.map(|a| a.parse_args::()).transpose()?; @@ -503,7 +503,7 @@ fn gen_field( "Empty types must have a leaf or text attribute", )); }; - return Ok((precs.apply(leaf_input.evaluate()?)?, false, false)); + return Ok((precs.apply(leaf_input.evaluate()?)?, false)); } }; @@ -517,27 +517,17 @@ fn gen_field( if !is_vec && !is_option { if let Some(input) = leaf_input { let result = input.evaluate()?; - Ok((precs.apply(result)?, is_option, false)) - // if result.is_symbol() { - // Ok((precs.apply(result)?, is_option, false)) - // } else { - // 
ctx.grammar.rules.insert(path.clone(), precs.apply(result)?); - // Ok((RuleDef::SYMBOL { name: path }, is_option, false)) - // } + Ok((precs.apply(result)?, is_option)) } else { let symbol_name = match filter_inner_type(&leaf_type, &skip_over) { Type::Path(p) => p.path.require_ident()?.to_string(), t => return Err(Error::new(t.span(), "Expected a path")), }; - Ok(( - precs.apply(RuleDef::SYMBOL { name: symbol_name })?, - false, - false, - )) + Ok((precs.apply(RuleDef::SYMBOL { name: symbol_name })?, false)) } } else if is_vec { - let (field_json, field_optional, _is_text) = gen_field( + let (field_json, field_optional) = gen_field( path.clone(), Some(inner_type_vec), leaf_attr.iter().cloned().cloned().collect(), @@ -609,12 +599,10 @@ fn gen_field( name: contents_ident, }, !repeat_non_empty, - false, )) } else { // is_option - let (field_json, field_optional, _is_text) = - gen_field(path, Some(inner_type_option), attrs, ctx)?; + let (field_json, field_optional) = gen_field(path, Some(inner_type_option), attrs, ctx)?; if field_optional { return Err(Error::new( @@ -623,7 +611,7 @@ fn gen_field( )); } - Ok((precs.apply(field_json)?, true, false)) + Ok((precs.apply(field_json)?, true)) } } @@ -646,7 +634,7 @@ fn gen_struct_or_variant( } else { format!("{path}_{ident_str}") }; - let (field_contents, is_option, is_text) = + let (field_contents, is_option) = gen_field(path, Some(field.ty.clone()), field.attrs.clone(), ctx)?; let core = RuleDef::FIELD { @@ -693,7 +681,7 @@ fn gen_struct_or_variant( let base_rule = match fields { Fields::Unit => { - let (field_contents, _is_option, _is_text) = + let (field_contents, _is_option) = gen_field(path.clone(), None, attrs.to_owned(), ctx)?; if is_variant { RuleDef::FIELD { diff --git a/common/src/lib.rs b/common/src/lib.rs index f85c5be..1f9eada 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -279,9 +279,9 @@ pub fn wrap_leaf_type(ty: &Type, skip_over: &HashSet<&str>) -> Type { panic!("Expected angle bracketed path"); } } 
else { - parse_quote!(rust_sitter::WithLeaf<#ty, _>) + parse_quote!(::rust_sitter::extract::WithLeaf<#ty, _>) } } else { - parse_quote!(rust_sitter::WithLeaf<#ty, _>) + parse_quote!(::rust_sitter::extract::WithLeaf<#ty, _>) } } diff --git a/example/src/main.rs b/example/src/main.rs index 07d4d9b..0d9611f 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -1,14 +1,14 @@ use rust_sitter::Language; -use std::io::Write; +use std::{fmt::Debug, io::Write}; use codemap::CodeMap; use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle}; use rust_sitter::error::ParseError; mod arithmetic; -// mod optionals; -// mod repetitions; -// mod words; +mod optionals; +mod repetitions; +mod words; fn convert_parse_error_to_diagnostics(file_span: &codemap::Span, error: &ParseError) -> Diagnostic { let mut message = format!("syntax error. reason: {:?}", error.reason); @@ -36,6 +36,14 @@ fn convert_parse_error_to_diagnostics(file_span: &codemap::Span, error: &ParseEr fn main() { env_logger::init(); + let args: Vec<_> = std::env::args().collect(); + let grammar = if args.len() == 1 { + "Expression" + } else if args.len() == 2 { + &args[1] + } else { + panic!("Unexpected inputs") + }; let stdin = std::io::stdin(); loop { @@ -49,20 +57,30 @@ fn main() { break; } - match arithmetic::grammar::Expression::parse(input).into_result() { - Ok(expr) => println!("{expr:#?}"), - Err(errs) => { - let mut codemap = CodeMap::new(); - let file_span = codemap.add_file("".to_string(), input.to_string()); - let mut diagnostics = vec![]; - for error in errs { - let d = convert_parse_error_to_diagnostics(&file_span.span, &error); - diagnostics.push(d); - } + match grammar { + "Expression" => process_input::(input), + "Repetition" => process_input::(input), + "Optional" => process_input::(input), + "Word" => process_input::(input), + _ => {} + } + } +} - let mut emitter = Emitter::stderr(ColorConfig::Always, Some(&codemap)); - emitter.emit(&diagnostics); +fn 
process_input(input: &str) { + match T::parse(input).into_result() { + Ok(expr) => println!("{expr:#?}"), + Err(errs) => { + let mut codemap = CodeMap::new(); + let file_span = codemap.add_file("".to_string(), input.to_string()); + let mut diagnostics = vec![]; + for error in errs { + let d = convert_parse_error_to_diagnostics(&file_span.span, &error); + diagnostics.push(d); } - }; - } + + let mut emitter = Emitter::stderr(ColorConfig::Always, Some(&codemap)); + emitter.emit(&diagnostics); + } + }; } diff --git a/example/src/optionals.rs b/example/src/optionals.rs index 6eccf04..7eb7f20 100644 --- a/example/src/optionals.rs +++ b/example/src/optionals.rs @@ -1,15 +1,12 @@ #[allow(dead_code)] -mod grammar { - use rust_sitter::Spanned; +pub mod grammar { use rust_sitter::Rule; + use rust_sitter::Spanned; #[derive(Debug, Rule)] #[language] pub struct Language { - #[leaf(re(r"\d+"))] - // Not necessary, done automatically. - // #[rust_sitter::with(|v| v.parse().unwrap())] - v: Option, + v: Option, #[leaf("_")] _s: (), t: Spanned>, @@ -20,8 +17,6 @@ mod grammar { #[derive(Debug, Rule)] pub struct Number { #[leaf(re(r"\d+"))] - // TODO: We are replacing this entirely with a different defintion. - // #[with(|v| v.parse().unwrap())] v: i32, } } diff --git a/example/src/repetitions.rs b/example/src/repetitions.rs index 21e8cfb..da9f652 100644 --- a/example/src/repetitions.rs +++ b/example/src/repetitions.rs @@ -3,21 +3,23 @@ pub mod grammar { #[derive(Debug, Rule)] #[language] - #[extras( - re(r"\s") - )] + #[extras(re(r"\s"))] #[allow(dead_code)] pub struct NumberList { #[sep_by1(",")] - #[leaf(pattern(r"\d+"))] + #[leaf(Number)] numbers: Spanned>>, } + + #[derive(Debug, Rule)] + #[leaf(pattern(r"\d+"))] + pub struct Number; } // TODO: Currently not allowed, needs to be fixed. 
// pub mod grammar2 { // use rust_sitter::{Rule, Spanned}; -// +// // #[derive(Debug, Rule)] // #[language] // #[allow(dead_code)] @@ -25,7 +27,7 @@ pub mod grammar { // #[leaf(pattern(r"\d+"))] // numbers: Spanned>>, // } -// +// // #[derive(Rule)] // #[extra] // struct Whitespace { @@ -33,10 +35,10 @@ pub mod grammar { // _whitespace: (), // } // } -// +// // pub mod grammar3 { // use rust_sitter::{Rule, Spanned}; -// +// // #[derive(Debug, Rule)] // #[language] // #[allow(dead_code)] @@ -47,7 +49,7 @@ pub mod grammar { // #[skip(123)] // metadata: u32, // } -// +// // #[derive(Rule)] // #[extra] // struct Whitespace { @@ -63,7 +65,6 @@ mod tests { #[test] fn repetitions_grammar() { - // Bug in latest tree-sitter: empty parse on a top-level repeat1 segfaults. insta::assert_debug_snapshot!(grammar::NumberList::parse("")); insta::assert_debug_snapshot!(grammar::NumberList::parse("1")); insta::assert_debug_snapshot!(grammar::NumberList::parse("1, 2")); diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap index b80a7de..7040e00 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-2.snap @@ -5,34 +5,6 @@ expression: "grammar::Expression::parse(\"1 - 2 -\")" ParseResult { result: None, errors: [ - ParseError { - node_position: Position { - bytes: 7..7, - start: Point { - line: 1, - column: 8, - }, - end: Point { - line: 1, - column: 8, - }, - }, - error_position: Position { - bytes: 7..7, - start: Point { - line: 1, - column: 8, - }, - end: Point { - line: 1, - column: 8, - }, - }, - lookaheads: [ - "Expression_Number_0", - ], - reason: Missing, - }, ParseError { node_position: Position { bytes: 7..7, @@ -57,11 +29,15 @@ ParseResult { }, }, lookaheads: [], - reason: TypeConversion( - ParseIntError { - kind: Empty, - }, - 
), + reason: Extract { + struct_name: "Expression :: Number", + field_name: "0", + reason: TypeConversion( + ParseIntError { + kind: Empty, + }, + ), + }, }, ], } diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap index b402002..0eecef4 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap @@ -33,12 +33,21 @@ ParseResult { }, }, lookaheads: [ - "Expression_Number_0", + "Expression_Number_token1", + "let", + "log", + "print", "source_file", "Expression_Number", "Expression_Sub", "Expression_Mul", + "Expression_Let", + "Expression_Complex", + "Expression_Print", "Expression", + "LetExpression", + "ComplexExpression", + "PrintExpression", ], reason: Error, }, diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap index 6c273bf..0032772 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap @@ -29,8 +29,10 @@ ParseResult { }, }, lookaheads: [ - "Expression_Sub_1", - "Expression_Mul_1", + "-", + "*", + ")", + ",", ], reason: Error, }, @@ -58,9 +60,10 @@ ParseResult { }, }, lookaheads: [], - reason: MissingEnum { - node_kind: "source_file", - enum_name: "ERROR", + reason: Extract { + struct_name: "Expression", + field_name: "root", + reason: MissingEnum, }, }, ], diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap index a1433c4..f6dff06 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap +++ 
b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap @@ -33,8 +33,10 @@ ParseResult { }, }, lookaheads: [ - "Expression_Sub_1", - "Expression_Mul_1", + "-", + "*", + ")", + ",", ], reason: Error, }, diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new deleted file mode 100644 index 45e4df2..0000000 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap.new +++ /dev/null @@ -1,39 +0,0 @@ ---- -source: example/src/arithmetic.rs -assertion_line: 101 -expression: "grammar::Expression::parse(\"1 + 2\")" ---- -ParseResult { - result: None, - errors: [ - ParseError { - node_position: Position { - bytes: 0..3, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 4, - }, - }, - error_position: Position { - bytes: 2..3, - start: Point { - line: 1, - column: 3, - }, - end: Point { - line: 1, - column: 4, - }, - }, - lookaheads: [ - "-", - "*", - ], - reason: Error, - }, - ], -} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap index c34a0d3..a45beef 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-2.snap @@ -10,14 +10,14 @@ ParseResult { t: Spanned { value: None, position: Position { - bytes: 0..0, + bytes: 1..2, start: Point { line: 1, - column: 1, + column: 2, }, end: Point { line: 1, - column: 1, + column: 3, }, }, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap index 6cd9fe9..7d099ce 100644 --- 
a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap @@ -6,20 +6,22 @@ ParseResult { result: Some( Language { v: Some( - 1, + Number { + v: 1, + }, ), _s: (), t: Spanned { value: None, position: Position { - bytes: 0..0, + bytes: 1..2, start: Point { line: 1, - column: 1, + column: 2, }, end: Point { line: 1, - column: 1, + column: 3, }, }, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new deleted file mode 100644 index 63ccd42..0000000 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-3.snap.new +++ /dev/null @@ -1,29 +0,0 @@ ---- -source: example/src/optionals.rs -assertion_line: 37 -expression: "grammar::Language::parse(\"1_\")" ---- -ParseResult { - result: Some( - Language { - v: None, - _s: (), - t: Spanned { - value: None, - position: Position { - bytes: 0..0, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 1, - }, - }, - }, - _d: None, - }, - ), - errors: [], -} diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap index e6189ac..53290f5 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-4.snap @@ -6,20 +6,22 @@ ParseResult { result: Some( Language { v: Some( - 1, + Number { + v: 1, + }, ), _s: (), t: Spanned { value: None, position: Position { - bytes: 0..0, + bytes: 2..3, start: Point { line: 1, - column: 1, + column: 3, }, end: Point { line: 1, - column: 1, + column: 4, }, }, }, diff --git 
a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap index bb5bc08..2f43687 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-5.snap @@ -6,7 +6,9 @@ ParseResult { result: Some( Language { v: Some( - 1, + Number { + v: 1, + }, ), _s: (), t: Spanned { diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap index 2debce3..69391f5 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap @@ -6,7 +6,9 @@ ParseResult { result: Some( Language { v: Some( - 1, + Number { + v: 1, + }, ), _s: (), t: Spanned { @@ -16,14 +18,14 @@ ParseResult { }, ), position: Position { - bytes: 2..3, + bytes: 3..4, start: Point { line: 1, - column: 3, + column: 4, }, end: Point { line: 1, - column: 4, + column: 5, }, }, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap index 9e8f944..b88c69a 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap @@ -14,14 +14,14 @@ ParseResult { }, ), position: Position { - bytes: 1..2, + bytes: 2..3, start: Point { line: 1, - column: 2, + column: 3, }, end: Point { line: 1, - column: 3, + column: 4, }, }, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap 
index 8d166b7..5783701 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap @@ -10,14 +10,14 @@ ParseResult { t: Spanned { value: None, position: Position { - bytes: 0..0, + bytes: 0..1, start: Point { line: 1, column: 1, }, end: Point { line: 1, - column: 1, + column: 2, }, }, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new deleted file mode 100644 index 109621c..0000000 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar.snap.new +++ /dev/null @@ -1,29 +0,0 @@ ---- -source: example/src/optionals.rs -assertion_line: 35 -expression: "grammar::Language::parse(\"_\")" ---- -ParseResult { - result: Some( - Language { - v: None, - _s: (), - t: Spanned { - value: None, - position: Position { - bytes: 1..1, - start: Point { - line: 1, - column: 2, - }, - end: Point { - line: 1, - column: 2, - }, - }, - }, - _d: None, - }, - ), - errors: [], -} diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new deleted file mode 100644 index af761a2..0000000 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap.new +++ /dev/null @@ -1,26 +0,0 @@ ---- -source: example/src/repetitions.rs -assertion_line: 68 -expression: "grammar::NumberList::parse(\"1\")" ---- -ParseResult { - result: Some( - NumberList { - numbers: Spanned { - value: [], - position: Position { - bytes: 0..1, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 2, - }, - }, - }, - }, - ), - errors: [], -} diff --git 
a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new deleted file mode 100644 index 07619b1..0000000 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap.new +++ /dev/null @@ -1,36 +0,0 @@ ---- -source: example/src/repetitions.rs -assertion_line: 67 -expression: "grammar::NumberList::parse(\"\")" ---- -ParseResult { - result: None, - errors: [ - ParseError { - node_position: Position { - bytes: 0..0, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 1, - }, - }, - error_position: Position { - bytes: 0..0, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 1, - }, - }, - lookaheads: [], - reason: Error, - }, - ], -} diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap index 4a5d976..dc53222 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-2.snap @@ -29,17 +29,17 @@ ParseResult { }, }, lookaheads: [ - "Words_keyword", + "if", "source_file", ], reason: Error, }, ParseError { node_position: Position { - bytes: 5..5, + bytes: 0..5, start: Point { line: 1, - column: 6, + column: 1, }, end: Point { line: 1, @@ -47,10 +47,10 @@ ParseResult { }, }, error_position: Position { - bytes: 5..5, + bytes: 0..5, start: Point { line: 1, - column: 6, + column: 1, }, end: Point { line: 1, @@ -58,9 +58,12 @@ ParseResult { }, }, lookaheads: [], - reason: MissingNode { - node_kind: "Words_word", - type_name: "String", + reason: Extract { + struct_name: "Words", + field_name: "keyword", + reason: FieldExtraction { + message: "fields didn't match, cursor had: None, expected: keyword", + }, }, }, ], diff --git 
a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap index 65b928e..62d68a3 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar-3.snap @@ -29,17 +29,17 @@ ParseResult { }, }, lookaheads: [ - "Words_keyword", + "if", "source_file", ], reason: Error, }, ParseError { node_position: Position { - bytes: 7..7, + bytes: 0..7, start: Point { line: 1, - column: 8, + column: 1, }, end: Point { line: 1, @@ -47,10 +47,10 @@ ParseResult { }, }, error_position: Position { - bytes: 7..7, + bytes: 0..7, start: Point { line: 1, - column: 8, + column: 1, }, end: Point { line: 1, @@ -58,9 +58,12 @@ ParseResult { }, }, lookaheads: [], - reason: MissingNode { - node_kind: "Words_word", - type_name: "String", + reason: Extract { + struct_name: "Words", + field_name: "keyword", + reason: FieldExtraction { + message: "fields didn't match, cursor had: None, expected: keyword", + }, }, }, ], diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap index e1731b7..3d00063 100644 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap @@ -33,10 +33,10 @@ ParseResult { }, ParseError { node_position: Position { - bytes: 2..2, + bytes: 0..2, start: Point { line: 1, - column: 3, + column: 1, }, end: Point { line: 1, @@ -44,10 +44,10 @@ ParseResult { }, }, error_position: Position { - bytes: 2..2, + bytes: 0..2, start: Point { line: 1, - column: 3, + column: 1, }, end: Point { line: 1, @@ -55,9 +55,12 @@ ParseResult { }, }, lookaheads: [], - reason: MissingNode { - node_kind: "Words_keyword", - type_name: "String", + reason: Extract { + struct_name: "Words", + 
field_name: "keyword", + reason: FieldExtraction { + message: "fields didn't match, cursor had: None, expected: keyword", + }, }, }, ], diff --git a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new b/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new deleted file mode 100644 index ce705e0..0000000 --- a/example/src/snapshots/rust_sitter_example__words__tests__words_grammar.snap.new +++ /dev/null @@ -1,36 +0,0 @@ ---- -source: example/src/words.rs -assertion_line: 26 -expression: "grammar::Words::parse(\"if\")" ---- -ParseResult { - result: None, - errors: [ - ParseError { - node_position: Position { - bytes: 0..2, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 3, - }, - }, - error_position: Position { - bytes: 0..2, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 3, - }, - }, - lookaheads: [], - reason: Error, - }, - ], -} diff --git a/example/src/words.rs b/example/src/words.rs index 4d44f56..d68bb4a 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -3,17 +3,19 @@ pub mod grammar { #[derive(Debug, Rule)] #[language] - #[extras( - re(r"\s") - )] + #[extras(re(r"\s"))] #[allow(dead_code)] pub struct Words { #[leaf("if")] keyword: (), - // #[word] - #[leaf(pattern(r"[a-z_]+"))] + #[leaf(Ident)] word: String, } + + #[derive(Debug, Rule)] + #[leaf(pattern(r"[a-z_]+"))] + #[word] + pub struct Ident; } #[cfg(test)] diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index ff65a6d..98cb3e9 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -61,7 +61,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { _l: Self::LeafFn, ) -> Result> { let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(#ident)) + ::rust_sitter::error::ExtractError::missing_node(ctx) })?; #extract_expr } @@ -108,25 +108,25 @@ pub fn expand_rule(input: DeriveInput) -> Result { type LeafFn = 
(); #[allow(non_snake_case)] fn extract<'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext, + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], _l: Self::LeafFn, ) -> Result> { let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(#enum_name)) + ::rust_sitter::error::ExtractError::missing_node(ctx) })?; let mut cursor = node.walk(); if !cursor.goto_first_child() { - return Err(::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(#enum_name))); + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); } loop { let node = cursor.node(); match node.kind() { #(#match_cases),*, k => if !cursor.goto_next_sibling() { - return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } @@ -229,8 +229,7 @@ fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result leaf_input.evaluate()?; } - let extractor: Expr = - parse_quote! { ::rust_sitter::extract::BaseExtractor::default() }; + let extractor: Expr = parse_quote! { ::rust_sitter::extract::BaseExtractor::default() }; let (leaf_type, leaf_fn): (Type, Expr) = match transform { Some(closure) => { @@ -429,6 +428,9 @@ fn rule_def_add_state(def: &RuleDef, optional: bool, states: &mut Vec return, // Not sure what we get here, let's just assume the string is enough though. RuleDef::PATTERN { .. } => { + // It is not possible to have these in direct field extractions, actually. A quirk of + // tree-sitter, they are always set to `.visible = false`. Maybe we can create a PR + // where PATTERNs can be exposed if they are wrapped in a FIELD. 
return; } RuleDef::CHOICE { members } => { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index cd877e2..fbeb228 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -20,72 +20,110 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Expression { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Expression { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) - })?; + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; let mut cursor = node.walk(); - assert!( - cursor.goto_first_child(), - "Could not find a child corresponding to any enum branch" - ); + if !cursor.goto_first_child() { + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); + } loop { let node = cursor.node(); match node.kind() { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expression::Number), node, move |state| { - Ok(Expression::Number({ + Ok(Expression::Number( ::rust_sitter::__private::extract_field::( - state, source, "0", None, - ) - }?)) + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow 
, } + }, + ), + source, + "0", + )?, + )) }, ) } "Expression_Sub" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expression::Sub), node, move |state| { Ok(Expression::Sub( - { - ::rust_sitter::__private::extract_field::, _>( - state, source, "0", None, - ) - }?, - { - ::rust_sitter::__private::extract_field::<(), _>( - state, source, "1", None, - ) - }?, - { - ::rust_sitter::__private::extract_field::, _>( - state, source, "2", None, - ) - }?, + ::rust_sitter::__private::extract_field::, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Str ("Expression" , true , false) , 1u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "0", + )?, + ::rust_sitter::__private::extract_field::<(), _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Str ("-" , false , false) , 1u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "1", + )?, + ::rust_sitter::__private::extract_field::, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Str ("Expression" , true , false) , 1u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "2", + )?, )) }, ) } k => { if !cursor.goto_next_sibling() { - return 
Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } } } } - impl ::rust_sitter::rule::Rule for Expression { + impl ::rust_sitter::rule::Rule for Expression { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index aa98f7c..04e4210 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -20,67 +20,96 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Expression { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Expression { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) - })?; + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; let mut cursor = node.walk(); - assert!( - cursor.goto_first_child(), - "Could not find a child corresponding to any enum branch" - ); + if !cursor.goto_first_child() { + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); + } loop { let node = cursor.node(); match node.kind() { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expression::Number), node, move |state| { - Ok(Expression::Number({ + Ok(Expression::Number( ::rust_sitter::__private::extract_field::( - state, source, "0", None, - ) - }?)) + 
::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "0", + )?, + )) }, ) } "Expression_Neg" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expression::Neg), node, move |state| { Ok(Expression::Neg( - { - ::rust_sitter::__private::extract_field::<(), _>( - state, source, "0", None, - ) - }?, - { - ::rust_sitter::__private::extract_field::, _>( - state, source, "1", None, - ) - }?, + ::rust_sitter::__private::extract_field::<(), _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Str ("-" , false , false) , 1u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "0", + )?, + ::rust_sitter::__private::extract_field::, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Str ("Expression" , true , false) , 1u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "1", + )?, )) }, ) } k => { if !cursor.goto_next_sibling() { - return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } } } } - impl ::rust_sitter::rule::Rule for Expression { + impl ::rust_sitter::rule::Rule for Expression { fn produce_ast() -> String { String::new() } diff --git 
a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index fec40ce..41024e7 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -21,48 +21,58 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Expression { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Expression { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) - })?; + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; let mut cursor = node.walk(); - assert!( - cursor.goto_first_child(), - "Could not find a child corresponding to any enum branch" - ); + if !cursor.goto_first_child() { + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); + } loop { let node = cursor.node(); match node.kind() { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expression::Number), node, move |state| { - Ok(Expression::Number({ + Ok(Expression::Number( ::rust_sitter::__private::extract_field::( - state, source, "0", None, - ) - }?)) + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + 
source, + "0", + )?, + )) }, ) } k => { if !cursor.goto_next_sibling() { - return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } } } } - impl ::rust_sitter::rule::Rule for Expression { + impl ::rust_sitter::rule::Rule for Expression { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 9771655..22969ea 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -20,67 +20,102 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Expr { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Expr { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expr)) - })?; + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; let mut cursor = node.walk(); - assert!( - cursor.goto_first_child(), - "Could not find a child corresponding to any enum branch" - ); + if !cursor.goto_first_child() { + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); + } loop { let node = cursor.node(); match node.kind() { "Expr_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expr::Number), node, move |state| { - Ok(Expr::Number({ - ::rust_sitter::__private::extract_field::( - state, source, "0", None, - ) - 
}?)) + Ok(Expr::Number(::rust_sitter::__private::extract_field::< + u32, + _, + >( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => { + ::rust_sitter::extract::ExtractFieldState::Complete + } + _ => { + ::rust_sitter::extract::ExtractFieldState::Overflow + } + }, + ), + source, + "0", + )?)) }, ) } "Expr_Neg" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expr::Neg), node, move |state| { Ok(Expr::Neg { - _bang: { - ::rust_sitter::__private::extract_field::<(), _>( - state, source, "_bang", None, - ) - }?, - value: { - ::rust_sitter::__private::extract_field::, _>( - state, source, "value", None, - ) - }?, + _bang: ::rust_sitter::__private::extract_field::<(), _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Str ("!" 
, false , false) , 1u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "_bang", + )?, + value: ::rust_sitter::__private::extract_field::, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Str ("Expr" , true , false) , 1u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "value", + )?, }) }, ) } k => { if !cursor.goto_next_sibling() { - return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } } } } - impl ::rust_sitter::rule::Rule for Expr { + impl ::rust_sitter::rule::Rule for Expr { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 84cb5b8..6cb4762 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -3,30 +3,43 @@ source: macro/src/lib.rs expression: "rustfmt_code(&expand_grammar(parse_quote!\n{\n mod grammar\n {\n #[derive(rust_sitter::Rule)] pub struct Number\n { #[leaf(re(r\"\\d+\"))] value: u32 } #[derive(rust_sitter::Rule)]\n #[language] pub enum Expr { Numbers(#[repeat1] Vec) }\n }\n}).to_token_stream().to_string())" --- mod grammar { - impl ::rust_sitter::Extract for Number { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Number { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( 
+ ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Number { - value: { - ::rust_sitter::__private::extract_field::( - state, source, "value", None, - ) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Number), + node, + move |state| { + Ok(Number { + value: ::rust_sitter::__private::extract_field::( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), + source, + "value", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Number { + impl ::rust_sitter::rule::Rule for Number { fn produce_ast() -> String { String::new() } @@ -51,48 +64,69 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Expr { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Expr { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expr)) - })?; + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; let mut cursor = 
node.walk(); - assert!( - cursor.goto_first_child(), - "Could not find a child corresponding to any enum branch" - ); + if !cursor.goto_first_child() { + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); + } loop { let node = cursor.node(); match node.kind() { "Expr_Numbers" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expr::Numbers), node, move |state| { - Ok(Expr::Numbers({ - ::rust_sitter::__private::extract_field::, _>( - state, source, "0", None, - ) - }?)) + Ok(Expr::Numbers(::rust_sitter::__private::extract_field::< + Vec, + _, + >( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( + "List_Expr_Numbers_0", + true, + false, + ), + 1u32 => { + ::rust_sitter::extract::ExtractFieldState::Complete + } + _ => { + ::rust_sitter::extract::ExtractFieldState::Overflow + } + }, + ), + source, + "0", + )?)) }, ) } k => { if !cursor.goto_next_sibling() { - return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } } } } - impl ::rust_sitter::rule::Rule for Expr { + impl ::rust_sitter::rule::Rule for Expr { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index f2f1135..8db0bd2 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -20,30 +20,48 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Language { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Language { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn 
extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Language)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Language { - e: { - ::rust_sitter::__private::extract_field::( - state, source, "e", None, - ) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Language), + node, + move |state| { + Ok(Language { + e: ::rust_sitter::__private::extract_field::( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 1u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( + "Expression", + true, + false, + ), + 1u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), + source, + "e", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Language { + impl ::rust_sitter::rule::Rule for Language { fn produce_ast() -> String { String::new() } @@ -51,48 +69,58 @@ mod grammar { "Language" } } - impl ::rust_sitter::Extract for Expression { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Expression { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - 
::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) - })?; + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; let mut cursor = node.walk(); - assert!( - cursor.goto_first_child(), - "Could not find a child corresponding to any enum branch" - ); + if !cursor.goto_first_child() { + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); + } loop { let node = cursor.node(); match node.kind() { "Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expression::Number), node, move |state| { - Ok(Expression::Number({ + Ok(Expression::Number( ::rust_sitter::__private::extract_field::( - state, source, "0", None, - ) - }?)) + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "0", + )?, + )) }, ) } k => { if !cursor.goto_next_sibling() { - return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } } } } - impl ::rust_sitter::rule::Rule for Expression { + impl ::rust_sitter::rule::Rule for Expression { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index cb50b43..ecbd08f 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -21,30 +21,46 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for NumberList { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for NumberList { + type Output = Self; + type LeafFn = 
(); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(NumberList)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(NumberList { - numbers: { - ::rust_sitter::__private::extract_field::>, _>( - state, source, "numbers", None, - ) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(NumberList), + node, + move |state| { + Ok(NumberList { + numbers: ::rust_sitter::__private::extract_field::>, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new(1u32, true, |state| { + match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( + "List_NumberList_numbers", + true, + false, + ), + 1u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + } + }), + source, + "numbers", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for NumberList { + impl ::rust_sitter::rule::Rule for NumberList { fn produce_ast() -> String { String::new() } @@ -52,28 +68,43 @@ mod grammar { "NumberList" } } - impl ::rust_sitter::Extract for Number { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Number { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> 
Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Number { - v: { - ::rust_sitter::__private::extract_field::(state, source, "v", None) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Number), + node, + move |state| { + Ok(Number { + v: ::rust_sitter::__private::extract_field::( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), + source, + "v", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Number { + impl ::rust_sitter::rule::Rule for Number { fn produce_ast() -> String { String::new() } @@ -81,33 +112,43 @@ mod grammar { "Number" } } - impl ::rust_sitter::Extract for Whitespace { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Whitespace { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Whitespace)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Whitespace { - _whitespace: { - ::rust_sitter::__private::extract_field::<(), _>( + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Whitespace), 
+ node, + move |state| { + Ok(Whitespace { + _whitespace: ::rust_sitter::__private::extract_field::<(), _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), source, "_whitespace", - None, - ) - }?, - }) - }) + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Whitespace { + impl ::rust_sitter::rule::Rule for Whitespace { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index d18c632..fddafa7 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -20,48 +20,58 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Expression { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Expression { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - _ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(_ctx, stringify!(Expression)) - })?; + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; let mut cursor = node.walk(); - assert!( - cursor.goto_first_child(), - "Could not find a child corresponding to any enum branch" - ); + if !cursor.goto_first_child() { + return Err(::rust_sitter::error::ExtractError::missing_node(ctx)); + } loop { let node = cursor.node(); match node.kind() { 
"Expression_Number" => { return ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Expression::Number), node, move |state| { - Ok(Expression::Number({ + Ok(Expression::Number( ::rust_sitter::__private::extract_field::( - state, source, "0", None, - ) - }?)) + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| { + match state { 0u32 => :: rust_sitter :: extract :: ExtractFieldState :: Complete , _ => :: rust_sitter :: extract :: ExtractFieldState :: Overflow , } + }, + ), + source, + "0", + )?, + )) }, ) } k => { if !cursor.goto_next_sibling() { - return Err(::rust_sitter::error::ExtractError::missing_enum(_ctx, k)); + return Err(::rust_sitter::error::ExtractError::missing_enum(ctx)); } } } } } } - impl ::rust_sitter::rule::Rule for Expression { + impl ::rust_sitter::rule::Rule for Expression { fn produce_ast() -> String { String::new() } @@ -69,33 +79,43 @@ mod grammar { "Expression" } } - impl ::rust_sitter::Extract for Whitespace { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Whitespace { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Whitespace)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Whitespace { - _whitespace: { - ::rust_sitter::__private::extract_field::<(), _>( + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Whitespace), + node, + move |state| { + Ok(Whitespace { + _whitespace: 
::rust_sitter::__private::extract_field::<(), _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), source, "_whitespace", - None, - ) - }?, - }) - }) + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Whitespace { + impl ::rust_sitter::rule::Rule for Whitespace { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index fa14e29..51281a9 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -20,35 +20,57 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for Language { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Language { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Language)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Language { - v: { - ::rust_sitter::__private::extract_field::, _>( - state, source, "v", None, - ) - }?, - t: { - ::rust_sitter::__private::extract_field::, _>( - state, source, "t", None, - ) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + 
stringify!(Language), + node, + move |state| { + Ok(Language { + v: ::rust_sitter::__private::extract_field::, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new(0u32, true, |state| { + match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + } + }), + source, + "v", + )?, + t: ::rust_sitter::__private::extract_field::, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new(1u32, true, |state| { + match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( + "Number", true, false, + ), + 1u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + } + }), + source, + "t", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Language { + impl ::rust_sitter::rule::Rule for Language { fn produce_ast() -> String { String::new() } @@ -56,28 +78,43 @@ mod grammar { "Language" } } - impl ::rust_sitter::Extract for Number { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Number { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Number { - v: { - ::rust_sitter::__private::extract_field::(state, source, "v", None) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + 
stringify!(Number), + node, + move |state| { + Ok(Number { + v: ::rust_sitter::__private::extract_field::( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), + source, + "v", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Number { + impl ::rust_sitter::rule::Rule for Number { fn produce_ast() -> String { String::new() } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index 28624ba..65b5d7c 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -20,30 +20,46 @@ mod grammar { ::rust_sitter::__private::parse(input, Self::language) } } - impl ::rust_sitter::Extract for NumberList { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for NumberList { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(NumberList)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(NumberList { - numbers: { - ::rust_sitter::__private::extract_field::, _>( - state, source, "numbers", None, - ) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(NumberList), + node, + move |state| { + Ok(NumberList { + 
numbers: ::rust_sitter::__private::extract_field::, _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new(1u32, true, |state| { + match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( + "List_NumberList_numbers", + true, + false, + ), + 1u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + } + }), + source, + "numbers", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for NumberList { + impl ::rust_sitter::rule::Rule for NumberList { fn produce_ast() -> String { String::new() } @@ -51,28 +67,43 @@ mod grammar { "NumberList" } } - impl ::rust_sitter::Extract for Number { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Number { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Number)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Number { - v: { - ::rust_sitter::__private::extract_field::(state, source, "v", None) - }?, - }) - }) + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Number), + node, + move |state| { + Ok(Number { + v: ::rust_sitter::__private::extract_field::( + ::rust_sitter::extract::BaseExtractor::default(), + (), + state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), 
+ source, + "v", + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Number { + impl ::rust_sitter::rule::Rule for Number { fn produce_ast() -> String { String::new() } @@ -80,33 +111,43 @@ mod grammar { "Number" } } - impl ::rust_sitter::Extract for Whitespace { - type LeafFn<'a> = (); + impl ::rust_sitter::Extract for Whitespace { + type Output = Self; + type LeafFn = (); #[allow(non_snake_case)] - fn extract<'a, 'tree>( - ctx: &mut ::rust_sitter::extract::ExtractContext<'_>, + fn extract<'tree>( + ctx: &mut ::rust_sitter::extract::ExtractContext, node: Option<::rust_sitter::tree_sitter::Node<'tree>>, source: &[u8], - _leaf_fn: Option>, + _l: Self::LeafFn, ) -> Result> { - let node = node.ok_or_else(|| { - ::rust_sitter::error::ExtractError::missing_node(ctx, stringify!(Whitespace)) - })?; - ::rust_sitter::__private::extract_struct_or_variant(node, move |state| { - Ok(Whitespace { - _whitespace: { - ::rust_sitter::__private::extract_field::<(), _>( + let node = node.ok_or_else(|| ::rust_sitter::error::ExtractError::missing_node(ctx))?; + ::rust_sitter::__private::extract_struct_or_variant( + stringify!(Whitespace), + node, + move |state| { + Ok(Whitespace { + _whitespace: ::rust_sitter::__private::extract_field::<(), _>( + ::rust_sitter::extract::BaseExtractor::default(), + (), state, + ::rust_sitter::extract::ExtractFieldContext::new( + 0u32, + false, + |state| match state { + 0u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + _ => ::rust_sitter::extract::ExtractFieldState::Overflow, + }, + ), source, "_whitespace", - None, - ) - }?, - }) - }) + )?, + }) + }, + ) } } - impl ::rust_sitter::rule::Rule for Whitespace { + impl ::rust_sitter::rule::Rule for Whitespace { fn produce_ast() -> String { String::new() } diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index efcf8b1..5accdf0 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -17,6 +17,10 @@ pub fn extract_struct_or_variant<'tree, T>( ) -> 
Result<'tree, T> { debug!("extract_struct_or_variant node.kind={}", node.kind()); trace!("extract_struct_or_variant node={}", node); + trace!( + "extract_struct_or_variant node.child_count={}", + node.child_count() + ); let mut parent_cursor = node.walk(); let has_children = parent_cursor.goto_first_child(); let mut state = ExtractStructState { @@ -56,7 +60,7 @@ pub fn extract_field<'tree, T: Extract, E: Extractor>( last_idx: state.last_idx, last_pt: state.last_pt, field_name, - node_kind: "", + struct_name: state.struct_name, }; if state.has_children { if let Some(cursor) = state.cursor.as_mut() { @@ -64,18 +68,19 @@ pub fn extract_field<'tree, T: Extract, E: Extractor>( let mut iter = ExtractFieldIterator { cursor, field_name, + struct_name: state.struct_name, ctx: field_state, + source, current: Default::default(), }; // Start the iterator. - // Some iteration requires knowing if there is a valid starting state or not. + // Iteration requires knowing if there is a valid starting state or not. iter.advance_state()?; let result = extractor.do_extract_field(&mut ctx, &mut iter, source, leaf_fn)?; Ok(result) } else { - // TODO: ??? extractor.do_extract(&mut ctx, None, source, leaf_fn) } } else if let Some(cursor) = state.cursor.as_mut() { @@ -122,7 +127,7 @@ pub fn skip_text<'tree>( Ok(()) } -pub fn parse>( +pub fn parse( input: &str, language: impl Fn() -> tree_sitter::Language, ) -> crate::ParseResult { @@ -142,10 +147,9 @@ pub fn parse>( last_pt: Default::default(), last_idx: 0, field_name: "root", - node_kind: "", + struct_name: T::rule_name(), }; let result = ::extract(&mut ctx, Some(root_node), input.as_bytes(), ()); - #[allow(clippy::manual_ok_err)] let result = match result { Err(e) => { // These are actually not really useful yet. 
diff --git a/runtime/src/error.rs b/runtime/src/error.rs index d7af557..6f297f9 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -1,7 +1,7 @@ -use log::{trace, debug}; +use log::{debug, trace}; use std::{collections::HashSet, ops::Range}; -use crate::{ExtractContext, Point, Position}; +use crate::{ExtractContext, Point, Position, extract::ExtractFieldIterator}; /// A high level parsing error with useful information extracted already. #[derive(Debug)] @@ -20,19 +20,11 @@ pub struct ParseError { pub enum ParseErrorReason { Missing(&'static str), Error, - FailedExtract { - field: String, + Extract { + struct_name: &'static str, + field_name: &'static str, + reason: ExtractErrorReason, }, - MissingNode { - node_kind: &'static str, - type_name: &'static str, - }, - MissingEnum { - node_kind: &'static str, - enum_name: &'static str, - }, - /// Parsed OK, but failed to extract to the given type. - TypeConversion(Box), } impl ParseError { @@ -60,24 +52,19 @@ impl std::fmt::Display for ParseErrorReason { match self { ParseErrorReason::Missing(kind) => write!(f, "missing {kind}"), ParseErrorReason::Error => f.write_str("parse error"), - ParseErrorReason::FailedExtract { field } => { - write!(f, "failed extraction of field: {field}") + // ParseErrorReason::FailedExtract { field } => { + // write!(f, "failed extraction of field: {field}") + // } + ParseErrorReason::Extract { + struct_name, + field_name, + reason, + } => { + write!( + f, + "extraction error for {struct_name}::{field_name}. 
Reason: {reason}" + ) } - ParseErrorReason::MissingNode { - node_kind, - type_name, - } => write!( - f, - "missing node in extraction of type: {type_name}, {node_kind}" - ), - ParseErrorReason::MissingEnum { - node_kind, - enum_name, - } => write!( - f, - "missing enum in extraction of type: {enum_name}, {node_kind}" - ), - ParseErrorReason::TypeConversion(error) => write!(f, "type conversion: {error}"), } } } @@ -300,13 +287,17 @@ pub struct ExtractError<'a> { struct ExtractErrorInner<'a> { /// Span of the node which failed to extract. position: crate::Position, - reason: ExtractErrorReason<'a>, + field_name: &'static str, + struct_name: &'static str, + node: Option>, + reason: ExtractErrorReason, } impl<'a> ExtractError<'a> { pub(crate) fn empty() -> Self { Self { inner: vec![] } } + pub(crate) fn prop(self) -> Result<(), Self> { if self.inner.is_empty() { Ok(()) @@ -314,159 +305,134 @@ impl<'a> ExtractError<'a> { Err(self) } } - pub(crate) fn new(n: tree_sitter::Node<'a>, expected_field: String) -> Self { - let position = crate::Position::from_node(n); + + pub(crate) fn new( + struct_name: &'static str, + field_name: &'static str, + position: crate::Position, + reason: ExtractErrorReason, + ) -> Self { Self { inner: vec![ExtractErrorInner { + // TODO: Provide this where possible. 
+ node: None, position, - reason: ExtractErrorReason::Parse { - expected_field, - node: n, - }, + field_name, + struct_name, + reason, }], } } + + pub(crate) fn new_ctx( + ctx: &ExtractContext, + position: crate::Position, + reason: ExtractErrorReason, + ) -> Self { + Self::new(ctx.struct_name, ctx.field_name, position, reason) + } + pub(crate) fn merge(&mut self, err: ExtractError<'a>) { self.inner.extend(err.inner); } pub(crate) fn type_conversion( + ctx: &ExtractContext, n: tree_sitter::Node<'_>, e: impl std::error::Error + Send + Sync + 'static, ) -> Self { let position = crate::Position::from_node(n); - Self { - inner: vec![ExtractErrorInner { - position, - reason: ExtractErrorReason::TypeConversion(Box::new(e)), - }], - } + Self::new( + ctx.struct_name, + ctx.field_name, + position, + ExtractErrorReason::TypeConversion(Box::new(e)), + ) + } + + pub(crate) fn field_extraction( + ctx: &ExtractFieldIterator<'_, '_>, + msg: impl Into, + ) -> Self { + let position = ctx.position(); + Self::new( + ctx.struct_name, + ctx.field_name, + position, + ExtractErrorReason::FieldExtraction { + message: msg.into(), + }, + ) } #[allow(dead_code)] pub(crate) fn accumulate_parse_errors(self, errors: &mut Vec) { for inner in self.inner { - let err = match inner.reason { - ExtractErrorReason::TypeConversion(t) => { - let reason = ParseErrorReason::TypeConversion(t); - ParseError { - node_position: inner.position.clone(), - error_position: inner.position, - reason, - lookaheads: vec![], - } - } - ExtractErrorReason::Parse { - expected_field, - node, - } => { - let reason = ParseErrorReason::FailedExtract { - field: expected_field, - }; - let mut error = NodeError { node }.to_parse_error(); - error.reason = reason; - error - } - ExtractErrorReason::MissingNode { - node_kind, - type_name, - } => { - let reason = ParseErrorReason::MissingNode { - node_kind, - type_name, - }; - ParseError { - node_position: inner.position.clone(), - error_position: inner.position, - reason, - 
lookaheads: vec![], - } - } - ExtractErrorReason::MissingEnum { - node_kind, - enum_name, - } => { - let reason = ParseErrorReason::MissingEnum { - node_kind, - enum_name, - }; - ParseError { - node_position: inner.position.clone(), - error_position: inner.position, - reason, - lookaheads: vec![], - } - } + let err = ParseError { + node_position: inner.position.clone(), + error_position: inner.position, + lookaheads: vec![], + reason: ParseErrorReason::Extract { + struct_name: inner.struct_name, + field_name: inner.field_name, + reason: inner.reason, + }, }; errors.push(err); } } - pub fn missing_node(ctx: &ExtractContext, type_name: &'static str) -> Self { + pub fn missing_node(ctx: &ExtractContext) -> Self { let position = crate::Position { // TODO: This should be fixed to actually have the full range from the outer node. bytes: ctx.last_idx..ctx.last_idx, start: Point::from_tree_sitter(ctx.last_pt), end: Point::from_tree_sitter(ctx.last_pt), }; - Self { - inner: vec![ExtractErrorInner { - position, - reason: ExtractErrorReason::MissingNode { - node_kind: ctx.node_kind, - type_name, - }, - }], - } + Self::new_ctx(ctx, position, ExtractErrorReason::MissingNode) } - pub fn missing_enum(ctx: &ExtractContext, enum_name: &'static str) -> Self { + pub fn missing_enum(ctx: &ExtractContext) -> Self { let position = crate::Position { // TODO: This should be fixed to actually have the full range from the outer node. 
bytes: ctx.last_idx..ctx.last_idx, start: Point::from_tree_sitter(ctx.last_pt), end: Point::from_tree_sitter(ctx.last_pt), }; - Self { - inner: vec![ExtractErrorInner { - position, - reason: ExtractErrorReason::MissingEnum { - node_kind: ctx.node_kind, - enum_name, - }, - }], - } + Self::new_ctx(ctx, position, ExtractErrorReason::MissingEnum) } pub fn position(&self) -> &Position { &self.inner[0].position } - pub fn reason(&self) -> &ExtractErrorReason<'_> { + pub fn reason(&self) -> &ExtractErrorReason { &self.inner[0].reason } } #[derive(Debug)] -pub enum ExtractErrorReason<'a> { - /// Failed to parse at the tree-sitter level. - Parse { - // Can be &'static? - expected_field: String, - node: tree_sitter::Node<'a>, - }, - MissingNode { - node_kind: &'static str, - type_name: &'static str, - }, - MissingEnum { - node_kind: &'static str, - enum_name: &'static str, +pub enum ExtractErrorReason { + FieldExtraction { + message: String, }, + MissingNode, + MissingEnum, /// Parsed OK, but failed to extract to the given type. TypeConversion(Box), } +impl std::fmt::Display for ExtractErrorReason { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MissingNode => write!(f, "missing node in extraction"), + Self::MissingEnum => write!(f, "missing enum in extraction",), + Self::FieldExtraction { message } => write!(f, "field extraction failure: {message}"), + Self::TypeConversion(error) => write!(f, "type conversion: {error}"), + } + } +} + impl<'a> IntoIterator for ExtractError<'a> { type Item = ExtractError<'a>; type IntoIter = ErrorIntoIter<'a>; diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index c7b5112..e516406 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -61,8 +61,7 @@ pub struct ExtractContext { pub last_idx: usize, pub last_pt: tree_sitter::Point, pub field_name: &'static str, - // TODO: Remove this, clean it up. 
- pub node_kind: &'static str, + pub struct_name: &'static str, } /// Default extractor which simply delegates to the `Extract` implementation. @@ -157,7 +156,7 @@ where ) -> Result<'tree, L> { let node = match node { Some(n) => n, - None => return Err(ExtractError::missing_node(ctx, "WithLeaf")), + None => return Err(ExtractError::missing_node(ctx)), }; let text = node.utf8_text(source).unwrap(); Ok(leaf_fn(text)) @@ -179,8 +178,7 @@ impl Extract for () { } } -impl Extract for Option -{ +impl Extract for Option { type LeafFn = T::LeafFn; type Output = Option; fn extract<'a, 'tree>( @@ -280,7 +278,7 @@ macro_rules! extract_from_str { type LeafFn = (); type Output = $t; fn extract<'tree>( - _ctx: &mut ExtractContext, + ctx: &mut ExtractContext, node: Option>, source: &[u8], _l: (), @@ -288,14 +286,13 @@ macro_rules! extract_from_str { let node = match node { Some(n) => n, None => { - panic!("Better error"); - // return Err(ExtractError::missing_node(ctx, stringify!($t))); + return Err(ExtractError::missing_node(ctx)); } }; let text = node.utf8_text(source).expect("No text found for node"); match text.parse() { Ok(t) => Ok(t), - Err(e) => Err(ExtractError::type_conversion(node, e)), + Err(e) => Err(ExtractError::type_conversion(ctx, node, e)), } } } @@ -318,7 +315,7 @@ extract_from_str!(String); macro_rules! 
extract_for_tuple { ($($t:ident),*) => { - impl<$($t: Extract),*> Extract for ($($t),*) + impl<$($t: Extract),*> Extract for ($($t),*) where $(<$t as Extract>::LeafFn: Default),* { diff --git a/runtime/src/extract/field.rs b/runtime/src/extract/field.rs index c4d5d88..9cfd449 100644 --- a/runtime/src/extract/field.rs +++ b/runtime/src/extract/field.rs @@ -1,10 +1,14 @@ +use crate::error::ExtractError; + use super::Result; -use log::debug; +use log::{debug, trace}; pub struct ExtractFieldIterator<'cursor, 'tree: 'cursor> { pub(crate) cursor: &'cursor mut tree_sitter::TreeCursor<'tree>, pub(crate) field_name: &'static str, + pub(crate) struct_name: &'static str, pub(crate) ctx: ExtractFieldContext, + pub(crate) source: &'cursor [u8], pub(crate) current: NodeIterState<'tree>, } @@ -50,16 +54,6 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { } } - fn handle_optional_err(&mut self, error: &str) -> Result<'tree, ()> { - if self.ctx.optional && self.ctx.state == 1 { - debug!("advance_state: optional, outputting None"); - self.ctx.state = self.ctx.num_states + 1; - self.current = NodeIterState::Complete; - Ok(()) - } else { - todo!("{}", error); - } - } pub fn advance_state(&mut self) -> Result<'tree, ()> { if self.current == NodeIterState::Complete { debug!("advance_state: verifying completion"); @@ -69,17 +63,19 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { self.advance_node(); let n = self.cursor.node(); debug!( - "advance_state: field_name={}, state={}, num_states={}, optional={}, node={}, node.kind={}", + "advance_state: field_name={}, cursor.field_name={:?}, state={}, num_states={}, optional={}, node={}, node.kind={}", self.field_name, + self.cursor.field_name(), self.ctx.state, self.ctx.num_states, self.ctx.optional, n, n.kind() ); - debug!( - "advance_state: cursor.field_name()={:?}", - self.cursor.field_name() + + trace!( + "advance_state: node_string={}", + n.utf8_text(self.source).unwrap() ); let state = 
(self.ctx.state_fn)(self.ctx.state); @@ -87,10 +83,17 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { debug!("advance_state: got state={:?}", state); match state { ExtractFieldState::Str(expected, named, optional) => { - if self.cursor.field_name() != Some(self.field_name) { + let cursor_field = self.cursor.field_name(); + let field_name = self.field_name; + if cursor_field != Some(field_name) { debug!("advance_state: field names didn't match"); // Check if we have an optional overall. - self.handle_optional_err("error fields didn't match")?; + self.handle_optional_err(|| { + format!( + "fields didn't match, cursor had: {:?}, expected: {}", + cursor_field, field_name + ) + })?; return Ok(()); } if n.kind() == expected && n.is_named() == named { @@ -104,14 +107,21 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { self.current = NodeIterState::Node(None); Ok(()) } else { - self.handle_optional_err("error state didn't match")?; + self.handle_optional_err(|| "state didn't match".into())?; Ok(()) } } ExtractFieldState::Choice(values, optional) => { - if self.cursor.field_name() != Some(self.field_name) { + let cursor_field = self.cursor.field_name(); + let field_name = self.field_name; + if cursor_field != Some(field_name) { debug!("advance_state: field names didn't match"); - self.handle_optional_err("error fields didn't match")?; + self.handle_optional_err(|| { + format!( + "fields didn't match, cursor had: {:?}, expected: {}", + cursor_field, field_name + ) + })?; return Ok(()); } for (value, named) in values { @@ -126,7 +136,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { self.current = NodeIterState::Node(None); Ok(()) } else { - self.handle_optional_err("error none of the values matched")?; + self.handle_optional_err(|| "none of the choice values matched".into())?; Ok(()) } } @@ -136,7 +146,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { Ok(()) } 
ExtractFieldState::Overflow => { - self.handle_optional_err("error state overflowed")?; + self.handle_optional_err(|| "state overflowed".into())?; Ok(()) } } @@ -161,13 +171,39 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { } pub fn finalize(&self) -> Result<'tree, ()> { - if self.ctx.state != self.ctx.num_states + 1 { - todo!("error state didn't finalize") + let state = self.ctx.state; + let expected = self.ctx.num_states + 1; + if state != expected { + return Err(ExtractError::field_extraction( + self, + format!("Could not finalize, was in state: {state}, expected: {expected}"), + )); } Ok(()) } } +// Some helpers. +impl<'cursor, 'tree> ExtractFieldIterator<'cursor, 'tree> { + fn handle_optional_err(&mut self, f: F) -> Result<'tree, ()> + where + F: FnOnce() -> String, + { + if self.ctx.optional && self.ctx.state == 1 { + debug!("advance_state: optional, outputting None"); + self.ctx.state = self.ctx.num_states + 1; + self.current = NodeIterState::Complete; + Ok(()) + } else { + Err(ExtractError::field_extraction(self, f())) + } + } + + pub(crate) fn position(&self) -> crate::Position { + crate::Position::from_node(self.cursor.node()) + } +} + #[derive(Default, Clone, Copy, PartialEq)] pub(crate) enum NodeIterState<'tree> { Node(Option>), diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 4356f58..451e677 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -94,6 +94,11 @@ impl Position { pub fn point_range(&self) -> (Point, Point) { (self.start, self.end) } + + fn extend_from(&mut self, other: Position) { + self.bytes = self.bytes.start..other.bytes.end; + self.end = other.end; + } } impl PartialOrd for Position { @@ -156,22 +161,13 @@ impl Extract for Spanned { source: &[u8], l: Self::LeafFn, ) -> extract::Result<'tree, Self::Output> { - // TODO: Figure this out correctly. We need to extend the span over all of the consumed - // nodes when we do this. 
- let start_byte = ctx.last_idx; - let start = ctx.last_pt; + let mut start = it.position(); let value = T::extract_field(ctx, it, source, l)?; - // We need to make sure these get updated; maybe in this case it should just be in the - // iterator instead of in here. - let end_byte = ctx.last_idx; - let end = ctx.last_pt; + let end = it.position(); + start.extend_from(end); Ok(Spanned { value, - position: Position { - bytes: start_byte..end_byte, // TODO: This is incorrect, needs to be fixed. - start: Point::from_tree_sitter(start), - end: Point::from_tree_sitter(end), - }, + position: start, }) } } diff --git a/runtime/src/rule.rs b/runtime/src/rule.rs index bff7bed..6428bf6 100644 --- a/runtime/src/rule.rs +++ b/runtime/src/rule.rs @@ -20,8 +20,8 @@ pub trait Rule: Extract { let mut ctx = ExtractContext { last_pt: n.start_position(), last_idx: n.start_byte(), - field_name: Self::rule_name(), - node_kind: "", + field_name: "", + struct_name: Self::rule_name(), }; // Extract the errors, and try to parse anyway. 
let mut errors = vec![]; @@ -33,7 +33,7 @@ pub trait Rule: Extract { } } -pub trait Language: Sized { +pub trait Language: Rule { fn produce_grammar() -> String; fn language() -> tree_sitter::Language; diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap index 4141495..8e07aa9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Program","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"List_Program_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBO
L","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String_0":{"type":"STRING","value":"+"},"BinaryExpressionInner_String":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String_0"}}]},"BinaryExpressionInner_String2_0":{"type":"STRING","value":"-"},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String2_0"}}]},"BinaryExpressionInner_String3_0":{"type":"STRING","value":"*"},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String3_0"}}]},"BinaryExpressionInner_String4_0":{"type":"STRING","value":"/"},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpressionInner_String4_0"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"_ExpressionStatement__semicolon":{"type":"STRING","value":";"},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"SYMBOL","name":"_ExpressionStatement__semicolon"}}]},"_IfStatement__if":{"type":"STRING
","value":"if"},"_IfStatement__lparen":{"type":"STRING","value":"("},"_IfStatement__rparen":{"type":"STRING","value":")"},"_IfStatement__lbrace":{"type":"STRING","value":"{"},"_IfStatement__rbrace":{"type":"STRING","value":"}"},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"SYMBOL","name":"_IfStatement__if"}},{"type":"FIELD","name":"_lparen","content":{"type":"SYMBOL","name":"_IfStatement__lparen"}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","content":{"type":"SYMBOL","name":"_IfStatement__rparen"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatement__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatement__rbrace"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"_IfStatementElse__else":{"type":"STRING","value":"else"},"_IfStatementElse__lbrace":{"type":"STRING","value":"{"},"_IfStatementElse__rbrace":{"type":"STRING","value":"}"},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"SYMBOL","name":"_IfStatementElse__else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__lbrace"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"SYMBOL","name":"_IfStatementElse__rbrace"}}]},"Identifier_0":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier_0"}}]},"Number_0":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number_0"}}]
}},"extras":[]} +{"name":"Program","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"List_Program_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","
value":"+"}}]},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"-"}}]},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"*"}}]},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"/"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"STRING","value":";"}}]},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"STRING","value":"if"}},{"type":"FIELD","name":"_lparen","content":{"type":"STRING","value":"("}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","content":{"type":"STRING","value":")"}},{"type":"FIELD","name":"_lbrace","content":{"type":"STRING","value":"{"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"STRING","value":"}"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"STRING","value":"else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"STRING","value":"{"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"STRING","value":"}"}
}]},"Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"}}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap index b8aca76..04e00a9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_prec_left.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression_Sub_1":{"type":"STRING","value":"-"},"Expression_Sub":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"1","content":{"type":"SYMBOL","name":"Expression_Sub_1"}},{"type":"FIELD","name":"2","content":{"type":"SYMBOL","name":"Expression"}}]}},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]}},"extras":[]} 
+{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]},"Expression_Sub":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"1","content":{"type":"STRING","value":"-"}},{"type":"FIELD","name":"2","content":{"type":"SYMBOL","name":"Expression"}}]}},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Sub"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap index 4732692..9568dc1 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_recursive.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression_Neg_0":{"type":"STRING","value":"-"},"Expression_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Neg_0"}},{"type":"FIELD","name":"1","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]}},"extras":[]} 
+{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]},"Expression_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"-"}},{"type":"FIELD","name":"1","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_Neg"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap index 86e431e..d87eca9 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_transformed_fields.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} +{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap index 64f76f3..bfb7539 100644 
--- a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_named_field.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]},"Expr_Number_0":{"type":"PATTERN","value":"\\d+"},"Expr_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expr_Number_0"}}]},"_Expr_Neg__bang":{"type":"STRING","value":"!"},"Expr_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"_bang","content":{"type":"SYMBOL","name":"_Expr_Neg__bang"}},{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Expr"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]}},"extras":[]} +{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]},"Expr_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]},"Expr_Neg":{"type":"SEQ","members":[{"type":"FIELD","name":"_bang","content":{"type":"STRING","value":"!"}},{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Expr"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Number"},{"type":"SYMBOL","name":"Expr_Neg"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap index efec7f5..585ee4b 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number_value":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"SYMBOL","name":"Number_value"}}]},"List_Expr_Numbers_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Expr_Numbers_0_element","content":{"type":"SYMBOL","name":"Number"}}},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Expr_Numbers_0"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} +{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"PATTERN","value":"\\d+"}}]},"List_Expr_Numbers_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Expr_Numbers_0_element","content":{"type":"SYMBOL","name":"Number"}}},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Expr_Numbers_0"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap index 6b7cc59..fd52c3f 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap index 
e6e68f3..0719a91 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap index e6e68f3..0719a91 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap +++ 
b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap index fcf50cd..1c3bfcc 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_unboxed_field.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs 
expression: grammar --- -{"name":"Language","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Language":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} +{"name":"Language","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Language":{"type":"SEQ","members":[{"type":"FIELD","name":"e","content":{"type":"SYMBOL","name":"Expression"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap index 37b6d7d..4269765 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_with_extras.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number_0":{"type":"PATTERN","value":"\\d+"},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Expression_Number_0"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} 
+{"name":"Expression","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Number"}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap index e5d9a19..a047b29 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__immediate.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"StringFragment","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]},"StringFragment_0":{"type":"IMMEDIATE_TOKEN","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"StringFragment_0"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} +{"name":"StringFragment","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}}]},"StringFragment":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PREC","value":1,"content":{"type":"PATTERN","value":"[^\"\\\\]+"}}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap index c6cf4f3..9aac286 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"NumberList_numbers":{"type":"PATTERN","value":"\\d+"},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"NumberList_numbers"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} +{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"PATTERN","value":"\\d+"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap b/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap index 470b6fc..d3bd089 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__struct_optional.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"Language","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Language_v"}}]},{"type":"FIELD","name":"space","content":{"type":"SYMBOL","name":"Language_space"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Language_v":{"type":"PATTERN","value":"\\d+"},"Language_space":{"type":"PATTERN","value":" "},"Language":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Language_v"}}]},{"type":"FIELD","name":"space","content":{"type":"SYMBOL","name":"Language_space"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Number_v":{"type":"PATTERN","value":"\\d+"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"SYMBOL","name":"Number_v"}}]}},"extras":[]} +{"name":"Language","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]},{"type":"FIELD","name":"space","content":{"type":"PATTERN","value":" "}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Language":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]},{"type":"FIELD","name":"space","content":{"type":"PATTERN","value":" "}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"t","content":{"type":"SYMBOL","name":"Number"}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[]} diff --git a/types/src/lib.rs b/types/src/lib.rs index ea680a6..38f3afc 100644 --- 
a/types/src/lib.rs +++ b/types/src/lib.rs @@ -1,2 +1 @@ - pub mod grammar; From f2ff2f5581c11c573c3e00e59b13eb6213821b2f Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Tue, 26 Aug 2025 20:32:02 -0500 Subject: [PATCH 40/50] Finish implementing Vec translation to have a flattened grammar. --- common/src/expansion.rs | 86 +++++----- example/src/arithmetic.rs | 18 ++ example/src/main.rs | 2 +- example/src/repetitions.rs | 37 ++++- ...e__arithmetic__tests__failed_parses-3.snap | 6 + ...e__arithmetic__tests__failed_parses-4.snap | 1 + ...ple__arithmetic__tests__failed_parses.snap | 1 + ..._optionals__tests__optional_grammar-6.snap | 6 +- ..._optionals__tests__optional_grammar-8.snap | 6 +- ...titions__tests__repetitions_grammar-2.snap | 73 +++++---- ...titions__tests__repetitions_grammar-3.snap | 85 +++++----- ...petitions__tests__repetitions_grammar.snap | 60 ++----- example/src/words.rs | 2 +- macro/src/expansion.rs | 137 ++++++++++++---- ...macro__tests__enum_with_unamed_vector.snap | 9 +- ...t_sitter_macro__tests__spanned_in_vec.snap | 9 +- ...st_sitter_macro__tests__struct_repeat.snap | 9 +- runtime/src/__private.rs | 16 +- runtime/src/error.rs | 9 +- runtime/src/extract.rs | 50 +++--- runtime/src/extract/field.rs | 154 +++++++++++++++++- runtime/src/lib.rs | 2 +- tool/src/lib.rs | 5 +- ...l__tests__enum_conflicts_prec_dynamic.snap | 2 +- ..._tool__tests__enum_with_unamed_vector.snap | 2 +- ...st_sitter_tool__tests__grammar_repeat.snap | 2 +- ...t_sitter_tool__tests__grammar_repeat1.snap | 2 +- ...l__tests__grammar_repeat_no_delimiter.snap | 2 +- ...st_sitter_tool__tests__spanned_in_vec.snap | 2 +- types/src/grammar.rs | 4 +- 30 files changed, 526 insertions(+), 273 deletions(-) diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 91a222a..1dc50b1 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -248,6 +248,19 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { if input.extras.language { 
ctx.set_language(&input.ident)?; } + if let Some(word) = &input.extras.word { + let rule = word.evaluate()?; + match rule { + RuleDef::SYMBOL { name } => ctx.set_word(name)?, + + _ => { + return Err(Error::new( + word.span(), + "word must be a reference to another rule", + )); + } + } + } if let Some(extras) = &input.extras.extras { if !input.extras.language { return Err(Error::new( @@ -324,7 +337,7 @@ pub struct RuleParams { pub prec_dynamic_param: Option, pub language: bool, pub extras: Option>, - pub word: bool, + pub word: Option, } impl RuleParams { @@ -373,7 +386,17 @@ impl RuleParams { .transpose()?; let language = attrs.iter().any(|a| sitter_attr_matches(a, "language")); - let word = attrs.iter().any(|a| sitter_attr_matches(a, "word")); + let word = attrs + .iter() + .find(|a| sitter_attr_matches(a, "word")) + .map(|a| a.parse_args_with(TsInput::parse)) + .transpose()?; + if !language && let Some(w) = &word { + return Err(Error::new( + w.span(), + "Cannot specify #[word] on non-language rule", + )); + } Ok(Self { prec_param, @@ -455,18 +478,12 @@ impl RuleParams { } fn gen_field( - path: String, + ident_str: &str, leaf_type: Option, attrs: Vec, - ctx: &mut ExpansionState, ) -> Result<(RuleDef, bool)> { let precs = RuleParams::new(&attrs)?; - if precs.word { - // TODO: We don't want to allow this, but because we generate a dummy `_unit` field - // currently, we have to. Super dumb, but we can fix it later. 
- ctx.set_word(path.clone())?; - } if precs.language { return Err(Error::new( leaf_type.span(), @@ -528,10 +545,9 @@ fn gen_field( } } else if is_vec { let (field_json, field_optional) = gen_field( - path.clone(), + ident_str, Some(inner_type_vec), leaf_attr.iter().cloned().cloned().collect(), - ctx, )?; let (delimited_param, repeat_non_empty) = attrs @@ -557,11 +573,7 @@ fn gen_field( .map(|p| precs.apply(p.evaluate()?)) .transpose()?; - let field_rule_non_optional = RuleDef::FIELD { - name: format!("{path}_element"), - content: field_json.into(), - }; - + let field_rule_non_optional = field_json; let field_rule = if field_optional { RuleDef::optional(field_rule_non_optional) } else { @@ -582,27 +594,21 @@ fn gen_field( } } else { RuleDef::REPEAT1 { - content: field_rule.into(), + // This FIELD is used only for the macro generation phase to distinguish between + // the two different types of REPEAT1. + content: Box::new(RuleDef::FIELD { + name: ident_str.to_owned(), + content: field_rule.into(), + }), } }; let vec_contents = precs.apply(vec_contents)?; - let contents_ident = format!("List_{path}"); - ctx.grammar - .rules - .insert(contents_ident.clone(), vec_contents); - - Ok(( - // vec_contents, - RuleDef::SYMBOL { - name: contents_ident, - }, - !repeat_non_empty, - )) + Ok((vec_contents, !repeat_non_empty)) } else { // is_option - let (field_json, field_optional) = gen_field(path, Some(inner_type_option), attrs, ctx)?; + let (field_json, field_optional) = gen_field(ident_str, Some(inner_type_option), attrs)?; if field_optional { return Err(Error::new( @@ -622,20 +628,9 @@ fn gen_struct_or_variant( is_variant: bool, ctx: &mut ExpansionState, ) -> Result<()> { - fn gen_field_optional( - path: &str, - field: &Field, - ctx: &mut ExpansionState, - ident_str: String, - ) -> Result { - // Produce a cleaner grammar: fields with `_` are hidden fields. 
- let path = if ident_str.starts_with("_") { - format!("_{path}_{ident_str}") - } else { - format!("{path}_{ident_str}") - }; + fn gen_field_optional(field: &Field, ident_str: String) -> Result { let (field_contents, is_option) = - gen_field(path, Some(field.ty.clone()), field.attrs.clone(), ctx)?; + gen_field(&ident_str, Some(field.ty.clone()), field.attrs.clone())?; let core = RuleDef::FIELD { name: ident_str, @@ -667,7 +662,7 @@ fn gen_struct_or_variant( .map(|v| v.to_string()) .unwrap_or(format!("{i}")); - Some(gen_field_optional(&path, field, ctx, ident_str)) + Some(gen_field_optional(field, ident_str)) } }) .partition_result(); @@ -681,8 +676,7 @@ fn gen_struct_or_variant( let base_rule = match fields { Fields::Unit => { - let (field_contents, _is_option) = - gen_field(path.clone(), None, attrs.to_owned(), ctx)?; + let (field_contents, _is_option) = gen_field("unit", None, attrs.to_owned())?; if is_variant { RuleDef::FIELD { name: "unit".to_owned(), diff --git a/example/src/arithmetic.rs b/example/src/arithmetic.rs index 62a5bc0..36c8c88 100644 --- a/example/src/arithmetic.rs +++ b/example/src/arithmetic.rs @@ -15,8 +15,14 @@ pub mod grammar { Let(LetExpression), Complex(ComplexExpression), Print(PrintExpression), + Vec(VecExpression), + Table(NewTable, #[leaf(";")] (), VecExpression), } + #[derive(Debug, Clone, PartialEq, Eq, Rule)] + #[leaf(seq("table", "(", ")"))] + pub struct NewTable; + #[derive(PartialEq, Eq, Debug, Rule)] pub struct LetExpression { #[text("let")] @@ -72,6 +78,18 @@ pub mod grammar { _semi: Option<()>, } + #[derive(PartialEq, Eq, Debug, Rule)] + pub struct VecExpression { + #[text("[")] + _vec: (), + #[sep_by(",")] + #[leaf(seq(Ident, ":", Expression))] + things: Vec<(String, (), Expression)>, + #[text("]")] + _vec_close: (), + other: Box, + } + #[derive(PartialEq, Eq, Debug, Rule)] pub struct PrintExpression { #[text("print")] diff --git a/example/src/main.rs b/example/src/main.rs index 0d9611f..2c41f2b 100644 --- a/example/src/main.rs 
+++ b/example/src/main.rs @@ -59,7 +59,7 @@ fn main() { match grammar { "Expression" => process_input::(input), - "Repetition" => process_input::(input), + "Repetition" => process_input::(input), "Optional" => process_input::(input), "Word" => process_input::(input), _ => {} diff --git a/example/src/repetitions.rs b/example/src/repetitions.rs index da9f652..7b1e1ef 100644 --- a/example/src/repetitions.rs +++ b/example/src/repetitions.rs @@ -1,12 +1,37 @@ +#[allow(dead_code)] pub mod grammar { use rust_sitter::{Rule, Spanned}; - #[derive(Debug, Rule)] #[language] #[extras(re(r"\s"))] - #[allow(dead_code)] + pub enum Repetitions { + List(NumberList), + ListRep1(NumberListRep1), + ListNoSep(NoSepNumberList), + } + + #[derive(Debug, Rule)] pub struct NumberList { - #[sep_by1(",")] + #[text("list")] + _list: (), + #[sep_by(",")] + #[leaf(Number)] + numbers: Spanned>>>, + } + + #[derive(Debug, Rule)] + pub struct NumberListRep1 { + #[text("list1")] + _list: (), + #[repeat1] + #[leaf(Number)] + numbers: Spanned>>, + } + + #[derive(Debug, Rule)] + pub struct NoSepNumberList { + #[text("list2")] + _list: (), #[leaf(Number)] numbers: Spanned>>, } @@ -65,9 +90,9 @@ mod tests { #[test] fn repetitions_grammar() { - insta::assert_debug_snapshot!(grammar::NumberList::parse("")); - insta::assert_debug_snapshot!(grammar::NumberList::parse("1")); - insta::assert_debug_snapshot!(grammar::NumberList::parse("1, 2")); + insta::assert_debug_snapshot!(grammar::Repetitions::parse("list")); + insta::assert_debug_snapshot!(grammar::Repetitions::parse("list 1")); + insta::assert_debug_snapshot!(grammar::Repetitions::parse("list 1, 2")); } // #[test] diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap index 0eecef4..8a1a354 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap +++ 
b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-3.snap @@ -34,8 +34,10 @@ ParseResult { }, lookaheads: [ "Expression_Number_token1", + "table", "let", "log", + "[", "print", "source_file", "Expression_Number", @@ -44,9 +46,13 @@ ParseResult { "Expression_Let", "Expression_Complex", "Expression_Print", + "Expression_Vec", + "Expression_Table", "Expression", + "NewTable", "LetExpression", "ComplexExpression", + "VecExpression", "PrintExpression", ], reason: Error, diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap index 0032772..10c4a9d 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap @@ -33,6 +33,7 @@ ParseResult { "*", ")", ",", + "]", ], reason: Error, }, diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap index f6dff06..27b2abb 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap @@ -37,6 +37,7 @@ ParseResult { "*", ")", ",", + "]", ], reason: Error, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap index 69391f5..55173f5 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-6.snap @@ -18,14 +18,14 @@ ParseResult { }, ), position: Position { - bytes: 3..4, + bytes: 2..3, start: Point { line: 1, - column: 4, + column: 3, }, end: Point { line: 1, - column: 5, + 
column: 4, }, }, }, diff --git a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap index b88c69a..9e8f944 100644 --- a/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap +++ b/example/src/snapshots/rust_sitter_example__optionals__tests__optional_grammar-8.snap @@ -14,14 +14,14 @@ ParseResult { }, ), position: Position { - bytes: 2..3, + bytes: 1..2, start: Point { line: 1, - column: 3, + column: 2, }, end: Point { line: 1, - column: 4, + column: 3, }, }, }, diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap index 4ad8950..7e7bce2 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-2.snap @@ -1,40 +1,55 @@ --- source: example/src/repetitions.rs -expression: "grammar::NumberList::parse(\"1\")" +expression: "grammar::Repetitions::parse(\"list 1\")" --- ParseResult { result: Some( - NumberList { - numbers: Spanned { - value: [ - Spanned { - value: 1, - position: Position { - bytes: 0..1, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 2, - }, + List( + NumberList { + _list: (), + numbers: Spanned { + value: [], + position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, }, - }, - ], - position: Position { - bytes: 0..1, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 2, }, }, }, - }, + ), ), - errors: [], + errors: [ + ParseError { + node_position: Position { + bytes: 5..6, + start: Point { + line: 1, + column: 6, + }, + end: Point { + line: 1, + column: 7, + }, + }, + error_position: Position { + bytes: 
5..6, + start: Point { + line: 1, + column: 6, + }, + end: Point { + line: 1, + column: 7, + }, + }, + lookaheads: [], + reason: Error, + }, + ], } diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap index 1634ee9..f7e4d2e 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar-3.snap @@ -1,54 +1,61 @@ --- source: example/src/repetitions.rs -expression: "grammar::NumberList::parse(\"1, 2\")" +expression: "grammar::Repetitions::parse(\"list 1, 2\")" --- ParseResult { result: Some( - NumberList { - numbers: Spanned { - value: [ - Spanned { - value: 1, - position: Position { - bytes: 0..1, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 2, + List( + NumberList { + _list: (), + numbers: Spanned { + value: [ + Spanned { + value: Some( + 1, + ), + position: Position { + bytes: 5..6, + start: Point { + line: 1, + column: 6, + }, + end: Point { + line: 1, + column: 7, + }, }, }, - }, - Spanned { - value: 2, - position: Position { - bytes: 3..4, - start: Point { - line: 1, - column: 4, - }, - end: Point { - line: 1, - column: 5, + Spanned { + value: Some( + 2, + ), + position: Position { + bytes: 8..9, + start: Point { + line: 1, + column: 9, + }, + end: Point { + line: 1, + column: 10, + }, }, }, - }, - ], - position: Position { - bytes: 0..4, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 5, + ], + position: Position { + bytes: 5..9, + start: Point { + line: 1, + column: 6, + }, + end: Point { + line: 1, + column: 10, + }, }, }, }, - }, + ), ), errors: [], } diff --git a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap 
b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap index 7beb90d..d7b1213 100644 --- a/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap +++ b/example/src/snapshots/rust_sitter_example__repetitions__tests__repetitions_grammar.snap @@ -1,52 +1,28 @@ --- source: example/src/repetitions.rs -expression: "grammar::NumberList::parse(\"\")" +expression: "grammar::Repetitions::parse(\"list\")" --- ParseResult { result: Some( - NumberList { - numbers: Spanned { - value: [], - position: Position { - bytes: 0..0, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 1, + List( + NumberList { + _list: (), + numbers: Spanned { + value: [], + position: Position { + bytes: 0..0, + start: Point { + line: 1, + column: 1, + }, + end: Point { + line: 1, + column: 1, + }, }, }, }, - }, + ), ), - errors: [ - ParseError { - node_position: Position { - bytes: 0..0, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 1, - }, - }, - error_position: Position { - bytes: 0..0, - start: Point { - line: 1, - column: 1, - }, - end: Point { - line: 1, - column: 1, - }, - }, - lookaheads: [], - reason: Error, - }, - ], + errors: [], } diff --git a/example/src/words.rs b/example/src/words.rs index d68bb4a..68a7fcd 100644 --- a/example/src/words.rs +++ b/example/src/words.rs @@ -4,6 +4,7 @@ pub mod grammar { #[derive(Debug, Rule)] #[language] #[extras(re(r"\s"))] + #[word(Ident)] #[allow(dead_code)] pub struct Words { #[leaf("if")] @@ -14,7 +15,6 @@ pub mod grammar { #[derive(Debug, Rule)] #[leaf(pattern(r"[a-z_]+"))] - #[word] pub struct Ident; } diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 98cb3e9..8f8a8e9 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -186,7 +186,11 @@ pub fn expand_rule(input: DeriveInput) -> Result { }) } -fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result { +fn gen_field( + 
ident_str: Option<&str>, + leaf: Field, + rule: &RuleDef, +) -> Result { let leaf_type = &leaf.ty; let leaf_attr = leaf @@ -203,6 +207,11 @@ fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result return Err(Error::new(leaf.span(), "Cannot transform non-leaf nodes")); } + let (ident_str, mut should_skip) = match ident_str { + Some(n) => (n, false), + None => ("", true), + }; + let text_attr = leaf .attrs .iter() @@ -216,6 +225,10 @@ fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result } let text_input = text_attr.parse_args::()?; text_input.evaluate()?; + should_skip = true; + } + + if should_skip { // TODO: Handle this correctly. return Ok(syn::parse_quote!({ ::rust_sitter::__private::skip_text(state, #ident_str)?; @@ -244,7 +257,7 @@ fn gen_field(ident_str: String, leaf: Field, grammar: &RuleDef) -> Result None => (leaf_type.clone(), parse_quote! { () }), }; - let extract_state = rule_def_to_extract(grammar)?; + let extract_state = rule_def_to_extract(rule)?; Ok(parse_quote! { ::rust_sitter::__private::extract_field::<#leaf_type, _>(#extractor, #leaf_fn, state, #extract_state, source, #ident_str) @@ -276,8 +289,12 @@ fn gen_struct_or_variant( colon_token: None, ty: Type::Verbatim(quote!(())), // unit type. }; - - gen_field("unit".to_owned(), dummy_field, rule)? + let ident_str = if variant_ident.is_some() { + Some("unit") + } else { + None + }; + gen_field(ident_str, dummy_field, rule)? }; vec![ParamOrField::Param(expr)] } else { @@ -286,24 +303,21 @@ fn gen_struct_or_variant( // optional, the optional part comes before the `FIELD` definition, although that may be // unnecessary. However, we don't need to check the fields specifically, because they can be // determined by the actual field names instead. 
- let field_grammars: HashMap<_, _> = match rule.as_seq().expect("Must be a SEQ") { - RuleDef::SEQ { members } => fields - .iter() - .enumerate() - .zip(members) - .map(|((i, field), def)| { - let ident_str = field - .ident - .as_ref() - .map(|v| v.to_string()) - .unwrap_or(format!("{i}")); - (ident_str, def) - }) - .collect(), - _ => { - unreachable!() - } - }; + let field_grammars: HashMap<_, _> = rule + .as_seq() + .expect("Must be a SEQ") + .iter() + .enumerate() + .zip(&fields) + .map(|((i, def), field)| { + let ident_str = field + .ident + .as_ref() + .map(|v| v.to_string()) + .unwrap_or(format!("{i}")); + (ident_str, def) + }) + .collect(); fields .iter() @@ -322,10 +336,10 @@ fn gen_struct_or_variant( .map(|v| v.to_string()) .unwrap_or(format!("{i}")); - let grammar = field_grammars + let rule = field_grammars .get(&ident_str) .expect("Missing ident grammar"); - gen_field(ident_str, field.clone(), grammar)? + gen_field(Some(&ident_str), field.clone(), rule)? }; let field = if let Some(field_name) = &field.ident { @@ -380,7 +394,11 @@ fn gen_struct_or_variant( }; Ok( - syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant(stringify!(#construct_name), node, move |state| #construct_expr)), + syn::parse_quote!(::rust_sitter::__private::extract_struct_or_variant( + stringify!(#construct_name), + node, + move |state| #construct_expr + )), ) } @@ -413,9 +431,16 @@ fn rule_def_to_extract(def: &RuleDef) -> Result { }) } -fn rule_def_add_state(def: &RuleDef, optional: bool, states: &mut Vec) { +fn rule_def_add_state( + def: &RuleDef, + optional: bool, + states: &mut Vec, +) { let s = match def { RuleDef::SYMBOL { name } => { + // This `grammar` is local to the particular macro expansion and does not include other + // grammars. If it exists here, then we need to return a special state which embeds the + // inner state within it. quote! 
{ ::rust_sitter::extract::ExtractFieldState::Str(#name, true, #optional) } @@ -438,10 +463,9 @@ fn rule_def_add_state(def: &RuleDef, optional: bool, states: &mut Vec quote! { (#value, false) }, RuleDef::SYMBOL { name } => quote! { (#name, true) }, @@ -470,8 +494,57 @@ fn rule_def_add_state(def: &RuleDef, optional: bool, states: &mut Vec unreachable!("ALIAS not supported in this context"), - RuleDef::REPEAT { content: _ } => unreachable!("REPEAT not supported in this context"), - RuleDef::REPEAT1 { content: _ } => unreachable!("REPEAT1 not supported in this context"), + RuleDef::REPEAT { content } => { + // In the REPEAT case we only ever generate a rule that looks like this: + // seq($.rule, repeat($.sep, $.rule)), + // so we will have pushed all of the elements of `$.rule` already into the extraction + // state. We just need to insert a `REPEAT` then on the first token. + let seq = content.as_seq().expect("REPEAT was not a sequence"); + let repeat_rule = match seq { + [rule, _] => rule, + _ => panic!("REPEAT had more than two rules"), + }; + // Technically we support anything here as the first rule - it could be a sequence, + // etc. but practically it is only ever literal strings. Maybe SYMBOL. So we will only + // support those for now. + let (value, named) = match repeat_rule { + RuleDef::SYMBOL { name } => (name, true), + RuleDef::STRING { value } => (value, false), + _ => panic!("sep_by can only use SYMBOL or STRING currently"), + }; + quote! { + ::rust_sitter::extract::ExtractFieldState::Repeat(#value, #named) + } + } + RuleDef::REPEAT1 { content } => match &**content { + RuleDef::SEQ { members } => { + let repeat_rule = match members.as_slice() { + [rule, _] => rule, + _ => panic!("REPEAT1 had more than two rules"), + }; + let (value, named) = match repeat_rule { + RuleDef::SYMBOL { name } => (name, true), + RuleDef::STRING { value } => (value, false), + _ => panic!("sep_by can only use SYMBOL or STRING currently"), + }; + quote! 
{ + ::rust_sitter::extract::ExtractFieldState::Repeat(#value, #named) + } + } + RuleDef::FIELD { name: _, content } => { + // Add all the inner states of the REPEAT1, then conclude with a repeat state. + // Note, that currently we don't support inner repeats, although we could, it would + // look like a tuple (e.g. `(u32, Vec, ())`), and to support that we just + // need the repeat enum to hold the state it should return to instead of just + // returning to zero. It also will need to know when the field ends - that is, it + // would need to check that the first value isn't the repeat value. + rule_def_add_state(content, optional, states); + quote! { + ::rust_sitter::extract::ExtractFieldState::Repeat1 + } + } + _ => panic!("Unsupported input in REPEAT1"), + }, RuleDef::RESERVED { .. } => unreachable!("RESERVED not supported in this context"), }; diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 6cb4762..6a25a37 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -95,15 +95,16 @@ mod grammar { (), state, ::rust_sitter::extract::ExtractFieldContext::new( - 1u32, + 2u32, false, |state| match state { 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( - "List_Expr_Numbers_0", - true, - false, + "Number", true, false, ), 1u32 => { + ::rust_sitter::extract::ExtractFieldState::Repeat1 + } + 2u32 => { ::rust_sitter::extract::ExtractFieldState::Complete } _ => { diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index ecbd08f..9128ec0 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -41,14 +41,13 @@ mod grammar { 
::rust_sitter::extract::BaseExtractor::default(), (), state, - ::rust_sitter::extract::ExtractFieldContext::new(1u32, true, |state| { + ::rust_sitter::extract::ExtractFieldContext::new(2u32, true, |state| { match state { 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( - "List_NumberList_numbers", - true, - false, + "Number", true, false, ), - 1u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + 1u32 => ::rust_sitter::extract::ExtractFieldState::Repeat1, + 2u32 => ::rust_sitter::extract::ExtractFieldState::Complete, _ => ::rust_sitter::extract::ExtractFieldState::Overflow, } }), diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index 65b5d7c..ed956d0 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -40,14 +40,13 @@ mod grammar { ::rust_sitter::extract::BaseExtractor::default(), (), state, - ::rust_sitter::extract::ExtractFieldContext::new(1u32, true, |state| { + ::rust_sitter::extract::ExtractFieldContext::new(2u32, true, |state| { match state { 0u32 => ::rust_sitter::extract::ExtractFieldState::Str( - "List_NumberList_numbers", - true, - false, + "Number", true, false, ), - 1u32 => ::rust_sitter::extract::ExtractFieldState::Complete, + 1u32 => ::rust_sitter::extract::ExtractFieldState::Repeat1, + 2u32 => ::rust_sitter::extract::ExtractFieldState::Complete, _ => ::rust_sitter::extract::ExtractFieldState::Overflow, } }), diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 5accdf0..0c02377 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -65,20 +65,20 @@ pub fn extract_field<'tree, T: Extract, E: Extractor>( if state.has_children { if let Some(cursor) = state.cursor.as_mut() { trace!("extract_field has_children: {}", cursor.node()); - let mut iter = ExtractFieldIterator { + let mut iter = ExtractFieldIterator::new( 
+ field_state, cursor, + state.struct_name, field_name, - struct_name: state.struct_name, - ctx: field_state, source, - current: Default::default(), - }; + ); // Start the iterator. // Iteration requires knowing if there is a valid starting state or not. iter.advance_state()?; let result = extractor.do_extract_field(&mut ctx, &mut iter, source, leaf_fn)?; + iter.finalize()?; Ok(result) } else { extractor.do_extract(&mut ctx, None, source, leaf_fn) @@ -97,15 +97,15 @@ pub fn extract_field<'tree, T: Extract, E: Extractor>( // TODO: Handle errors in this one too. pub fn skip_text<'tree>( state: &mut ExtractStructState<'tree>, - field_name: &str, + field_name: &'static str, ) -> Result<'tree, ()> { debug!( - "skip field: {field_name}, has cursor: {}", + "skip field: {field_name:?}, has cursor: {}", state.cursor.is_some() ); if let Some(cursor) = state.cursor.as_mut() { debug!( - "skip field: expects: {field_name}, has: {:?}", + "skip field: expects: {field_name:?}, has: {:?}", cursor.field_name() ); loop { diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 6f297f9..8b4da52 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -354,14 +354,17 @@ impl<'a> ExtractError<'a> { ctx: &ExtractFieldIterator<'_, '_>, msg: impl Into, ) -> Self { + let msg = msg.into(); + log::error!( + "field_extraction error: {}::{}, msg={}", + ctx.struct_name, ctx.field_name, msg + ); let position = ctx.position(); Self::new( ctx.struct_name, ctx.field_name, position, - ExtractErrorReason::FieldExtraction { - message: msg.into(), - }, + ExtractErrorReason::FieldExtraction { message: msg }, ) } diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index e516406..cf3cfe6 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -52,7 +52,6 @@ pub trait Extract: Sized { leaf_fn: Self::LeafFn, ) -> Result<'tree, Self::Output> { let node = it.next_node()?; - assert!(it.current_node().is_none()); Self::extract(ctx, node, source, leaf_fn) } } @@ -200,6 +199,7 @@ 
impl Extract for Option { if it.current_node().is_some() { Ok(Some(T::extract_field(ctx, it, source, l)?)) } else { + it.advance_state()?; Ok(None) } } @@ -234,37 +234,35 @@ where type LeafFn = T::LeafFn; type Output = Vec; fn extract<'a, 'tree>( - ctx: &mut ExtractContext, + _ctx: &mut ExtractContext, node: Option>, + _source: &[u8], + _l: Self::LeafFn, + ) -> Result<'tree, Self::Output> { + match node { + None => Ok(vec![]), + _ => panic!("Cannot be implemented on Vec"), + } + } + + fn extract_field<'cursor, 'tree>( + ctx: &mut ExtractContext, + it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8], - l: Self::LeafFn, + leaf_fn: Self::LeafFn, ) -> Result<'tree, Self::Output> { - let node = match node { - Some(node) => node, - None => return Ok(vec![]), - }; let mut out = vec![]; - let mut cursor = node.walk(); let mut error = ExtractError::empty(); - if cursor.goto_first_child() { - loop { - // Try and parse the error specially. - let n = cursor.node(); - if n.is_error() { - // println!("Processing error... for {}", ctx.field_name); - // TODO: Do some error handling here instead. - // For now we just ignore it. - } else if cursor.field_name().is_some() { - match T::extract(ctx, Some(n), source, l.clone()) { - Ok(t) => out.push(t), - Err(e) => error.merge(e), - } - } + while it.is_valid() { + let n = it.current_node(); + // Try and parse the error specially. + match T::extract_field(ctx, it, source, leaf_fn.clone()) { + Ok(t) => out.push(t), + Err(e) => error.merge(e), + } + if let Some(n) = n { ctx.last_idx = n.end_byte(); ctx.last_pt = n.end_position(); - if !cursor.goto_next_sibling() { - break; - } } } error.prop()?; @@ -335,7 +333,7 @@ macro_rules! extract_for_tuple { log::debug!("extract_field on tuple"); Ok(( $( - $t::extract(ctx, it.next_node()?, source, Default::default())? + $t::extract_field(ctx, it, source, Default::default())? 
),* )) } diff --git a/runtime/src/extract/field.rs b/runtime/src/extract/field.rs index 9cfd449..4a3240f 100644 --- a/runtime/src/extract/field.rs +++ b/runtime/src/extract/field.rs @@ -2,6 +2,7 @@ use crate::error::ExtractError; use super::Result; use log::{debug, trace}; +use tree_sitter::Node; pub struct ExtractFieldIterator<'cursor, 'tree: 'cursor> { pub(crate) cursor: &'cursor mut tree_sitter::TreeCursor<'tree>, @@ -10,6 +11,8 @@ pub struct ExtractFieldIterator<'cursor, 'tree: 'cursor> { pub(crate) ctx: ExtractFieldContext, pub(crate) source: &'cursor [u8], pub(crate) current: NodeIterState<'tree>, + pub(crate) did_advance: bool, + pub(crate) final_node: Option>, } pub struct ExtractFieldContext { @@ -20,7 +23,12 @@ pub struct ExtractFieldContext { } impl ExtractFieldContext { - pub fn new(num_states: u32, optional: bool, state_fn: fn(u32) -> ExtractFieldState) -> Self { + pub fn new( + num_states: u32, + optional: bool, + // repeat_type: RepeatType, + state_fn: fn(u32) -> ExtractFieldState, + ) -> Self { Self { state_fn, state: 0, @@ -36,13 +44,15 @@ pub enum ExtractFieldState { Str(&'static str, bool, bool), // Current implementation only really supports doing this with a list of strings. Choice(&'static [(&'static str, bool)], bool), + Repeat(&'static str, bool), + Repeat1, Complete, // State went too far. 
Overflow, } impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { - fn advance_node(&mut self) { + fn skip_extras(&mut self) { loop { if self.cursor.node().is_extra() { if !self.cursor.goto_next_sibling() { @@ -54,13 +64,27 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { } } + fn advance_cursor(&mut self) { + if let NodeIterState::Node(n) = self.current { + self.final_node = n; + } + self.did_advance = self.cursor.goto_next_sibling(); + } + + fn set_complete(&mut self) { + if let NodeIterState::Node(n) = self.current { + self.final_node = n; + } + self.current = NodeIterState::Complete; + } + pub fn advance_state(&mut self) -> Result<'tree, ()> { if self.current == NodeIterState::Complete { debug!("advance_state: verifying completion"); self.finalize()?; return Ok(()); } - self.advance_node(); + self.skip_extras(); let n = self.cursor.node(); debug!( "advance_state: field_name={}, cursor.field_name={:?}, state={}, num_states={}, optional={}, node={}, node.kind={}", @@ -87,6 +111,13 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { let field_name = self.field_name; if cursor_field != Some(field_name) { debug!("advance_state: field names didn't match"); + // TODO: It would be generally lovely to clean up this logic throughout. + if optional { + debug!("advance_state: state didn't match, but optional, skipping"); + self.current = NodeIterState::Node(None); + return Ok(()); + } + // Check if we have an optional overall. self.handle_optional_err(|| { format!( @@ -99,7 +130,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { if n.kind() == expected && n.is_named() == named { debug!("advance_state: state matched, advancing iteration"); // advance the cursor and return the current node. 
- self.cursor.goto_next_sibling(); + self.advance_cursor(); self.current = NodeIterState::Node(Some(n)); Ok(()) } else if optional { @@ -116,6 +147,11 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { let field_name = self.field_name; if cursor_field != Some(field_name) { debug!("advance_state: field names didn't match"); + if optional { + debug!("advance_state: state didn't match, but optional, skipping"); + self.current = NodeIterState::Node(None); + return Ok(()); + } self.handle_optional_err(|| { format!( "fields didn't match, cursor had: {:?}, expected: {}", @@ -127,7 +163,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { for (value, named) in values { if n.kind() == *value && n.is_named() == *named { // Found one. - self.cursor.goto_next_sibling(); + self.advance_cursor(); self.current = NodeIterState::Node(Some(n)); return Ok(()); } @@ -140,9 +176,76 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { Ok(()) } } + ExtractFieldState::Repeat(expected, named) => { + debug!("advance_state: repeat state: expected={expected}, named={named}"); + if !self.did_advance { + // We reached the end of the cursor state, we can advance to the end. + self.ctx.state = self.ctx.num_states + 1; + self.set_complete(); + return Ok(()); + } + // Check if the state matches the repeat and then start over from the beginning. If + // it doesn't, then we need to advance again and we should hit the complete state + // after that. + let cursor_field = self.cursor.field_name(); + let field_name = self.field_name; + if cursor_field != Some(field_name) { + debug!("advance_state: field names didn't match in repeat, completing state"); + self.ctx.state = self.ctx.num_states + 1; + self.set_complete(); + // Check if we have an optional overall. 
+ // self.handle_optional_err(|| { + // format!( + // "fields didn't match, cursor had: {:?}, expected: {}", + // cursor_field, field_name + // ) + // })?; + return Ok(()); + } + if n.kind() == expected && n.is_named() == named { + debug!("advance_state: repeat state matched, resetting iteration"); + // Advance past the repeat symbol and start over. + self.advance_cursor(); + self.ctx.state = 0; + self.advance_state()?; + Ok(()) + } else { + self.handle_optional_err(|| "state didn't match".into())?; + Ok(()) + } + } + ExtractFieldState::Repeat1 => { + debug!("advance_state: repeat1 state"); + if !self.did_advance { + self.ctx.state = self.ctx.num_states + 1; + self.set_complete(); + return Ok(()); + } + let cursor_field = self.cursor.field_name(); + let field_name = self.field_name; + if cursor_field != Some(field_name) { + debug!("advance_state: field names didn't match in repeat, completing state"); + self.ctx.state = self.ctx.num_states + 1; + self.set_complete(); + // Check if we have an optional overall. + // self.handle_optional_err(|| { + // format!( + // "fields didn't match, cursor had: {:?}, expected: {}", + // cursor_field, field_name + // ) + // })?; + Ok(()) + } else { + debug!("advance_state: field names matched, triggering repeat"); + // No repeat symbol in this case, we just are at the next repeat node already. 
+ self.ctx.state = 0; + self.advance_state()?; + Ok(()) + } + } ExtractFieldState::Complete => { debug!("advance_state: got complete state"); - self.current = NodeIterState::Complete; + self.set_complete(); Ok(()) } ExtractFieldState::Overflow => { @@ -170,6 +273,10 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { } } + pub fn is_valid(&self) -> bool { + matches!(self.current, NodeIterState::Node(_)) + } + pub fn finalize(&self) -> Result<'tree, ()> { let state = self.ctx.state; let expected = self.ctx.num_states + 1; @@ -189,18 +296,47 @@ impl<'cursor, 'tree> ExtractFieldIterator<'cursor, 'tree> { where F: FnOnce() -> String, { - if self.ctx.optional && self.ctx.state == 1 { + if self.ctx.state == 1 && self.ctx.optional { debug!("advance_state: optional, outputting None"); self.ctx.state = self.ctx.num_states + 1; - self.current = NodeIterState::Complete; + self.set_complete(); Ok(()) } else { Err(ExtractError::field_extraction(self, f())) } } + pub(crate) fn new( + ctx: ExtractFieldContext, + cursor: &'cursor mut tree_sitter::TreeCursor<'tree>, + struct_name: &'static str, + field_name: &'static str, + source: &'cursor [u8], + ) -> Self { + Self { + cursor, + final_node: None, + current: NodeIterState::Start, + did_advance: false, + source, + field_name, + struct_name, + ctx, + } + } + pub(crate) fn position(&self) -> crate::Position { - crate::Position::from_node(self.cursor.node()) + match self.current { + NodeIterState::Node(Some(n)) => crate::Position::from_node(n), + _ => crate::Position::from_node(self.cursor.node()), + } + } + + pub(crate) fn final_position(&self) -> crate::Position { + match self.final_node { + Some(n) => crate::Position::from_node(n), + _ => self.position(), + } } } diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 451e677..10bcca8 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -163,7 +163,7 @@ impl Extract for Spanned { ) -> extract::Result<'tree, Self::Output> { let mut start = 
it.position(); let value = T::extract_field(ctx, it, source, l)?; - let end = it.position(); + let end = it.final_position(); start.extend_from(end); Ok(Spanned { value, diff --git a/tool/src/lib.rs b/tool/src/lib.rs index eddcfef..595a803 100644 --- a/tool/src/lib.rs +++ b/tool/src/lib.rs @@ -287,6 +287,7 @@ mod tests { mod grammar { #[derive(rust_sitter::Rule)] #[language] + #[word(Identifier)] pub struct Program(pub Vec); #[derive(rust_sitter::Rule)] @@ -355,8 +356,8 @@ mod tests { } #[derive(rust_sitter::Rule)] - #[word] - pub struct Identifier(#[leaf(pattern("[a-zA-Z_][a-zA-Z0-9_]*"))] ()); + #[leaf(pattern("[a-zA-Z_][a-zA-Z0-9_]*"))] + pub struct Identifier; #[derive(rust_sitter::Rule)] pub struct Number(#[leaf(pattern("\\d+"))] ()); diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap index 8e07aa9..1fd0c09 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_conflicts_prec_dynamic.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- 
-{"name":"Program","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"List_Program_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Program_0_element","content":{"type":"SYMBOL","name":"Statement"}}},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Program_0"}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"BinaryExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"+"}}]},"
BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"-"}}]},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"*"}}]},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"/"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"STRING","value":";"}}]},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"STRING","value":"if"}},{"type":"FIELD","name":"_lparen","content":{"type":"STRING","value":"("}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","content":{"type":"STRING","value":")"}},{"type":"FIELD","name":"_lbrace","content":{"type":"STRING","value":"{"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"STRING","value":"}"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"STRING","value":"else"}},{"type":"FIELD","name":"_lbrace","content":{"type":"STRING","value":"{"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"STRING","value":"}"}}]},"Identifier"
:{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"}}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[]} +{"name":"Program","word":"Identifier","rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Statement"}}}}]}]},"Program":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"0","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Statement"}}}}]}]},"Statement_ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"ExpressionStatement"}}]},"Statement_IfStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"IfStatement"}}]},"Statement":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Statement_ExpressionStatement"},{"type":"SYMBOL","name":"Statement_IfStatement"}]},"Expression_Identifier":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Identifier"}}]},"Expression_Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}]},"Expression_BinaryExpression":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"BinaryExpression"}}]},"Expression":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expression_Identifier"},{"type":"SYMBOL","name":"Expression_Number"},{"type":"SYMBOL","name":"Expression_BinaryExpression"}]},"BinaryExpression":{"type":"PREC_LEFT","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"binary_expression_inner","content":{"type":"SYMBOL","name":"Binary
ExpressionInner"}},{"type":"FIELD","name":"expression2","content":{"type":"SYMBOL","name":"Expression"}}]}},"BinaryExpressionInner_String":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"+"}}]},"BinaryExpressionInner_String2":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"-"}}]},"BinaryExpressionInner_String3":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"*"}}]},"BinaryExpressionInner_String4":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"STRING","value":"/"}}]},"BinaryExpressionInner":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"BinaryExpressionInner_String"},{"type":"SYMBOL","name":"BinaryExpressionInner_String2"},{"type":"SYMBOL","name":"BinaryExpressionInner_String3"},{"type":"SYMBOL","name":"BinaryExpressionInner_String4"}]},"ExpressionStatement":{"type":"SEQ","members":[{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_semicolon","content":{"type":"STRING","value":";"}}]},"IfStatement":{"type":"PREC_DYNAMIC","value":1,"content":{"type":"SEQ","members":[{"type":"FIELD","name":"_if","content":{"type":"STRING","value":"if"}},{"type":"FIELD","name":"_lparen","content":{"type":"STRING","value":"("}},{"type":"FIELD","name":"expression","content":{"type":"SYMBOL","name":"Expression"}},{"type":"FIELD","name":"_rparen","content":{"type":"STRING","value":")"}},{"type":"FIELD","name":"_lbrace","content":{"type":"STRING","value":"{"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"STRING","value":"}"}},{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"if_statement_inner","content":{"type":"SYMBOL","name":"IfStatementElse"}}]}]}},"IfStatementElse":{"type":"SEQ","members":[{"type":"FIELD","name":"_else","content":{"type":"STRING","value":"else"}},{"type":"F
IELD","name":"_lbrace","content":{"type":"STRING","value":"{"}},{"type":"FIELD","name":"statement","content":{"type":"SYMBOL","name":"Statement"}},{"type":"FIELD","name":"_rbrace","content":{"type":"STRING","value":"}"}}]},"Identifier":{"type":"PATTERN","value":"[a-zA-Z_][a-zA-Z0-9_]*"},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap index 585ee4b..b8e218c 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__enum_with_unamed_vector.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"PATTERN","value":"\\d+"}}]},"List_Expr_Numbers_0":{"type":"REPEAT1","content":{"type":"FIELD","name":"Expr_Numbers_0_element","content":{"type":"SYMBOL","name":"Number"}}},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"List_Expr_Numbers_0"}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} +{"name":"Expr","word":null,"rules":{"source_file":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"value","content":{"type":"PATTERN","value":"\\d+"}}]},"Expr_Numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"0","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"0","content":{"type":"SYMBOL","name":"Number"}}}}]},"Expr":{"type":"CHOICE","members":[{"type":"SYMBOL","name":"Expr_Numbers"}]}},"extras":[]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap 
b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap index fd52c3f..5038607 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"SEQ","members":[{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}]}}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"Number"},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"SYMBOL","name":"Number"}]}}]}}]}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SEQ","members":[{"type":"SYMBOL","name":"Number"},{"type":"REPEAT","content":{"type":"SEQ","members":[{"type":"STRING","value":","},{"type":"SYMBOL","name":"Number"}]}}]}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap index 0719a91..2fcba85 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat1.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"Number"}}}}]}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"Number"}}}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap index 0719a91..2fcba85 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__grammar_repeat_no_delimiter.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"SYMBOL","name":"Number"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"Number"}}}}]}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"Number"}}}}]}]},"Number":{"type":"SEQ","members":[{"type":"FIELD","name":"v","content":{"type":"PATTERN","value":"\\d+"}}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap index 9aac286..6c39108 100644 --- a/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap +++ b/tool/src/snapshots/rust_sitter_tool__tests__spanned_in_vec.snap @@ -2,4 +2,4 @@ source: tool/src/lib.rs expression: grammar --- -{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]},"List_NumberList_numbers":{"type":"REPEAT1","content":{"type":"FIELD","name":"NumberList_numbers_element","content":{"type":"PATTERN","value":"\\d+"}}},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"SYMBOL","name":"List_NumberList_numbers"}}]}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} 
+{"name":"NumberList","word":null,"rules":{"source_file":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"numbers","content":{"type":"PATTERN","value":"\\d+"}}}}]}]},"NumberList":{"type":"SEQ","members":[{"type":"CHOICE","members":[{"type":"BLANK"},{"type":"FIELD","name":"numbers","content":{"type":"REPEAT1","content":{"type":"FIELD","name":"numbers","content":{"type":"PATTERN","value":"\\d+"}}}}]}]}},"extras":[{"type":"PATTERN","value":"\\s"}]} diff --git a/types/src/grammar.rs b/types/src/grammar.rs index 3b2a881..729d913 100644 --- a/types/src/grammar.rs +++ b/types/src/grammar.rs @@ -112,9 +112,9 @@ impl RuleDef { } /// Pull out a sequence, including through precedence unwrapping. - pub fn as_seq(&self) -> Option<&RuleDef> { + pub fn as_seq(&self) -> Option<&[RuleDef]> { match self { - Self::SEQ { .. } => Some(self), + Self::SEQ { members } => Some(members), Self::PREC { value: _, content } | Self::PREC_LEFT { value: _, content } | Self::PREC_RIGHT { value: _, content } From ed7bbd8df713ef381cd44138cbc7f5f962a65082 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Wed, 27 Aug 2025 15:02:08 -0500 Subject: [PATCH 41/50] Add ability to rename grammars. 
--- common/src/expansion.rs | 364 ++++++++++++++++++++-------------------- common/src/lib.rs | 71 +++++--- macro/src/expansion.rs | 11 +- 3 files changed, 236 insertions(+), 210 deletions(-) diff --git a/common/src/expansion.rs b/common/src/expansion.rs index 1dc50b1..ee29ae2 100644 --- a/common/src/expansion.rs +++ b/common/src/expansion.rs @@ -9,7 +9,7 @@ use syn::{parse::Parse, punctuated::Punctuated, spanned::Spanned}; pub struct RuleDerive { pub ident: syn::Ident, pub attrs: Vec, - pub extras: RuleParams, + pub params: RuleParams, pub data: syn::Data, } @@ -42,7 +42,7 @@ impl RuleDerive { Ok(Self { ident: d.ident, attrs: d.attrs, - extras, + params: extras, data: d.data, }) } @@ -79,26 +79,31 @@ pub fn generate_grammar(root_file: Vec) -> Result> { } // This error is useful for us and cannot be generated by proc macro expansion. - let language = state - .language_rule - .ok_or_else(|| { - Error::new( - Span::call_site(), - "Must specify exactly one root with #[language]", - ) - })? - .to_string(); - state.grammar.name = language.clone(); + let (language_ident, language) = state.language_rule.ok_or_else(|| { + Error::new( + Span::call_site(), + "Must specify exactly one root with #[language]", + ) + })?; + let name = language + .name() + .unwrap_or_else(|| language_ident.to_string()); + state.grammar.name = name.clone(); state.grammar.rules.insert( "source_file".to_string(), - state.grammar.rules.get(&language).unwrap().clone(), + state + .grammar + .rules + .get(&language_ident.to_string()) + .unwrap() + .clone(), ); Ok(Some(state.grammar)) } pub struct ExpansionState { pub grammar: Grammar, - pub language_rule: Option, + pub language_rule: Option<(Ident, LanguageExpr)>, // Accumulated errors. 
pub error: Option, } @@ -189,20 +194,16 @@ impl ExpansionState { Ok(()) } - fn set_language(&mut self, ident: &Ident) -> Result<()> { - if let Some(existing) = &self.language_rule { + fn set_language(&mut self, ident: &Ident, lang: LanguageExpr) -> Result<()> { + if let Some((existing, _)) = &self.language_rule { return Err(self.accumulate_error(Error::new( - existing.span(), + lang.path.span(), format!( - "Language rule already defined as {}:{:?}, found duplicate with {}:{:?}", - existing, - existing.span(), - ident, - ident.span(), + "Language rule already defined as {existing}, found duplicate with {ident}", ), ))); } - self.language_rule = Some(ident.clone()); + self.language_rule = Some((ident.clone(), lang)); Ok(()) } @@ -245,10 +246,10 @@ fn process_item(item: Item, ctx: &mut ExpansionState) -> Result<()> { } pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { - if input.extras.language { - ctx.set_language(&input.ident)?; + if let Some(l) = &input.params.language { + ctx.set_language(&input.ident, l.clone())?; } - if let Some(word) = &input.extras.word { + if let Some(word) = &input.params.word { let rule = word.evaluate()?; match rule { RuleDef::SYMBOL { name } => ctx.set_word(name)?, @@ -261,8 +262,8 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { } } } - if let Some(extras) = &input.extras.extras { - if !input.extras.language { + if let Some(extras) = &input.params.extras { + if input.params.language.is_none() { return Err(Error::new( extras.span(), "Cannot specify extras without #[language]", @@ -319,7 +320,7 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { let rule = RuleDef::CHOICE { members }; - let rule = input.extras.apply(rule)?; + let rule = input.params.apply(rule)?; ctx.grammar.rules.insert(ident.to_string(), rule); } @@ -329,154 +330,6 @@ pub fn process_rule(input: RuleDerive, ctx: &mut ExpansionState) -> Result<()> { Ok(()) } -#[derive(Debug)] 
-pub struct RuleParams { - pub prec_param: Option, - pub prec_left_param: Option, - pub prec_right_param: Option, - pub prec_dynamic_param: Option, - pub language: bool, - pub extras: Option>, - pub word: Option, -} - -impl RuleParams { - fn new(attrs: &[Attribute]) -> Result { - let prec_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "prec")); - - let prec_param = prec_attr - .map(|a| a.parse_args_with(Expr::parse)) - .transpose()?; - - let prec_left_attr = attrs - .iter() - .find(|attr| sitter_attr_matches(attr, "prec_left")); - - let prec_left_param = prec_left_attr - .map(|a| a.parse_args_with(Expr::parse)) - .transpose()?; - - let prec_right_attr = attrs - .iter() - .find(|attr| sitter_attr_matches(attr, "prec_right")); - - let prec_right_param = prec_right_attr - .map(|a| a.parse_args_with(Expr::parse)) - .transpose()?; - - let prec_dynamic_attr = attrs - .iter() - .find(|attr| sitter_attr_matches(attr, "prec_dynamic")); - - let prec_dynamic_param = prec_dynamic_attr - .map(|a| a.parse_args_with(Expr::parse)) - .transpose()?; - - if let (Some(prec_left), Some(_prec_right)) = (prec_left_attr, prec_right_attr) { - return Err(Error::new( - prec_left.span(), - "only one of prec, prec_left, and prec_right can be specified", - )); - } - - let extras = attrs - .iter() - .find(|a| sitter_attr_matches(a, "extras")) - .map(|a| a.parse_args_with(Punctuated::::parse_terminated)) - .transpose()?; - - let language = attrs.iter().any(|a| sitter_attr_matches(a, "language")); - let word = attrs - .iter() - .find(|a| sitter_attr_matches(a, "word")) - .map(|a| a.parse_args_with(TsInput::parse)) - .transpose()?; - if !language && let Some(w) = &word { - return Err(Error::new( - w.span(), - "Cannot specify #[word] on non-language rule", - )); - } - - Ok(Self { - prec_param, - prec_left_param, - prec_right_param, - prec_dynamic_param, - extras, - word, - language, - }) - } - - fn apply(&self, rule: RuleDef) -> Result { - let Self { - prec_param, - prec_left_param, - 
prec_right_param, - prec_dynamic_param, - .. - } = self; - - let rule = if let Some(Expr::Lit(lit)) = prec_param { - if let Lit::Int(i) = &lit.lit { - let value = i.base10_parse::()?; - RuleDef::PREC { - value: value.into(), - content: Box::new(rule), - } - } else { - return Err(Error::new( - lit.span(), - "Expected integer literal for precedence", - )); - } - } else if let Some(Expr::Lit(lit)) = prec_left_param { - let value = if let Lit::Int(i) = &lit.lit { - i.base10_parse::()? - } else { - return Err(Error::new( - lit.span(), - "Expected integer literal for precedence", - )); - }; - RuleDef::PREC_LEFT { - value: value.into(), - content: Box::new(rule), - } - } else if let Some(Expr::Lit(lit)) = prec_right_param { - let value = if let Lit::Int(i) = &lit.lit { - i.base10_parse::()? - } else { - return Err(Error::new( - lit.span(), - "Expected integer literal for precedence", - )); - }; - RuleDef::PREC_RIGHT { - value: value.into(), - content: Box::new(rule), - } - } else if let Some(Expr::Lit(lit)) = prec_dynamic_param { - if let Lit::Int(i) = &lit.lit { - RuleDef::PREC_DYNAMIC { - value: i.base10_parse::()?, - content: Box::new(rule), - } - } else { - return Err(Error::new( - lit.span(), - "Expected integer literal for precedence", - )); - } - } else { - rule - }; - - Ok(rule) - } -} - fn gen_field( ident_str: &str, leaf_type: Option, @@ -484,7 +337,7 @@ fn gen_field( ) -> Result<(RuleDef, bool)> { let precs = RuleParams::new(&attrs)?; - if precs.language { + if precs.language.is_some() { return Err(Error::new( leaf_type.span(), "Cannot specify language on a field", @@ -694,3 +547,158 @@ fn gen_struct_or_variant( ctx.grammar.rules.insert(path, precs.apply(base_rule)?); Ok(()) } + +#[derive(Debug)] +pub struct RuleParams { + pub prec_param: Option, + pub prec_left_param: Option, + pub prec_right_param: Option, + pub prec_dynamic_param: Option, + pub language: Option, + pub extras: Option>, + pub word: Option, +} + +impl RuleParams { + fn new(attrs: &[Attribute]) 
-> Result { + let prec_attr = attrs.iter().find(|attr| sitter_attr_matches(attr, "prec")); + + let prec_param = prec_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; + + let prec_left_attr = attrs + .iter() + .find(|attr| sitter_attr_matches(attr, "prec_left")); + + let prec_left_param = prec_left_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; + + let prec_right_attr = attrs + .iter() + .find(|attr| sitter_attr_matches(attr, "prec_right")); + + let prec_right_param = prec_right_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; + + let prec_dynamic_attr = attrs + .iter() + .find(|attr| sitter_attr_matches(attr, "prec_dynamic")); + + let prec_dynamic_param = prec_dynamic_attr + .map(|a| a.parse_args_with(Expr::parse)) + .transpose()?; + + if let (Some(prec_left), Some(_prec_right)) = (prec_left_attr, prec_right_attr) { + return Err(Error::new( + prec_left.span(), + "only one of prec, prec_left, and prec_right can be specified", + )); + } + + let extras = attrs + .iter() + .find(|a| sitter_attr_matches(a, "extras")) + .map(|a| a.parse_args_with(Punctuated::::parse_terminated)) + .transpose()?; + + let language = attrs + .iter() + .find(|a| sitter_attr_matches(a, "language")) + .map(LanguageExpr::from_attr) + .transpose()?; + let word = attrs + .iter() + .find(|a| sitter_attr_matches(a, "word")) + .map(|a| a.parse_args_with(TsInput::parse)) + .transpose()?; + // TODO: Refactor this and make it a sub-field of the `LanguageExpr` type instead. + if language.is_none() + && let Some(w) = &word + { + return Err(Error::new( + w.span(), + "Cannot specify #[word] on non-language rule", + )); + } + + Ok(Self { + prec_param, + prec_left_param, + prec_right_param, + prec_dynamic_param, + extras, + word, + language, + }) + } + + fn apply(&self, rule: RuleDef) -> Result { + let Self { + prec_param, + prec_left_param, + prec_right_param, + prec_dynamic_param, + .. 
+ } = self; + + let rule = if let Some(Expr::Lit(lit)) = prec_param { + if let Lit::Int(i) = &lit.lit { + let value = i.base10_parse::()?; + RuleDef::PREC { + value: value.into(), + content: Box::new(rule), + } + } else { + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); + } + } else if let Some(Expr::Lit(lit)) = prec_left_param { + let value = if let Lit::Int(i) = &lit.lit { + i.base10_parse::()? + } else { + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); + }; + RuleDef::PREC_LEFT { + value: value.into(), + content: Box::new(rule), + } + } else if let Some(Expr::Lit(lit)) = prec_right_param { + let value = if let Lit::Int(i) = &lit.lit { + i.base10_parse::()? + } else { + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); + }; + RuleDef::PREC_RIGHT { + value: value.into(), + content: Box::new(rule), + } + } else if let Some(Expr::Lit(lit)) = prec_dynamic_param { + if let Lit::Int(i) = &lit.lit { + RuleDef::PREC_DYNAMIC { + value: i.base10_parse::()?, + content: Box::new(rule), + } + } else { + return Err(Error::new( + lit.span(), + "Expected integer literal for precedence", + )); + } + } else { + rule + }; + + Ok(rule) + } +} diff --git a/common/src/lib.rs b/common/src/lib.rs index 1f9eada..950210e 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -11,6 +11,52 @@ use syn::{ pub mod expansion; +/// Language expression parsed from an attribute. +/// `#[language]` is the default, additional fields can be provided like so: +/// `#[language(name = "example")]` +#[derive(Debug, Clone)] +pub struct LanguageExpr { + // Useful to hold this for a useful span location on error generation. 
+ pub path: Ident, + pub name: Option, +} + +impl LanguageExpr { + pub fn from_attr(a: &Attribute) -> Result { + let path = a.path().require_ident()?.clone(); + if path != "language" { + panic!("Expected language in LanguageExpr, this is a bug in rust-sitter"); + } + let mut s = Self { path, name: None }; + if matches!(&a.meta, Meta::List(_)) { + let args = + a.parse_args_with(Punctuated::::parse_terminated)?; + for arg in args { + if arg.path == "name" { + if s.name.is_some() { + return Err(Error::new(arg.path.span(), "Duplicate name field")); + } + let value = match arg.expr { + Expr::Lit(ExprLit { attrs:_ , lit: Lit::Str(s) }) => s, + _ => { + return Err(Error::new( + arg.expr.span(), + "name must be a literal string", + )); + } + }; + s.name = Some(value.value()); + } + } + } + Ok(s) + } + + pub fn name(&self) -> Option { + self.name.clone() + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct NameValueExpr { pub path: Ident, @@ -28,31 +74,6 @@ impl Parse for NameValueExpr { } } -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct FieldThenParams { - pub field: Field, - pub comma: Option, - pub params: Punctuated, -} - -impl Parse for FieldThenParams { - fn parse(input: ParseStream) -> syn::Result { - let field = Field::parse_unnamed(input)?; - let comma: Option = input.parse()?; - let params = if comma.is_some() { - Punctuated::parse_terminated_with(input, NameValueExpr::parse)? - } else { - Punctuated::new() - }; - - Ok(FieldThenParams { - field, - comma, - params, - }) - } -} - /// tree-sitter input parsing. #[derive(Debug, Clone, PartialEq, Eq)] pub struct TsInput { diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 8f8a8e9..e930d40 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -32,11 +32,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { let mut ctx = ExpansionState::new(); rust_sitter_common::expansion::process_rule(d, &mut ctx)?; - // TODO: Allow renaming it. 
- let is_language = input - .attrs - .iter() - .any(|a| sitter_attr_matches(a, "language")); + let ident = input.ident; let attrs = input.attrs; let (extract, rule) = match input.data { @@ -151,8 +147,9 @@ pub fn expand_rule(input: DeriveInput) -> Result { }; // If it is language, then we need to generate the corresponding functions. - let lang = if is_language { - let tree_sitter_ident = Ident::new(&format!("tree_sitter_{ident}"), Span::call_site()); + let lang = if let Some((ident, lang)) = ctx.language_rule { + let name = lang.name().unwrap_or_else(|| ident.to_string()); + let tree_sitter_ident = Ident::new(&format!("tree_sitter_{name}"), Span::call_site()); let root_type_docstr = format!("[`{ident}`]"); quote! { From 3d79d5ec42f72ba570fc9bec18b9121802edc06d Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 1 Sep 2025 19:35:08 -0500 Subject: [PATCH 42/50] Add `RULE_NAME` as a constant so it is easier to match against --- macro/src/expansion.rs | 9 ++------- .../rust_sitter_macro__tests__enum_prec_left.snap | 4 +--- .../rust_sitter_macro__tests__enum_recursive.snap | 4 +--- ...sitter_macro__tests__enum_transformed_fields.snap | 4 +--- ...t_sitter_macro__tests__enum_with_named_field.snap | 4 +--- ...sitter_macro__tests__enum_with_unamed_vector.snap | 8 ++------ ...t_sitter_macro__tests__grammar_unboxed_field.snap | 8 ++------ .../rust_sitter_macro__tests__spanned_in_vec.snap | 12 +++--------- .../rust_sitter_macro__tests__struct_extra.snap | 8 ++------ .../rust_sitter_macro__tests__struct_optional.snap | 8 ++------ .../rust_sitter_macro__tests__struct_repeat.snap | 12 +++--------- runtime/src/rule.rs | 5 ++++- 12 files changed, 24 insertions(+), 62 deletions(-) diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index e930d40..3fa354b 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -66,12 +66,10 @@ pub fn expand_rule(input: DeriveInput) -> Result { let ident_str = ident.to_string(); let rule_impl: Item = syn::parse_quote! 
{ impl ::rust_sitter::rule::Rule for #ident { + const RULE_NAME: &'static str = #ident_str; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - #ident_str - } } }; @@ -132,13 +130,10 @@ pub fn expand_rule(input: DeriveInput) -> Result { let rule_impl: Item = syn::parse_quote! { impl ::rust_sitter::rule::Rule for #enum_name { + const RULE_NAME: &'static str = #ident_str; fn produce_ast() -> String { String::new() } - - fn rule_name() -> &'static str { - #ident_str - } } }; (extract_impl, rule_impl) diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap index fbeb228..1ad76c1 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_prec_left.snap @@ -124,11 +124,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Expression { + const RULE_NAME: &'static str = "Expression"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Expression" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap index 04e4210..99bda4f 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_recursive.snap @@ -110,11 +110,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Expression { + const RULE_NAME: &'static str = "Expression"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Expression" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap index 41024e7..92795ce 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap +++ 
b/macro/src/snapshots/rust_sitter_macro__tests__enum_transformed_fields.snap @@ -73,11 +73,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Expression { + const RULE_NAME: &'static str = "Expression"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Expression" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap index 22969ea..d66293f 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_named_field.snap @@ -116,11 +116,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Expr { + const RULE_NAME: &'static str = "Expr"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Expr" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap index 6a25a37..bb45fd8 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__enum_with_unamed_vector.snap @@ -40,12 +40,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Number { + const RULE_NAME: &'static str = "Number"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Number" - } } impl ::rust_sitter::rule::Language for Expr { fn produce_grammar() -> String { @@ -128,11 +126,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Expr { + const RULE_NAME: &'static str = "Expr"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Expr" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap index 8db0bd2..9606554 100644 --- 
a/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__grammar_unboxed_field.snap @@ -62,12 +62,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Language { + const RULE_NAME: &'static str = "Language"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Language" - } } impl ::rust_sitter::Extract for Expression { type Output = Self; @@ -121,11 +119,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Expression { + const RULE_NAME: &'static str = "Expression"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Expression" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap index 9128ec0..7484aa3 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__spanned_in_vec.snap @@ -60,12 +60,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for NumberList { + const RULE_NAME: &'static str = "NumberList"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "NumberList" - } } impl ::rust_sitter::Extract for Number { type Output = Self; @@ -104,12 +102,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Number { + const RULE_NAME: &'static str = "Number"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Number" - } } impl ::rust_sitter::Extract for Whitespace { type Output = Self; @@ -148,11 +144,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Whitespace { + const RULE_NAME: &'static str = "Whitespace"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Whitespace" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap index fddafa7..5dc70d9 
100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_extra.snap @@ -72,12 +72,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Expression { + const RULE_NAME: &'static str = "Expression"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Expression" - } } impl ::rust_sitter::Extract for Whitespace { type Output = Self; @@ -116,11 +114,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Whitespace { + const RULE_NAME: &'static str = "Whitespace"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Whitespace" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap index 51281a9..07117a4 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_optional.snap @@ -71,12 +71,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Language { + const RULE_NAME: &'static str = "Language"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Language" - } } impl ::rust_sitter::Extract for Number { type Output = Self; @@ -115,11 +113,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Number { + const RULE_NAME: &'static str = "Number"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Number" - } } } diff --git a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap index ed956d0..0113824 100644 --- a/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap +++ b/macro/src/snapshots/rust_sitter_macro__tests__struct_repeat.snap @@ -59,12 +59,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for NumberList { + const RULE_NAME: &'static str = "NumberList"; fn produce_ast() -> 
String { String::new() } - fn rule_name() -> &'static str { - "NumberList" - } } impl ::rust_sitter::Extract for Number { type Output = Self; @@ -103,12 +101,10 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Number { + const RULE_NAME: &'static str = "Number"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Number" - } } impl ::rust_sitter::Extract for Whitespace { type Output = Self; @@ -147,11 +143,9 @@ mod grammar { } } impl ::rust_sitter::rule::Rule for Whitespace { + const RULE_NAME: &'static str = "Whitespace"; fn produce_ast() -> String { String::new() } - fn rule_name() -> &'static str { - "Whitespace" - } } } diff --git a/runtime/src/rule.rs b/runtime/src/rule.rs index 6428bf6..d0db17b 100644 --- a/runtime/src/rule.rs +++ b/runtime/src/rule.rs @@ -3,6 +3,7 @@ use tree_sitter::Node; use crate::{Extract, NodeParseResult, ParseResult, extract::ExtractContext}; pub trait Rule: Extract { + const RULE_NAME: &'static str; // TODO: Use the grammar::RuleDef and grammar::Grammar // For this to work as expected we need a #[derive(Language)], or at least a `Language` trait // which then has the `parse` function and the `generate_grammar() -> grammar::Grammar` @@ -10,7 +11,9 @@ pub trait Rule: Extract { // Since we aren't using any of this yet though, we will leave this alone. fn produce_ast() -> String; // Maybe Cow instead. - fn rule_name() -> &'static str; + fn rule_name() -> &'static str { + Self::RULE_NAME + } /// Extracts directly from a node. 
fn extract_node<'a>(n: Node<'a>, source: &[u8]) -> NodeParseResult<'a, Self> From 487faeb05b2bd22281e8d04c3925bf1f8d249b22 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Tue, 2 Sep 2025 13:01:12 -0500 Subject: [PATCH 43/50] pub from_node function --- runtime/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index 10bcca8..716eee3 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -84,7 +84,7 @@ impl Position { Self { bytes, start, end } } - fn from_node(node: Node<'_>) -> Self { + pub fn from_node(node: Node<'_>) -> Self { let bytes = node.byte_range(); let start = Point::from_tree_sitter(node.start_position()); let end = Point::from_tree_sitter(node.end_position()); From 8ffa3ca3e3cd6bb11ccc9c7be8aa0729bfadb3ce Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Mon, 8 Sep 2025 15:04:00 -0500 Subject: [PATCH 44/50] Fix skipping extra nodes --- Cargo.lock | 106 +++++++++++++++++++++++---------------- runtime/src/__private.rs | 7 +++ 2 files changed, 70 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ce757e..5043477 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,9 +69,9 @@ checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" [[package]] name = "bitflags" -version = "2.9.3" +version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" [[package]] name = "bumpalo" @@ -81,10 +81,11 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "cc" -version = "1.2.34" +version = "1.2.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54" dependencies = [ + 
"find-msvc-tools", "shlex", ] @@ -196,6 +197,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "find-msvc-tools" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -355,9 +362,9 @@ checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "insta" -version = "1.43.1" +version = "1.43.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "154934ea70c58054b556dd430b99a98c2a7ff5309ac9891597e339b5c28f4371" +checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" dependencies = [ "console", "once_cell", @@ -411,9 +418,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" dependencies = [ "once_cell", "wasm-bindgen", @@ -439,9 +446,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "memchr" @@ -494,9 +501,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +checksum = 
"84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" dependencies = [ "zerovec", ] @@ -916,29 +923,30 @@ dependencies = [ [[package]] name = "wasi" -version = "0.14.2+wasi-0.2.4" +version = "0.14.4+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "88a5f4a424faf49c3c2c344f166f0662341d470ea185e939657aaff130f0ec4a" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" dependencies = [ "cfg-if", "once_cell", "wasm-bindgen-macro", + "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" dependencies = [ "bumpalo", "log", @@ -950,9 +958,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "0ca85039a9b469b38336411d6d6ced91f3fc87109a2a27b0c197663f5144dffe" dependencies = [ "cfg-if", "js-sys", @@ -963,9 +971,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" dependencies = [ "quote", 
"wasm-bindgen-macro-support", @@ -973,9 +981,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" dependencies = [ "proc-macro2", "quote", @@ -986,18 +994,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3" +checksum = "80cc7f8a4114fdaa0c58383caf973fc126cf004eba25c9dc639bccd3880d55ad" dependencies = [ "js-sys", "minicov", @@ -1008,9 +1016,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.50" +version = "0.3.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" +checksum = "c5ada2ab788d46d4bda04c9d567702a79c8ced14f51f221646a16ed39d0e6a5d" dependencies = [ "proc-macro2", "quote", @@ -1019,9 +1027,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "77e4b637749ff0d92b8fad63aa1f7cff3cbe125fd49c175cd6345e7272638b12" dependencies = [ "js-sys", "wasm-bindgen", @@ -1029,11 +1037,11 @@ dependencies = [ [[package]] name = "winapi-util" -version = "0.1.10" 
+version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.0", ] [[package]] @@ -1042,6 +1050,12 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + [[package]] name = "windows-sys" version = "0.59.0" @@ -1060,6 +1074,15 @@ dependencies = [ "windows-targets 0.53.3", ] +[[package]] +name = "windows-sys" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" +dependencies = [ + "windows-link 0.2.0", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -1082,7 +1105,7 @@ version = "0.53.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" dependencies = [ - "windows-link", + "windows-link 0.1.3", "windows_aarch64_gnullvm 0.53.0", "windows_aarch64_msvc 0.53.0", "windows_i686_gnu 0.53.0", @@ -1190,13 +1213,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] -name = "wit-bindgen-rt" -version = "0.39.0" +name = "wit-bindgen" +version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] +checksum = "5c573471f125075647d03df72e026074b7203790d41351cd6edc96f46bcccd36" 
[[package]] name = "writeable" diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 0c02377..8200e89 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -109,6 +109,13 @@ pub fn skip_text<'tree>( cursor.field_name() ); loop { + if cursor.node().is_extra() { + if !cursor.goto_next_sibling() { + state.cursor = None; + return Ok(()); + } + continue; + } if let Some(name) = cursor.field_name() { if name == field_name { if !cursor.goto_next_sibling() { From 221e396addaa8c3f4269bad11d5718a87cfc55a2 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 11 Sep 2025 11:29:55 -0500 Subject: [PATCH 45/50] Fix extraction on empty `Vec` --- common/src/lib.rs | 5 ++++- macro/src/expansion.rs | 13 ++----------- runtime/src/error.rs | 4 +++- runtime/src/extract.rs | 20 ++++++++++---------- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/common/src/lib.rs b/common/src/lib.rs index 950210e..b11cd47 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -37,7 +37,10 @@ impl LanguageExpr { return Err(Error::new(arg.path.span(), "Duplicate name field")); } let value = match arg.expr { - Expr::Lit(ExprLit { attrs:_ , lit: Lit::Str(s) }) => s, + Expr::Lit(ExprLit { + attrs: _, + lit: Lit::Str(s), + }) => s, _ => { return Err(Error::new( arg.expr.span(), diff --git a/macro/src/expansion.rs b/macro/src/expansion.rs index 3fa354b..6825a67 100644 --- a/macro/src/expansion.rs +++ b/macro/src/expansion.rs @@ -32,7 +32,6 @@ pub fn expand_rule(input: DeriveInput) -> Result { let mut ctx = ExpansionState::new(); rust_sitter_common::expansion::process_rule(d, &mut ctx)?; - let ident = input.ident; let attrs = input.attrs; let (extract, rule) = match input.data { @@ -178,11 +177,7 @@ pub fn expand_rule(input: DeriveInput) -> Result { }) } -fn gen_field( - ident_str: Option<&str>, - leaf: Field, - rule: &RuleDef, -) -> Result { +fn gen_field(ident_str: Option<&str>, leaf: Field, rule: &RuleDef) -> Result { let leaf_type = &leaf.ty; let 
leaf_attr = leaf @@ -423,11 +418,7 @@ fn rule_def_to_extract(def: &RuleDef) -> Result { }) } -fn rule_def_add_state( - def: &RuleDef, - optional: bool, - states: &mut Vec, -) { +fn rule_def_add_state(def: &RuleDef, optional: bool, states: &mut Vec) { let s = match def { RuleDef::SYMBOL { name } => { // This `grammar` is local to the particular macro expansion and does not include other diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 8b4da52..00e2b16 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -357,7 +357,9 @@ impl<'a> ExtractError<'a> { let msg = msg.into(); log::error!( "field_extraction error: {}::{}, msg={}", - ctx.struct_name, ctx.field_name, msg + ctx.struct_name, + ctx.field_name, + msg ); let position = ctx.position(); Self::new( diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index cf3cfe6..e658114 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -125,10 +125,10 @@ where fn do_extract_field<'cursor, 'tree>( self, - ctx: &mut ExtractContext, - it: &mut ExtractFieldIterator<'cursor, 'tree>, - source: &[u8], - leaf_fn: E::LeafFn, + _ctx: &mut ExtractContext, + _it: &mut ExtractFieldIterator<'cursor, 'tree>, + _source: &[u8], + _leaf_fn: E::LeafFn, ) -> Result<'tree, O::Output> { todo!() } @@ -147,7 +147,7 @@ where type LeafFn = F; type Output = L; - fn extract<'a, 'tree>( + fn extract<'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], @@ -167,7 +167,7 @@ where impl Extract for () { type LeafFn = (); type Output = (); - fn extract<'a, 'tree>( + fn extract<'tree>( _ctx: &mut ExtractContext, _node: Option>, _source: &[u8], @@ -180,7 +180,7 @@ impl Extract for () { impl Extract for Option { type LeafFn = T::LeafFn; type Output = Option; - fn extract<'a, 'tree>( + fn extract<'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], @@ -208,7 +208,7 @@ impl Extract for Option { impl Extract for Box { type LeafFn = T::LeafFn; type Output = Box; - fn extract<'a, 'tree>( + fn 
extract<'tree>( ctx: &mut ExtractContext, node: Option>, source: &[u8], @@ -233,7 +233,7 @@ where { type LeafFn = T::LeafFn; type Output = Vec; - fn extract<'a, 'tree>( + fn extract<'tree>( _ctx: &mut ExtractContext, node: Option>, _source: &[u8], @@ -241,6 +241,7 @@ where ) -> Result<'tree, Self::Output> { match node { None => Ok(vec![]), + Some(n) if n.child_count() == 0 => Ok(vec![]), _ => panic!("Cannot be implemented on Vec"), } } @@ -255,7 +256,6 @@ where let mut error = ExtractError::empty(); while it.is_valid() { let n = it.current_node(); - // Try and parse the error specially. match T::extract_field(ctx, it, source, leaf_fn.clone()) { Ok(t) => out.push(t), Err(e) => error.merge(e), From 316667ce48293ab04755825e315f1adf1cf25dbe Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Thu, 11 Sep 2025 19:02:14 -0500 Subject: [PATCH 46/50] Update README --- README.md | 28 +++++++++++++++++++++++----- example/src/main.rs | 29 +++++++++++++++-------------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 491e168..f9726c5 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,6 @@ rust-sitter = { git = "https://github.com/otonoma/rust-sitter" } rust-sitter-tool = { git = "https://github.com/otonoma/rust-sitter" } ``` -_Note: By default, Rust Sitter uses a fork of Tree Sitter with a pure-Rust runtime to support `wasm32-unknown-unknown`. To use the standard C runtime instead, disable default features and enable the `tree-sitter-standard` feature_ - The first step is to configure your `build.rs` to compile and link the generated Tree Sitter parser: ```rust @@ -101,9 +99,9 @@ pub enum Expr { We can then parse text using this grammar: ```rust -dbg!(grammar::Expr::parse("1+2+3")); +dbg!(grammar::Expr::parse("1+2+3").into_result()); /* -grammar::Expr::parse("1+2+3") = Ok(Add( +grammar::Expr::parse("1+2+3").into_result() = Ok(Add( Add( Number( 1, @@ -194,7 +192,27 @@ Usually, whitespace is optional before each token. 
This attribute means that the This annotation can be used to define a field that does not correspond to anything in the input string, such as some metadata. This annotation takes a single parameter, which is the value that should be used to populate that field at runtime. ### `#[word]` -This annotation marks the field as a Tree Sitter [word](https://tree-sitter.github.io/tree-sitter/creating-parsers#keywords), which is useful when handling errors involving keywords. Only one field in the grammar can be marked as a word. +This annotation marks the field as a Tree Sitter [word](https://tree-sitter.github.io/tree-sitter/creating-parsers#keywords), which is useful when handling errors involving keywords. Like `#[extras]`, the `#[word]` is specified on the `#[language]` implementation: + +```rust +#[derive(Debug, Rule)] +#[language] +#[word(Ident)] +pub struct Language { + // ... +} + +#[derive(Rule)] +#[leaf(re(r"[a-zA-Z_]+"))] +pub struct Ident; +``` + +## Partial AST and Errors +rust-sitter, like tree-sitter, can produce a partial AST along with its errors. Calling `Language::parse` will +produce a `ParseResult` object which includes as much of the AST as it was able to extract, as well as a `Vec` +of all of the parsing errors encountered. This is useful for language servers and other contexts which can +make use of a partial AST. Currently this may not produce the _maximal_ AST, but this may be possible +in the future. ## Special Types Rust Sitter has a few special types that can be used to define more complex grammars. 
diff --git a/example/src/main.rs b/example/src/main.rs index 2c41f2b..1b29af0 100644 --- a/example/src/main.rs +++ b/example/src/main.rs @@ -68,19 +68,20 @@ fn main() { } fn process_input(input: &str) { - match T::parse(input).into_result() { - Ok(expr) => println!("{expr:#?}"), - Err(errs) => { - let mut codemap = CodeMap::new(); - let file_span = codemap.add_file("".to_string(), input.to_string()); - let mut diagnostics = vec![]; - for error in errs { - let d = convert_parse_error_to_diagnostics(&file_span.span, &error); - diagnostics.push(d); - } - - let mut emitter = Emitter::stderr(ColorConfig::Always, Some(&codemap)); - emitter.emit(&diagnostics); + let result = T::parse(input); + match T::parse(input).result { + Some(expr) => println!("{expr:#?}"), + None => eprintln!("Could not parse"), + } + if !result.errors.is_empty() { + let mut codemap = CodeMap::new(); + let file_span = codemap.add_file("".to_string(), input.to_string()); + let mut diagnostics = vec![]; + for error in result.errors { + let d = convert_parse_error_to_diagnostics(&file_span.span, &error); + diagnostics.push(d); } - }; + let mut emitter = Emitter::stderr(ColorConfig::Always, Some(&codemap)); + emitter.emit(&diagnostics); + } } From c74bbde4788651921e97a0a5827e99230e3b82e1 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Tue, 23 Sep 2025 13:29:35 -0500 Subject: [PATCH 47/50] Change debug! to trace! 
--- runtime/src/__private.rs | 10 +++++----- runtime/src/error.rs | 10 +++++----- runtime/src/extract.rs | 1 - runtime/src/extract/field.rs | 38 ++++++++++++++++++------------------ 4 files changed, 29 insertions(+), 30 deletions(-) diff --git a/runtime/src/__private.rs b/runtime/src/__private.rs index 8200e89..c880220 100644 --- a/runtime/src/__private.rs +++ b/runtime/src/__private.rs @@ -8,14 +8,14 @@ use crate::{ Extract, ExtractContext, Extractor, extract::{ExtractFieldContext, ExtractFieldIterator, Result}, }; -use log::{debug, trace}; +use log::trace; pub fn extract_struct_or_variant<'tree, T>( struct_name: &'static str, node: tree_sitter::Node<'tree>, construct_expr: impl for<'t> Fn(&mut ExtractStructState<'t>) -> Result<'t, T>, ) -> Result<'tree, T> { - debug!("extract_struct_or_variant node.kind={}", node.kind()); + trace!("extract_struct_or_variant node.kind={}", node.kind()); trace!("extract_struct_or_variant node={}", node); trace!( "extract_struct_or_variant node.child_count={}", @@ -52,7 +52,7 @@ pub fn extract_field<'tree, T: Extract, E: Extractor>( source: &[u8], field_name: &'static str, ) -> Result<'tree, T::Output> { - debug!( + trace!( "extract_field struct_name={} field_name={field_name}", state.struct_name ); @@ -99,12 +99,12 @@ pub fn skip_text<'tree>( state: &mut ExtractStructState<'tree>, field_name: &'static str, ) -> Result<'tree, ()> { - debug!( + trace!( "skip field: {field_name:?}, has cursor: {}", state.cursor.is_some() ); if let Some(cursor) = state.cursor.as_mut() { - debug!( + trace!( "skip field: expects: {field_name:?}, has: {:?}", cursor.field_name() ); diff --git a/runtime/src/error.rs b/runtime/src/error.rs index 00e2b16..33db2cd 100644 --- a/runtime/src/error.rs +++ b/runtime/src/error.rs @@ -1,4 +1,4 @@ -use log::{debug, trace}; +use log::trace; use std::{collections::HashSet, ops::Range}; use crate::{ExtractContext, Point, Position, extract::ExtractFieldIterator}; @@ -89,7 +89,7 @@ impl<'a> NodeError<'a> { if 
self.node.is_missing() && let Some(parent) = self.node.parent() { - debug!("attempting missing shift: {}", parent.to_sexp()); + trace!("attempting missing shift: {}", parent.to_sexp()); // Find where the missing node is located in the parent, then shift it backwards by // removing any extra nodes in its place. // let mut c = parent.walk(); @@ -111,14 +111,14 @@ impl<'a> NodeError<'a> { if !has_shifted { has_shifted = node.is_extra(); } - debug!("shifting past extra: {}", n); + trace!("shifting past extra: {}", n); if !node.is_extra() { break; } } if has_shifted { - debug!("shifted to node: {}", node.kind()); + trace!("shifted to node: {}", node.kind()); let range = node.byte_range(); let range = range.end..range.end; let new_err = Position::new( @@ -129,7 +129,7 @@ impl<'a> NodeError<'a> { parent.byte_range(), (parent.start_position().into(), parent.end_position().into()), ); - debug!("shifted position from {error_position:?} to {new_pos:?}"); + trace!("shifted position from {error_position:?} to {new_pos:?}"); error_position = new_err; node_position = new_pos; } diff --git a/runtime/src/extract.rs b/runtime/src/extract.rs index e658114..cac4e94 100644 --- a/runtime/src/extract.rs +++ b/runtime/src/extract.rs @@ -330,7 +330,6 @@ macro_rules! extract_for_tuple { fn extract_field<'cursor, 'tree>(ctx: &mut ExtractContext, it: &mut ExtractFieldIterator<'cursor, 'tree>, source: &[u8], _l: ()) -> Result<'tree, Self> { // NOTE: Nested tuples are not supported as it stands. - log::debug!("extract_field on tuple"); Ok(( $( $t::extract_field(ctx, it, source, Default::default())? 
diff --git a/runtime/src/extract/field.rs b/runtime/src/extract/field.rs index 4a3240f..b621b34 100644 --- a/runtime/src/extract/field.rs +++ b/runtime/src/extract/field.rs @@ -1,7 +1,7 @@ use crate::error::ExtractError; use super::Result; -use log::{debug, trace}; +use log::trace; use tree_sitter::Node; pub struct ExtractFieldIterator<'cursor, 'tree: 'cursor> { @@ -80,13 +80,13 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { pub fn advance_state(&mut self) -> Result<'tree, ()> { if self.current == NodeIterState::Complete { - debug!("advance_state: verifying completion"); + trace!("advance_state: verifying completion"); self.finalize()?; return Ok(()); } self.skip_extras(); let n = self.cursor.node(); - debug!( + trace!( "advance_state: field_name={}, cursor.field_name={:?}, state={}, num_states={}, optional={}, node={}, node.kind={}", self.field_name, self.cursor.field_name(), @@ -104,16 +104,16 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { let state = (self.ctx.state_fn)(self.ctx.state); self.ctx.state += 1; - debug!("advance_state: got state={:?}", state); + trace!("advance_state: got state={:?}", state); match state { ExtractFieldState::Str(expected, named, optional) => { let cursor_field = self.cursor.field_name(); let field_name = self.field_name; if cursor_field != Some(field_name) { - debug!("advance_state: field names didn't match"); + trace!("advance_state: field names didn't match"); // TODO: It would be generally lovely to clean up this logic throughout. 
if optional { - debug!("advance_state: state didn't match, but optional, skipping"); + trace!("advance_state: state didn't match, but optional, skipping"); self.current = NodeIterState::Node(None); return Ok(()); } @@ -128,13 +128,13 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { return Ok(()); } if n.kind() == expected && n.is_named() == named { - debug!("advance_state: state matched, advancing iteration"); + trace!("advance_state: state matched, advancing iteration"); // advance the cursor and return the current node. self.advance_cursor(); self.current = NodeIterState::Node(Some(n)); Ok(()) } else if optional { - debug!("advance_state: state didn't match, but optional, skipping"); + trace!("advance_state: state didn't match, but optional, skipping"); self.current = NodeIterState::Node(None); Ok(()) } else { @@ -146,9 +146,9 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { let cursor_field = self.cursor.field_name(); let field_name = self.field_name; if cursor_field != Some(field_name) { - debug!("advance_state: field names didn't match"); + trace!("advance_state: field names didn't match"); if optional { - debug!("advance_state: state didn't match, but optional, skipping"); + trace!("advance_state: state didn't match, but optional, skipping"); self.current = NodeIterState::Node(None); return Ok(()); } @@ -177,7 +177,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { } } ExtractFieldState::Repeat(expected, named) => { - debug!("advance_state: repeat state: expected={expected}, named={named}"); + trace!("advance_state: repeat state: expected={expected}, named={named}"); if !self.did_advance { // We reached the end of the cursor state, we can advance to the end. 
self.ctx.state = self.ctx.num_states + 1; @@ -190,7 +190,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { let cursor_field = self.cursor.field_name(); let field_name = self.field_name; if cursor_field != Some(field_name) { - debug!("advance_state: field names didn't match in repeat, completing state"); + trace!("advance_state: field names didn't match in repeat, completing state"); self.ctx.state = self.ctx.num_states + 1; self.set_complete(); // Check if we have an optional overall. @@ -203,7 +203,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { return Ok(()); } if n.kind() == expected && n.is_named() == named { - debug!("advance_state: repeat state matched, resetting iteration"); + trace!("advance_state: repeat state matched, resetting iteration"); // Advance past the repeat symbol and start over. self.advance_cursor(); self.ctx.state = 0; @@ -215,7 +215,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { } } ExtractFieldState::Repeat1 => { - debug!("advance_state: repeat1 state"); + trace!("advance_state: repeat1 state"); if !self.did_advance { self.ctx.state = self.ctx.num_states + 1; self.set_complete(); @@ -224,7 +224,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { let cursor_field = self.cursor.field_name(); let field_name = self.field_name; if cursor_field != Some(field_name) { - debug!("advance_state: field names didn't match in repeat, completing state"); + trace!("advance_state: field names didn't match in repeat, completing state"); self.ctx.state = self.ctx.num_states + 1; self.set_complete(); // Check if we have an optional overall. @@ -236,7 +236,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { // })?; Ok(()) } else { - debug!("advance_state: field names matched, triggering repeat"); + trace!("advance_state: field names matched, triggering repeat"); // No repeat symbol in this case, we just are at the next repeat node already. 
self.ctx.state = 0; self.advance_state()?; @@ -244,7 +244,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { } } ExtractFieldState::Complete => { - debug!("advance_state: got complete state"); + trace!("advance_state: got complete state"); self.set_complete(); Ok(()) } @@ -264,7 +264,7 @@ impl<'cursor, 'tree: 'cursor> ExtractFieldIterator<'cursor, 'tree> { pub fn current_node(&self) -> Option> { match self.current { NodeIterState::Node(n) => { - debug!("current_node: {:?}", n.map(|n| n.kind())); + trace!("current_node: {:?}", n.map(|n| n.kind())); n } NodeIterState::Complete => None, @@ -297,7 +297,7 @@ impl<'cursor, 'tree> ExtractFieldIterator<'cursor, 'tree> { F: FnOnce() -> String, { if self.ctx.state == 1 && self.ctx.optional { - debug!("advance_state: optional, outputting None"); + trace!("advance_state: optional, outputting None"); self.ctx.state = self.ctx.num_states + 1; self.set_complete(); Ok(()) From deb6a38d73348bcdf436c6de146188620e9c31fa Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Tue, 25 Nov 2025 14:00:03 -0600 Subject: [PATCH 48/50] Update tree-sitter --- Cargo.lock | 884 ++++++++---------- ...e__arithmetic__tests__failed_parses-4.snap | 3 - ...ple__arithmetic__tests__failed_parses.snap | 3 - 3 files changed, 404 insertions(+), 486 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5043477..54abfc9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,18 +4,24 @@ version = 4 [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = 
"anstream" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -28,9 +34,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" @@ -43,35 +49,49 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] -name = "anyhow" -version = "1.0.99" +name = "bindgen" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] [[package]] name = 
"bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bumpalo" @@ -81,19 +101,39 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "cc" -version = "1.2.36" +version = "1.2.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54" +checksum = "cd405d82c84ff7f35739f175f67d8b9fb7687a0e84ccdc78bd3568839827cf07" dependencies = [ "find-msvc-tools", "shlex", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] [[package]] name = "codemap" @@ -130,16 +170,20 @@ dependencies = [ ] [[package]] -name = "displaydoc" -version = "0.2.5" +name = "convert_case" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f" dependencies = [ - "proc-macro2", - "quote", - "syn", + "unicode-segmentation", ] +[[package]] +name = 
"dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "either" version = "1.15.0" @@ -154,9 +198,9 @@ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "env_filter" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" dependencies = [ "log", "regex", @@ -183,12 +227,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -199,172 +243,83 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.1" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" [[package]] -name = "form_urlencoded" -version = "1.2.2" +name = "fnv" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasip2", ] [[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "icu_collections" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" - -[[package]] 
-name = "icu_properties" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "potential_utf", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" - -[[package]] -name = "icu_provider" -version = "2.0.0" +name = "glob" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" -dependencies = [ - "displaydoc", - "icu_locale_core", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] -name = "idna" -version = "1.1.0" +name = "hashbrown" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", + "allocator-api2", + "equivalent", + "foldhash", ] [[package]] -name = "idna_adapter" -version = "1.2.1" +name = "ident_case" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "indexmap" -version = "2.11.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", "hashbrown", "serde", + "serde_core", ] [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "insta" -version = "1.43.2" +version = "1.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fdb647ebde000f43b5b53f773c30cf9b0cb4300453208713fa38b2c70935a0" +checksum = "e8732d3774162a0851e3f2b150eb98f31a9885dd75985099421d393385a01dfd" dependencies = [ "console", "once_cell", @@ -373,9 +328,18 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] [[package]] name = "itertools" @@ -394,22 +358,22 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", "log", "portable-atomic", "portable-atomic-util", - "serde", + "serde_core", ] [[package]] name = 
"jiff-static" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", @@ -418,9 +382,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.78" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738" +checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" dependencies = [ "once_cell", "wasm-bindgen", @@ -428,21 +392,25 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.175" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] -name = "linux-raw-sys" -version = "0.9.4" +name = "libloading" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] [[package]] -name = "litemap" -version = "0.8.0" +name = "linux-raw-sys" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "log" @@ -452,9 +420,9 @@ checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "minicov" @@ -466,6 +434,22 @@ dependencies = [ "walkdir", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -474,15 +458,44 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "pathdiff" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" + +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared", + "serde", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] [[package]] -name = "percent-encoding" -version = "2.3.2" +name = "phf_shared" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] [[package]] name = "portable-atomic" @@ -500,28 +513,38 @@ dependencies = [ ] [[package]] -name = "potential_utf" -version = "0.1.3" +name = "prettyplease" +version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ - "zerovec", + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro-crate" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +dependencies = [ + "toml_edit", ] [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -534,9 +557,9 @@ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "regex" -version = "1.11.2" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -546,9 +569,9 @@ 
dependencies = [ [[package]] name = "regex-automata" -version = "0.4.10" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -557,9 +580,69 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "relative-path" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "bca40a312222d8ba74837cb474edef44b37f561da5f773981007a10bbaa992b0" +dependencies = [ + "serde", +] + +[[package]] +name = "rquickjs" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a135375fbac5ba723bb6a48f432a72f81539cedde422f0121a86c7c4e96d8e0d" +dependencies = [ + "rquickjs-core", + "rquickjs-macro", +] + +[[package]] +name = "rquickjs-core" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bccb7121a123865c8ace4dea42e7ed84d78b90cbaf4ca32c59849d8d210c9672" +dependencies = [ + "hashbrown", + "phf", + "relative-path", + "rquickjs-sys", +] + +[[package]] +name = "rquickjs-macro" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89f93602cc3112c7f30bf5f29e722784232138692c7df4c52ebbac7e035d900d" +dependencies = [ + "convert_case", + "fnv", + "ident_case", + "indexmap", + "phf_generator", + "phf_shared", + "proc-macro-crate", + "proc-macro2", + "quote", + "rquickjs-core", + "syn", +] + +[[package]] +name = "rquickjs-sys" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "57b1b6528590d4d65dc86b5159eae2d0219709546644c66408b2441696d1d725" +dependencies = [ + "bindgen", + "cc", +] [[package]] name = "rust-sitter" @@ -579,7 +662,7 @@ dependencies = [ name = "rust-sitter-common" version = "0.5.0" dependencies = [ - "itertools", + "itertools 0.14.0", "proc-macro2", "quote", "rust-sitter-types", @@ -646,17 +729,23 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "1.0.8" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + [[package]] name = "ryu" version = "1.0.20" @@ -674,27 +763,38 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" dependencies = [ "serde", + "serde_core", ] [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -703,15 +803,16 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.143" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "indexmap", "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] @@ -727,22 +828,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" [[package]] -name = "smallbitvec" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d31d263dd118560e1a492922182ab6ca6dc1d03a3bf54e7699993f31a4150e3f" - -[[package]] -name = "smallvec" -version = "1.15.1" +name = "siphasher" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] -name = "stable_deref_trait" -version = "1.2.0" +name = "smallbitvec" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "d31d263dd118560e1a492922182ab6ca6dc1d03a3bf54e7699993f31a4150e3f" [[package]] name = "streaming-iterator" @@ -752,9 +847,9 @@ checksum = 
"2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" [[package]] name = "syn" -version = "2.0.106" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -771,28 +866,17 @@ dependencies = [ "syn", ] -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tempfile" -version = "3.21.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", "getrandom", "once_cell", "rustix", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -806,18 +890,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", @@ -825,13 +909,33 @@ dependencies = [ ] [[package]] -name = "tinystr" -version = "0.8.1" +name = "toml_datetime" +version = "0.7.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" +dependencies = [ + "serde_core", +] + +[[package]] +name = "toml_edit" +version = "0.23.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +dependencies = [ + "indexmap", + "toml_datetime", + "toml_parser", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" dependencies = [ - "displaydoc", - "zerovec", + "winnow", ] [[package]] @@ -843,7 +947,7 @@ checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" [[package]] name = "tree-sitter" version = "0.26.0" -source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#0a7c89a71b4dc35cf6a2f3cddc091f78e4c78af2" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#550b30e768ddd32714e91d4304461335d6ac8cd1" dependencies = [ "cc", "regex", @@ -856,15 +960,17 @@ dependencies = [ [[package]] name = "tree-sitter-generate" version = "0.26.0" -source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#0a7c89a71b4dc35cf6a2f3cddc091f78e4c78af2" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#550b30e768ddd32714e91d4304461335d6ac8cd1" dependencies = [ - "anyhow", - "heck", + "bitflags", + "dunce", "indexmap", "indoc", "log", + "pathdiff", "regex", "regex-syntax", + "rquickjs", "rustc-hash", "semver", "serde", @@ -872,38 +978,24 @@ dependencies = [ "smallbitvec", "thiserror", "topological-sort", - "tree-sitter", - "url", ] [[package]] name = "tree-sitter-language" -version = "0.1.4" -source = 
"git+https://github.com/jaboatman/tree-sitter?branch=combined#0a7c89a71b4dc35cf6a2f3cddc091f78e4c78af2" +version = "0.1.5" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#550b30e768ddd32714e91d4304461335d6ac8cd1" [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] -name = "url" -version = "2.5.7" +name = "unicode-segmentation" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "utf8parse" @@ -922,45 +1014,32 @@ dependencies = [ ] [[package]] -name = "wasi" -version = "0.14.4+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88a5f4a424faf49c3c2c344f166f0662341d470ea185e939657aaff130f0ec4a" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.101" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", "wasm-bindgen-shared", ] -[[package]] -name = 
"wasm-bindgen-backend" -version = "0.2.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - [[package]] name = "wasm-bindgen-futures" -version = "0.4.51" +version = "0.4.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ca85039a9b469b38336411d6d6ced91f3fc87109a2a27b0c197663f5144dffe" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" dependencies = [ "cfg-if", "js-sys", @@ -971,9 +1050,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.101" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -981,31 +1060,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.101" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.101" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.51" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"80cc7f8a4114fdaa0c58383caf973fc126cf004eba25c9dc639bccd3880d55ad" +checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" dependencies = [ "js-sys", "minicov", @@ -1016,9 +1095,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.51" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5ada2ab788d46d4bda04c9d567702a79c8ced14f51f221646a16ed39d0e6a5d" +checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" dependencies = [ "proc-macro2", "quote", @@ -1027,9 +1106,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.78" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e4b637749ff0d92b8fad63aa1f7cff3cbe125fd49c175cd6345e7272638b12" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" dependencies = [ "js-sys", "wasm-bindgen", @@ -1041,20 +1120,14 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - -[[package]] -name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" @@ -1062,25 +1135,16 @@ version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-sys" 
-version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.3", + "windows-targets", ] [[package]] name = "windows-sys" -version = "0.61.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.0", + "windows-link", ] [[package]] @@ -1089,31 +1153,14 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", -] - -[[package]] -name = "windows-targets" -version = "0.53.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" -dependencies = [ - "windows-link 0.1.3", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] [[package]] @@ -1122,84 +1169,42 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = 
"windows_aarch64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" - [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" -[[package]] -name = "windows_i686_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" - [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_i686_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -1207,97 +1212,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "windows_x86_64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" - -[[package]] -name = "wit-bindgen" -version = "0.45.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c573471f125075647d03df72e026074b7203790d41351cd6edc96f46bcccd36" - -[[package]] -name = "writeable" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" - -[[package]] -name = "yoke" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.0" +name = "winnow" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerotrie" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", + "memchr", ] [[package]] -name = "zerovec-derive" -version = "0.11.1" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap index 10c4a9d..bdf10e6 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap +++ 
b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses-4.snap @@ -31,9 +31,6 @@ ParseResult { lookaheads: [ "-", "*", - ")", - ",", - "]", ], reason: Error, }, diff --git a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap index 27b2abb..edeaae8 100644 --- a/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap +++ b/example/src/snapshots/rust_sitter_example__arithmetic__tests__failed_parses.snap @@ -35,9 +35,6 @@ ParseResult { lookaheads: [ "-", "*", - ")", - ",", - "]", ], reason: Error, }, From 6446e70e752b103aa9b2e628adb2088b7249ca36 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Fri, 13 Mar 2026 13:55:13 -0700 Subject: [PATCH 49/50] Reduce tree-sitter dependencies for a nice boost in compile times. --- Cargo.lock | 438 +++++++++++++---------------------------------------- Cargo.toml | 2 +- 2 files changed, 107 insertions(+), 333 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 54abfc9..55f17e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - [[package]] name = "anstream" version = "0.6.21" @@ -68,25 +62,22 @@ dependencies = [ ] [[package]] -name = "bindgen" -version = "0.72.1" +name = "async-trait" +version = "0.1.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.13.0", - "log", - "prettyplease", "proc-macro2", "quote", - "regex", - "rustc-hash", - "shlex", "syn", ] +[[package]] +name = "autocfg" 
+version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "bitflags" version = "2.10.0" @@ -95,27 +86,24 @@ checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] -name = "cc" -version = "1.2.47" +name = "cast" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd405d82c84ff7f35739f175f67d8b9fb7687a0e84ccdc78bd3568839827cf07" -dependencies = [ - "find-msvc-tools", - "shlex", -] +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] -name = "cexpr" -version = "0.6.0" +name = "cc" +version = "1.2.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" dependencies = [ - "nom", + "find-msvc-tools", + "shlex", ] [[package]] @@ -124,17 +112,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "codemap" version = "0.1.3" @@ -169,15 +146,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "convert_case" -version = "0.8.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "baaaa0ecca5b51987b9423ccdc971514dd8b0bb7b4060b983d3664dad3f1f89f" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "dunce" version = "1.0.5" @@ -243,21 +211,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foldhash" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" [[package]] name = "getrandom" @@ -271,34 +227,17 @@ dependencies = [ "wasip2", ] -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - [[package]] name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", -] - -[[package]] -name = "ident_case" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = 
"7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown", @@ -317,13 +256,14 @@ dependencies = [ [[package]] name = "insta" -version = "1.44.1" +version = "1.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8732d3774162a0851e3f2b150eb98f31a9885dd75985099421d393385a01dfd" +checksum = "1b66886d14d18d420ab5052cbff544fc5d34d0b2cdd35eb5976aaa10a4a472e5" dependencies = [ "console", "once_cell", "similar", + "tempfile", ] [[package]] @@ -332,15 +272,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.14.0" @@ -352,15 +283,15 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" +checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" dependencies = [ "jiff-static", "log", @@ -371,9 +302,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.16" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" +checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" dependencies = [ 
"proc-macro2", "quote", @@ -382,9 +313,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -392,19 +323,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.177" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] -name = "libloading" -version = "0.8.9" +name = "libm" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" -dependencies = [ - "cfg-if", - "windows-link", -] +checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "linux-raw-sys" @@ -414,9 +341,9 @@ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" @@ -426,28 +353,31 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "minicov" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27fe9f1cc3c22e1687f9446c2083c4c5fc7f0bcf1c7a86bdbded14985895b4b" +checksum = "4869b6a491569605d66d3952bcdf03df789e5b536e5f0cf7758a7f08a55ae24d" dependencies = [ "cc", "walkdir", ] [[package]] -name = 
"minimal-lexical" -version = "0.2.1" +name = "nu-ansi-term" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] [[package]] -name = "nom" -version = "7.1.3" +name = "num-traits" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "memchr", - "minimal-lexical", + "autocfg", + "libm", ] [[package]] @@ -463,45 +393,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] -name = "pathdiff" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" - -[[package]] -name = "phf" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" -dependencies = [ - "phf_shared", - "serde", -] - -[[package]] -name = "phf_generator" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" -dependencies = [ - "fastrand", - "phf_shared", -] - -[[package]] -name = "phf_shared" -version = "0.13.1" +name = "oorandom" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" -dependencies = [ - "siphasher", -] +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "portable-atomic" 
-version = "1.11.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" [[package]] name = "portable-atomic-util" @@ -512,39 +413,20 @@ dependencies = [ "portable-atomic", ] -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn", -] - -[[package]] -name = "proc-macro-crate" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" -dependencies = [ - "toml_edit", -] - [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -584,66 +466,6 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" -[[package]] -name = "relative-path" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bca40a312222d8ba74837cb474edef44b37f561da5f773981007a10bbaa992b0" -dependencies = [ - "serde", -] - -[[package]] -name = "rquickjs" -version = "0.10.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a135375fbac5ba723bb6a48f432a72f81539cedde422f0121a86c7c4e96d8e0d" -dependencies = [ - "rquickjs-core", - "rquickjs-macro", -] - -[[package]] -name = "rquickjs-core" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bccb7121a123865c8ace4dea42e7ed84d78b90cbaf4ca32c59849d8d210c9672" -dependencies = [ - "hashbrown", - "phf", - "relative-path", - "rquickjs-sys", -] - -[[package]] -name = "rquickjs-macro" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89f93602cc3112c7f30bf5f29e722784232138692c7df4c52ebbac7e035d900d" -dependencies = [ - "convert_case", - "fnv", - "ident_case", - "indexmap", - "phf_generator", - "phf_shared", - "proc-macro-crate", - "proc-macro2", - "quote", - "rquickjs-core", - "syn", -] - -[[package]] -name = "rquickjs-sys" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57b1b6528590d4d65dc86b5159eae2d0219709546644c66408b2441696d1d725" -dependencies = [ - "bindgen", - "cc", -] - [[package]] name = "rust-sitter" version = "0.5.0" @@ -662,7 +484,7 @@ dependencies = [ name = "rust-sitter-common" version = "0.5.0" dependencies = [ - "itertools 0.14.0", + "itertools", "proc-macro2", "quote", "rust-sitter-types", @@ -729,9 +551,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags", "errno", @@ -746,12 +568,6 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" -[[package]] -name = "ryu" 
-version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - [[package]] name = "same-file" version = "1.0.6" @@ -803,16 +619,16 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "indexmap", "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -827,12 +643,6 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "smallbitvec" version = "2.6.0" @@ -847,9 +657,9 @@ checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -868,9 +678,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.23.0" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", "getrandom", @@ -908,36 +718,6 @@ dependencies = [ "syn", ] -[[package]] -name = "toml_datetime" -version = "0.7.3" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_edit" -version = "0.23.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" -dependencies = [ - "indexmap", - "toml_datetime", - "toml_parser", - "winnow", -] - -[[package]] -name = "toml_parser" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" -dependencies = [ - "winnow", -] - [[package]] name = "topological-sort" version = "0.2.2" @@ -946,8 +726,7 @@ checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" [[package]] name = "tree-sitter" -version = "0.26.0" -source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#550b30e768ddd32714e91d4304461335d6ac8cd1" +version = "0.27.0" dependencies = [ "cc", "regex", @@ -959,18 +738,15 @@ dependencies = [ [[package]] name = "tree-sitter-generate" -version = "0.26.0" -source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#550b30e768ddd32714e91d4304461335d6ac8cd1" +version = "0.27.0" dependencies = [ "bitflags", "dunce", "indexmap", "indoc", "log", - "pathdiff", "regex", "regex-syntax", - "rquickjs", "rustc-hash", "semver", "serde", @@ -982,8 +758,7 @@ dependencies = [ [[package]] name = "tree-sitter-language" -version = "0.1.5" -source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#550b30e768ddd32714e91d4304461335d6ac8cd1" +version = "0.1.7" [[package]] name = "unicode-ident" @@ -991,12 +766,6 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" -[[package]] -name = "unicode-segmentation" -version = "1.12.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" - [[package]] name = "utf8parse" version = "0.2.2" @@ -1024,9 +793,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", @@ -1037,9 +806,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.55" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -1050,9 +819,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1060,9 +829,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ "bumpalo", "proc-macro2", @@ -1073,21 +842,29 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.105" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" +checksum = "25e90e66d265d3a1efc0e72a54809ab90b9c0c515915c67cdf658689d2c22c6c" dependencies = [ + "async-trait", + "cast", "js-sys", + "libm", "minicov", + "nu-ansi-term", + "num-traits", + "oorandom", + "serde", + "serde_json", "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", @@ -1095,9 +872,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.55" +version = "0.3.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" +checksum = "7150335716dce6028bead2b848e72f47b45e7b9422f64cccdc23bedca89affc1" dependencies = [ "proc-macro2", "quote", @@ -1106,9 +883,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.82" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -1211,17 +988,14 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winnow" -version = "0.7.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" -dependencies = [ - "memchr", -] - [[package]] name = "wit-bindgen" version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "zmij" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" diff --git a/Cargo.toml b/Cargo.toml index 8b3d587..f451268 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,4 +16,4 @@ authors = [ [workspace.dependencies] tree-sitter = { git = "https://github.com/jaboatman/tree-sitter", branch = "combined" } -tree-sitter-generate = { git = "https://github.com/jaboatman/tree-sitter", branch = "combined" } +tree-sitter-generate = { git = "https://github.com/jaboatman/tree-sitter", branch = "combined", default-features = false, features = ["load"] } From 88fc01df8dad4f5ac89bc16dc66e9291df1825f5 Mon Sep 17 00:00:00 2001 From: Jason Boatman Date: Fri, 13 Mar 2026 14:01:16 -0700 Subject: [PATCH 50/50] Update dependencies --- Cargo.lock | 402 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 315 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 55f17e1..e4eaa89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,9 +28,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" @@ -61,6 +61,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + [[package]] name = "async-trait" version = "0.1.89" @@ -80,15 +86,15 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "cast" @@ -98,9 +104,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.52" +version = "1.2.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" dependencies = [ "find-msvc-tools", "shlex", @@ -130,9 +136,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "console" @@ -166,9 +172,9 @@ checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "env_filter" -version = "0.1.4" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +checksum = "7a1c3cc8e57274ec99de65301228b537f1e4eedc1b8e0f9411c6caac8ae7308f" dependencies = [ "log", "regex", @@ -176,9 +182,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +checksum = "b2daee4ea451f429a58296525ddf28b45a3b64f1acf6587e2067437bb11e218d" dependencies = [ "anstream", "anstyle", @@ -211,20 +217,60 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.7" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "slab", +] [[package]] name = "getrandom" -version = "0.3.4" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", "r-efi", "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", ] [[package]] @@ -233,6 +279,18 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "indexmap" version = "2.13.0" @@ -240,7 +298,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", "serde", "serde_core", ] @@ -256,9 +314,9 @@ dependencies = [ [[package]] name = "insta" -version = "1.46.0" +version = "1.46.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b66886d14d18d420ab5052cbff544fc5d34d0b2cdd35eb5976aaa10a4a472e5" +checksum = "e82db8c87c7f1ccecb34ce0c24399b8a73081427f3c7c50a5d597925356115e4" dependencies = [ "console", "once_cell", @@ -289,9 +347,9 @@ checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" dependencies = [ "jiff-static", "log", @@ -302,9 +360,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.18" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" dependencies = [ "proc-macro2", "quote", @@ -313,31 +371,37 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" dependencies = [ "once_cell", "wasm-bindgen", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.180" +version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" +checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "log" @@ -347,9 +411,9 @@ checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = 
"f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "minicov" @@ -382,9 +446,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -398,50 +462,66 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" dependencies = [ "portable-atomic", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +checksum = 
"8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.43" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" -version = "5.3.0" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -451,9 +531,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -462,9 +542,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "rust-sitter" @@ -551,9 +631,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "1.1.3" +version = "1.1.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ "bitflags", "errno", @@ -643,6 +723,12 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" +[[package]] +name = "slab" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" + [[package]] name = "smallbitvec" version = "2.6.0" @@ -657,9 +743,9 @@ checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" [[package]] name = "syn" -version = "2.0.114" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -678,9 +764,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.24.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", "getrandom", @@ -700,18 +786,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -727,6 +813,7 @@ checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" [[package]] name = "tree-sitter" version = "0.27.0" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#e5a0e2d43efe5ca11fc5c1555390b617880cded6" dependencies = [ "cc", "regex", @@ -739,6 +826,7 @@ dependencies = [ [[package]] name = "tree-sitter-generate" version = "0.27.0" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#e5a0e2d43efe5ca11fc5c1555390b617880cded6" dependencies = [ "bitflags", "dunce", @@ -759,12 +847,19 @@ dependencies = [ [[package]] name = "tree-sitter-language" version = "0.1.7" +source = "git+https://github.com/jaboatman/tree-sitter?branch=combined#e5a0e2d43efe5ca11fc5c1555390b617880cded6" [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "utf8parse" @@ -784,18 +879,27 @@ dependencies = [ [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" dependencies = [ "cfg-if", "once_cell", @@ -806,11 +910,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "e9c5522b3a28661442748e09d40924dfb9ca614b21c00d3fd135720e48b67db8" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -819,9 +924,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -829,9 +934,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" dependencies = [ "bumpalo", "proc-macro2", @@ -842,18 +947,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" 
+checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.56" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25e90e66d265d3a1efc0e72a54809ab90b9c0c515915c67cdf658689d2c22c6c" +checksum = "6311c867385cc7d5602463b31825d454d0837a3aba7cdb5e56d5201792a3f7fe" dependencies = [ "async-trait", "cast", @@ -868,24 +973,65 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "wasm-bindgen-test-macro", + "wasm-bindgen-test-shared", ] [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.56" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7150335716dce6028bead2b848e72f47b45e7b9422f64cccdc23bedca89affc1" +checksum = "67008cdde4769831958536b0f11b3bdd0380bde882be17fff9c2f34bb4549abd" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "wasm-bindgen-test-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe29135b180b72b04c74aa97b2b4a2ef275161eff9a6c7955ea9eaedc7e1d4e" + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + 
[[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" dependencies = [ "js-sys", "wasm-bindgen", @@ -990,12 +1136,94 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", 
+ "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "zmij" -version = "1.0.12" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"