tree-sitter / tree-sitter

An incremental parsing system for programming tools

Home Page:https://tree-sitter.github.io

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

`ts_node_parent` can still return wrong parent for a zero-width node

vanaigr opened this issue · comments

Problem

Related to #3185.

Sorry if I misunderstood something in how tree-sitter works, but it looks like ts_node_parent() sometimes returns the previous sibling (or its descendant) of a zero-width node instead of its parent. This can happen when the previous sibling has children, since currently ts_node_parent() descends into the first node that has child nodes and can contain the given node:

if (iterator.position.bytes >= end_byte && ts_node_child_count(child) > 0) {

I made a modified version of python grammar that could produce such a scenario (python.patch). It allows function definitions without ":" at the end, so that parameters node could be a sibling of a zero-width body.

Steps to reproduce

test.patch
diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs
index 193b4562..d4c987db 100644
--- a/cli/src/tests/node_test.rs
+++ b/cli/src/tests/node_test.rs
@@ -250,7 +250,7 @@ fn test_node_parent_of_child_by_field_name() {
 
 #[test]
 fn test_parent_of_zero_width_node() {
-    let code = "def dupa(foo):";
+    let code = "def dupa(foo)";
 
     let mut parser = Parser::new();
     parser.set_language(&get_language("python")).unwrap();
@@ -258,12 +258,13 @@ fn test_parent_of_zero_width_node() {
     let tree = parser.parse(code, None).unwrap();
     let root = tree.root_node();
     let function_definition = root.child(0).unwrap();
-    let block = function_definition.child(4).unwrap();
+    let block = function_definition.child(3).unwrap();
     let block_parent = block.parent().unwrap();
 
     assert_eq!(block.to_string(), "(block)");
-    assert_eq!(block_parent.kind(), "function_definition");
-    assert_eq!(block_parent.to_string(), "(function_definition name: (identifier) parameters: (parameters (identifier)) body: (block))");
+    assert_eq!(block_parent.kind(), "parameters");
+    assert_eq!(block_parent.to_string(), "(parameters (identifier))");
+    assert_eq!(function_definition.to_string(), "(function_definition name: (identifier) parameters: (parameters (identifier)) body: (block))");
 }
 
 #[test]
python.patch
diff --git a/src/grammar.json b/src/grammar.json
index 56fdf8e..dac95ec 100644
--- a/src/grammar.json
+++ b/src/grammar.json
@@ -1647,18 +1647,22 @@
                     "type": "SYMBOL",
                     "name": "type"
                   }
+                },
+                {
+                    "type": "STRING",
+                    "value": ":"
                 }
               ]
             },
+            {
+                "type": "STRING",
+                "value": ":"
+            },
             {
               "type": "BLANK"
             }
           ]
         },
-        {
-          "type": "STRING",
-          "value": ":"
-        },
         {
           "type": "FIELD",
           "name": "body",

Put both files into the home directory and run this:

git clone https://github.com/tree-sitter/tree-sitter
cd tree-sitter
git reset --hard cbcb51b857
git apply ~/test.patch
script/fetch-fixtures
git -C test/fixtures/grammars/python/ reset --hard a22761025cdac
git -C test/fixtures/grammars/python/ apply ~/python.patch
script/generate-fixtures
script/test

All tests pass.

Expected behavior

(modified) test_parent_of_zero_width_node() should fail since parameters node is not body's immediate parent.

Tree-sitter version (tree-sitter --version)

tree-sitter 0.22.2 (cbcb51b)

Operating system/version

Ubuntu 22.04.3 LTS (WSL 2)

doesn't seem to pass anymore after your reverse iteration pr it seems like, correct me if I'm wrong though

I added child_containing_descendant() calls to the end of test_parent_of_zero_width_node() in my pr (#3214), so the original 'test.patch' no longer has all the necessary changes to pass on c19f23f.

assert_eq!(
root.child_containing_descendant(block).unwrap(),
function_definition
);
assert_eq!(function_definition.child_containing_descendant(block), None);

Updated test.patch:

test.patch
diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs
index f226a890..895868ca 100644
--- a/cli/src/tests/node_test.rs
+++ b/cli/src/tests/node_test.rs
@@ -269,7 +269,7 @@ fn test_node_parent_of_child_by_field_name() {
 
 #[test]
 fn test_parent_of_zero_width_node() {
-    let code = "def dupa(foo):";
+    let code = "def dupa(foo)";
 
     let mut parser = Parser::new();
     parser.set_language(&get_language("python")).unwrap();
@@ -277,18 +277,20 @@ fn test_parent_of_zero_width_node() {
     let tree = parser.parse(code, None).unwrap();
     let root = tree.root_node();
     let function_definition = root.child(0).unwrap();
-    let block = function_definition.child(4).unwrap();
+    let parameters = function_definition.child(2).unwrap();
+    let block = function_definition.child(3).unwrap();
     let block_parent = block.parent().unwrap();
 
     assert_eq!(block.to_string(), "(block)");
-    assert_eq!(block_parent.kind(), "function_definition");
-    assert_eq!(block_parent.to_string(), "(function_definition name: (identifier) parameters: (parameters (identifier)) body: (block))");
+    assert_eq!(block_parent.kind(), "parameters");
+    assert_eq!(block_parent.to_string(), "(parameters (identifier))");
+    assert_eq!(function_definition.to_string(), "(function_definition name: (identifier) parameters: (parameters (identifier)) body: (block))");
 
     assert_eq!(
         root.child_containing_descendant(block).unwrap(),
         function_definition
     );
-    assert_eq!(function_definition.child_containing_descendant(block), None);
+    assert_eq!(function_definition.child_containing_descendant(block).unwrap(), parameters);
 }
 
 #[test]

thanks for updating