
Commit b4a2fef

fix: refactor
1 parent e8ffcb7

26 files changed, +302 -2774 lines

Cargo.toml

Lines changed: 5 additions & 0 deletions
@@ -3,3 +3,8 @@ members = [
     "crates/*",
 ]
 
+[workspace.dependencies]
+parser = { path = "./crates/parser", version = "0.0.0" }
+triomphe = { version = "0.1.8", default-features = false, features = ["std"] }
+
+
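With [workspace.dependencies] in place, member crates can inherit these entries instead of pinning their own versions. A minimal sketch of the inheritance syntax (hypothetical usage; this commit's crates/parser/Cargo.toml still declares its dependencies directly):

# In a member crate's Cargo.toml (illustrative, not part of this commit):
[dependencies]
triomphe = { workspace = true }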

crates/parser/Cargo.toml

Lines changed: 4 additions & 1 deletion
@@ -1,10 +1,13 @@
 [package]
 name = "parser"
-version = "0.1.0"
+version = "0.0.0"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+cstree = { version = "0.12.0", features = ["derive"] }
 pg_query = "0.7"
 logos = "0.13.0"
+serde_json = "1.0"
+serde = { version = "1.0", features = ["derive"] }
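The "derive" feature on cstree enables a derive macro for its Syntax trait, which is presumably why it is turned on here. A minimal sketch of deriving it for a syntax-kind enum (DemoKind and its variants are hypothetical, not the crate's real SyntaxKind):

use cstree::Syntax;

// The derive expects a fieldless enum with a u32 representation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Syntax)]
#[repr(u32)]
pub enum DemoKind {
    Comment,
    Newline,
    Statement,
}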

crates/parser/src/ast_node.rs

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+use cstree::text::TextRange;
+use pg_query::NodeEnum;
+
+// TODO: implement serde for node: https://serde.rs/remote-derive.html
+
+#[derive(Debug)]
+pub struct RawStmt {
+    pub stmt: NodeEnum,
+    pub range: TextRange,
+}
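The TODO points at serde's remote-derive mechanism, which lets a crate derive Serialize/Deserialize for a type it does not own by mirroring its definition locally. A sketch of the general pattern from the linked serde docs, using a small hypothetical foreign type rather than pg_query's NodeEnum:

use serde::{Deserialize, Serialize};

// Pretend this type lives in another crate and ships without serde impls.
mod other_crate {
    pub struct Duration {
        pub secs: i64,
        pub nanos: i32,
    }
}

// Local mirror; serde checks the fields against the remote type.
#[derive(Serialize, Deserialize)]
#[serde(remote = "other_crate::Duration")]
struct DurationDef {
    secs: i64,
    nanos: i32,
}

// Fields of the foreign type opt in via `with`.
#[derive(Serialize, Deserialize)]
struct Record {
    #[serde(with = "DurationDef")]
    elapsed: other_crate::Duration,
}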

crates/parser/src/event_buffer.rs

Lines changed: 0 additions & 79 deletions
This file was deleted.

crates/parser/src/event_sink.rs

Lines changed: 0 additions & 8 deletions
This file was deleted.

crates/parser/src/lib.rs

Lines changed: 11 additions & 7 deletions
@@ -54,12 +54,16 @@
 // so we will have a `parse_statement` and a `parse_source_file` function
 // the tree always covers all text since we use the scantokens and, if failing, the StatementTokens
 // errors are added to a list, and are not part of the tree
-//
-mod event_buffer;
-mod event_sink;
+
+mod ast_node;
+mod parser;
 mod pg_query_utils;
-mod source_file_lexer;
-mod source_file_parsing;
-mod statement_lexer;
-mod statement_parsing;
+mod source_file;
+mod statement;
+mod syntax_error;
 mod syntax_kind;
+mod syntax_node;
+
+pub use crate::parser::{Parse, Parser};
+pub use crate::syntax_kind::SyntaxKind;
+pub use crate::syntax_node::{SyntaxElement, SyntaxNode, SyntaxToken};
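Together the re-exports give downstream code a small surface: build a Parser, feed it a source file, take the Parse apart. A minimal sketch assuming only what this diff shows (Parser derives Default, parse_source_file is added in source_file.rs, finish produces a Parse):

use parser::{Parse, Parser};

fn parse_sql(sql: &str) -> Parse {
    let mut parser = Parser::default();
    parser.parse_source_file(sql);
    parser.finish()
}

fn main() {
    let parse = parse_sql("select 1;\n-- comment\n");
    // Parse bundles the resolved cst with the collected errors and statements.
    println!("{:#?}", parse.cst);
    println!("{} errors, {} stmts", parse.errors.len(), parse.stmts.len());
}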
crates/parser/src/parser.rs

Lines changed: 42 additions & 18 deletions
@@ -1,26 +1,33 @@
-use cstree::testing::GreenNodeBuilder;
+use cstree::syntax::ResolvedNode;
+use cstree::{build::GreenNodeBuilder, text::TextRange};
+use pg_query::NodeEnum;
 
+use crate::ast_node::RawStmt;
+use crate::syntax_error::SyntaxError;
 use crate::syntax_kind::{SyntaxKind, SyntaxKindType};
+use crate::syntax_node::SyntaxNode;
 
-pub struct StatementBuilder<'builder> {
-    builder: &'builder mut GreenNodeBuilder<'static, 'static, SyntaxKind>,
+#[derive(Default, Debug)]
+pub struct Parser {
+    inner: GreenNodeBuilder<'static, 'static, SyntaxKind>,
     token_buffer: Vec<(SyntaxKind, String)>,
     curr_depth: i32,
+    errors: Vec<SyntaxError>,
+    stmts: Vec<RawStmt>,
 }
 
-/// Wrapper around GreenNodeBuilder to simplify integration with SyntaxKind
-impl<'builder> StatementBuilder<'builder> {
-    pub fn new(builder: &'builder mut GreenNodeBuilder<'static, 'static, SyntaxKind>) -> Self {
-        return Self {
-            builder,
-            token_buffer: Vec::new(),
-            curr_depth: 0,
-        };
-    }
+#[derive(Debug)]
+pub struct Parse {
+    pub cst: ResolvedNode<SyntaxKind>,
+    pub errors: Vec<SyntaxError>,
+    pub stmts: Vec<RawStmt>,
+}
 
+/// Main parser that controls the cst building process, and collects errors and statements
+impl Parser {
     pub fn close_until_depth(&mut self, depth: i32) {
         while self.curr_depth >= depth {
-            self.builder.finish_node();
+            self.inner.finish_node();
             self.curr_depth -= 1;
         }
     }
@@ -35,17 +42,17 @@ impl<'builder> StatementBuilder<'builder> {
         self.consume_token_buffer();
 
         self.curr_depth = *depth;
-        self.builder.start_node(kind);
+        self.inner.start_node(kind);
     }
 
     pub fn finish_node(&mut self) {
-        self.builder.finish_node();
+        self.inner.finish_node();
     }
 
     /// Drains the token buffer and applies all tokens
     pub fn consume_token_buffer(&mut self) {
         for (kind, text) in self.token_buffer.drain(..) {
-            self.builder.token(kind, &text);
+            self.inner.token(kind, &text);
         }
     }
 
@@ -60,15 +67,32 @@ impl<'builder> StatementBuilder<'builder> {
                 // move up to depth 2 and consume buffered tokens before applying closing token
                 self.close_until_depth(2);
                 self.consume_token_buffer();
-                self.builder.token(kind, text);
+                self.inner.token(kind, text);
             }
             Some(SyntaxKindType::Follow) => {
                 // wait until next node, and apply token at same depth
                 self.token_buffer.push((kind, text.to_string()));
            }
             _ => {
-                self.builder.token(kind, text);
+                self.inner.token(kind, text);
             }
         }
     }
+
+    pub fn error(&mut self, error: String, range: TextRange) {
+        self.errors.push(SyntaxError::new(error, range));
+    }
+
+    pub fn stmt(&mut self, stmt: NodeEnum, range: TextRange) {
+        self.stmts.push(RawStmt { stmt, range });
+    }
+
+    pub fn finish(self) -> Parse {
+        let (tree, cache) = self.inner.finish();
+        Parse {
+            cst: SyntaxNode::new_root_with_resolver(tree, cache.unwrap().into_interner().unwrap()),
+            stmts: self.stmts,
+            errors: self.errors,
+        }
+    }
 }
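A short sketch of the collection flow under the same assumptions, written as crate-internal code since this diff does not show whether start_node and token are public outside the crate; the kinds and the range are illustrative:

use cstree::text::{TextRange, TextSize};

use crate::{Parse, Parser, SyntaxKind};

fn demo() -> Parse {
    let mut p = Parser::default();
    p.start_node(SyntaxKind::SourceFile, &0);
    // Assuming Comment is neither a Close nor a Follow kind, it is applied immediately.
    p.token(SyntaxKind::Comment, "-- hello");
    // Errors accumulate beside the tree; they never become nodes.
    p.error(
        "unexpected token".to_string(),
        TextRange::new(TextSize::from(0), TextSize::from(8)),
    );
    p.finish_node();
    // finish() consumes the parser and resolves the green tree into a Parse.
    p.finish()
}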

crates/parser/src/source_file_lexer.rs renamed to crates/parser/src/source_file.rs

Lines changed: 30 additions & 1 deletion
@@ -7,6 +7,8 @@
 /// This lexer does the split.
 use logos::Logos;
 
+use crate::{parser::Parser, syntax_kind::SyntaxKind};
+
 #[derive(Logos, Debug, PartialEq)]
 #[logos(skip r"[ \t\f]+")] // Ignore this regex pattern between tokens
 pub enum SourceFileToken {
@@ -20,12 +22,39 @@ pub enum SourceFileToken {
     Comment,
 }
 
+impl Parser {
+    pub fn parse_source_file(&mut self, text: &str) {
+        let mut lexer = SourceFileToken::lexer(text);
+
+        self.start_node(SyntaxKind::SourceFile, &0);
+        while let Some(token) = lexer.next() {
+            match token {
+                Ok(token) => {
+                    match token {
+                        SourceFileToken::Comment => {
+                            self.token(SyntaxKind::Comment, lexer.slice());
+                        }
+                        SourceFileToken::Newline => {
+                            self.token(SyntaxKind::Newline, lexer.slice());
+                        }
+                        SourceFileToken::Statement => {
+                            self.parse_statement(lexer.slice(), Some(lexer.span().start as u32));
+                        }
+                    };
+                }
+                Err(_) => panic!("Unknown SourceFileToken: {:?}", lexer.span()),
+            }
+        }
+        self.finish_node();
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
 
     #[test]
-    fn test_expr_lexer() {
+    fn test_source_file_lexer() {
         let input = "select * from contact where id = '123';\n\n-- test comment\n\nselect wrong statement;\n\nselect id,username from contact\n\nselect id,name\nfrom contact -- test inline comment\nwhere id = '123';\n\n";
 
         let mut lex = SourceFileToken::lexer(&input);
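For reference, the lexer can also be driven standalone: logos yields Result items (hence the Ok/Err match above), and span() reports the byte range that parse_statement receives as its offset. A minimal sketch:

use logos::Logos;

fn dump_tokens() {
    let mut lex = SourceFileToken::lexer("select 1;\n-- note\n");
    while let Some(token) = lex.next() {
        // Each item is Ok(SourceFileToken) or an Err for unrecognized input.
        println!("{:?} at {:?}: {:?}", token, lex.span(), lex.slice());
    }
}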

crates/parser/src/source_file_parsing.rs

Lines changed: 0 additions & 27 deletions
This file was deleted.
