fix: repeated, map, and edge cases

This commit is contained in:
Gulag 2024-02-03 05:59:41 -05:00
parent b51e203a10
commit 1ad88bd85b
5 changed files with 569 additions and 190 deletions

22
02-ws-js/Cargo.lock generated
View file

@ -38,15 +38,6 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "parse_js"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]
[[package]]
name = "regex"
version = "1.10.3"
@ -88,10 +79,19 @@ dependencies = [
[[package]]
name = "tree-sitter-javascript"
version = "0.20.1"
version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7"
checksum = "38d1463af5be7052171161db7cfe45c7621ed959ae533972ab47a09b1ed70ec0"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "ws-js-parser"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]

View file

@ -1,9 +1,9 @@
[package]
name = "parse_js"
name = "ws-js-parser"
version = "0.1.0"
edition = "2021"
[dependencies]
heck = "0.4.1"
tree-sitter = "0.20.10"
tree-sitter-javascript = "0.20.1"
tree-sitter-javascript = "0.20.3"

View file

@ -5,13 +5,14 @@
(identifier) @_n
(property_identifier) @enum_name
(#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression
left: (parenthesized_expression
right: (parenthesized_expression
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (identifier) @_var
right: (object . "{" . "}" . )))
right: (sequence_expression
left: (parenthesized_expression
[
(parenthesized_expression
(assignment_expression
left: (subscript_expression
object: (parenthesized_expression
@ -32,6 +33,19 @@
left: (subscript_expression)
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
)
))) @stage2
))))
(parenthesized_expression
(assignment_expression
left: (subscript_expression
object: (identifier)
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression
object: ((identifier) @_var2
(#eq? @_var2 @_var)))
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
)
]
)))))

View file

@ -2,15 +2,30 @@
(parenthesized_expression
(assignment_expression
left: (member_expression
(identifier) @_n
(property_identifier) @message_name
object:
[
; n.Message
((identifier) @_n
(#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression
right: (sequence_expression
right: (sequence_expression
left: (parenthesized_expression (assignment_expression
left: (member_expression (identifier) (property_identifier) @_decode (#eq? @_decode "decode"))
right: (function
; (n = {}).Message
(parenthesized_expression
(assignment_expression
left: (identifier) @_n
"="
right: (object . "{" . "}"))
(#eq? @_n "n"))
]
property: (property_identifier) @message_name)
right: (parenthesized_expression
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (member_expression
(identifier)
(property_identifier) @_decode
(#eq? @_decode "decode"))
right: (function_expression
body: (statement_block (for_statement
body: (statement_block
[
@ -24,15 +39,20 @@
body:
(switch_body (switch_case
value: (number) @field_id
body: (expression_statement
body: [
(expression_statement
[
; Single items
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
; Single primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Single non-primitive
(call_expression
function: (member_expression
object: (member_expression
@ -41,28 +61,176 @@
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])))))
])
; Repeated items
(sequence_expression
(binary_expression)
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
; Map items
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject")))
])
(if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))))
]
body: (variable_declaration)?
; Map body, if present. In some cases the for-loop body is a statement block; in others it is a bare switch statement.
; Some cases with inlined assignments seem to omit the braces, e.g. for (s = "", c = null; t.pos < a;) switch ((u = t.uint32()) >>> 3) { ... }
body: (for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; Due to inlined assignment in the switch case, we need this duplication
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)?
)))
; Single field ternary
; i >>> 3 == 1 ? (r.Field = t.decodeMethod()) : ...
(expression_statement
(ternary_expression
condition: (binary_expression
left: (binary_expression
left: (identifier)
">>>"
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id)
consequence: (parenthesized_expression
[
; Single items
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
; Single primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Single non-primitive
(call_expression
function: (member_expression
object: (member_expression
@ -71,6 +239,170 @@
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
]))))
]
)))))))))))))
)
; Repeated items
(sequence_expression
(binary_expression)
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
]
)))
; Inlined ternary into if statement for repeated...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
)))))
; Inlined ternary into if statement for maps...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (statement_block
(expression_statement
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject"))))
(for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; Due to inlined assignment in the switch case, we need this duplication
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)
))
]
))))
)))))))

View file

@ -7,7 +7,20 @@ use tree_sitter::{Parser, Query, QueryCursor, Tree};
struct Message {
name: String,
id: Option<i32>,
fields: Vec<(i32, String, String)>,
fields: Vec<Field>,
}
/// One field of a parsed message.
///
/// `parse_messages` builds these from the `field_id`, `field_name` and
/// type-related query captures; `main` later writes each one out as a
/// `<type> <snake_case name> = <id>;` line.
struct Field {
/// Numeric field id, parsed from the text of the `field_id` capture.
id: i32,
/// Field name as captured from the source; converted to snake_case on output.
name: String,
/// Field shape (single / repeated / map) together with its type name(s).
/// The leading underscore sidesteps the reserved word `type`.
_type: FieldType,
}
/// Shape of a message field plus the type name(s) captured for it.
///
/// When written out, `Single(t)` becomes `t`, `Repeated(t)` becomes
/// `repeated t`, and `Map(k, v)` becomes `map<k, v>`.
#[derive(Clone, Debug)]
enum FieldType {
/// A plain field holding one value of the named type.
Single(String),
/// A field whose decoder pushes values of the named type (a `push` capture was seen).
Repeated(String),
/// A map field: first string is the key type (captured with `kv_id` 1),
/// second is the value type (captured with `kv_id` 2).
Map(String, String),
}
struct Enum {
@ -104,9 +117,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!(
output,
" {} {} = {};",
field.2,
field.1.to_snake_case(),
field.0
match &field._type {
FieldType::Single(t) => t.clone(),
FieldType::Repeated(t) => format!("repeated {t}"),
FieldType::Map(k, v) => format!("map<{k}, {v}>"),
},
field.name.to_snake_case(),
field.id
);
output
})
@ -125,7 +142,19 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!(
output,
" {} = {};",
field.0.to_shouty_snake_case(),
if !field
.0
.to_shouty_snake_case()
.starts_with(&en.name.to_shouty_snake_case())
{
format!(
"{}_{}",
en.name.to_shouty_snake_case(),
field.0.to_shouty_snake_case()
)
} else {
field.0.to_shouty_snake_case()
},
field.1
);
output
@ -176,6 +205,19 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.capture_index_for_name("field_type")
.unwrap();
// repeated fields
let push_idx = state.message_query.capture_index_for_name("push").unwrap();
// map fields
let kv_id_idx = state.message_query.capture_index_for_name("kv_id").unwrap();
let kv_field_type_idx = state
.message_query
.capture_index_for_name("kv_field_type")
.unwrap();
let mut field_type = None;
let mut do_push = true;
for m in matches {
if m.captures.is_empty() {
continue;
@ -184,7 +226,7 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let mut message_name = String::new();
let mut field_id = String::new();
let mut field_name = String::new();
let mut field_type = String::new();
let mut kv_field_id = 0;
for capture in m.captures {
let node_text = capture
@ -199,33 +241,77 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
} else if capture.index == field_name_idx {
field_name = node_text;
} else if capture.index == field_type_idx {
field_type = node_text;
if let Some(FieldType::Repeated(ref mut t)) = field_type {
*t = node_text;
} else {
field_type = Some(FieldType::Single(node_text));
}
} else if capture.index == push_idx {
if let Some(FieldType::Single(text)) = field_type {
field_type = Some(FieldType::Repeated(text));
} else {
field_type = Some(FieldType::Repeated(String::new()));
}
} else if capture.index == kv_id_idx {
kv_field_id = node_text.parse()?;
} else if capture.index == kv_field_type_idx {
if let Some(FieldType::Map(ref mut key_type, ref mut value_type)) = field_type {
if kv_field_id == 1 {
*key_type = node_text;
} else if kv_field_id == 2 {
*value_type = node_text;
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
}
do_push = true;
} else {
field_type = if kv_field_id == 1 {
Some(FieldType::Map(node_text, String::new()))
} else if kv_field_id == 2 {
Some(FieldType::Map(String::new(), node_text))
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
};
do_push = false;
}
}
}
if do_push {
if state.messages.contains_key(&message_name) {
state.messages.get_mut(&message_name).unwrap().fields.push((
field_id.parse()?,
field_name,
field_type,
));
state
.messages
.get_mut(&message_name)
.unwrap()
.fields
.push(Field {
id: field_id.parse()?,
name: field_name,
_type: field_type.unwrap(),
});
} else {
state.messages.insert(
message_name.clone(),
Message {
name: message_name,
id: None,
fields: vec![(field_id.parse()?, field_name, field_type)],
fields: vec![Field {
id: field_id.parse()?,
name: field_name,
_type: field_type.unwrap(),
}],
},
);
}
field_type = None;
}
}
Ok(())
}
fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let object_idx = state.enum_query.capture_index_for_name("object").unwrap();
let enum_name_idx = state
.enum_query
.capture_index_for_name("enum_name")
@ -238,7 +324,6 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.enum_query
.capture_index_for_name("enum_value")
.unwrap();
let stage2_idx = state.enum_query.capture_index_for_name("stage2").unwrap();
let matches = state.cursor.matches(
&state.enum_query,
@ -246,19 +331,15 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
state.code_bytes.as_slice(),
);
// extract object_name, then execute subquery enum_2.scm on the stage2 node
for m in matches {
if m.captures.is_empty() {
continue;
}
let mut object_name = String::new();
let mut enum_name = String::new();
let mut enum_objects: Vec<(String, i32)> = Vec::new();
let mut current_enum_field = String::new();
let mut current_enum_value = String::new();
let mut stage2_node = None;
for capture in m.captures {
let node_text = capture
@ -266,67 +347,19 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.utf8_text(state.code_bytes.as_slice())?
.to_string();
if capture.index == object_idx {
object_name = node_text;
} else if capture.index == enum_name_idx {
if capture.index == enum_name_idx {
enum_name = node_text;
} else if capture.index == enum_field_idx {
current_enum_field = node_text;
} else if capture.index == enum_value_idx {
current_enum_value = node_text;
} else if capture.index == stage2_idx {
stage2_node = Some(capture.node);
}
}
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
let Some(stage2_node) = stage2_node else {
panic!("stage2 node not found!: {object_name}");
};
let mut stage2_query_txt = std::fs::read_to_string("queries/enum_2.scm")?;
// replace "REPLACEME" with object_name
stage2_query_txt = stage2_query_txt.replace("REPLACEME", &object_name);
let stage2_query = Query::new(tree_sitter_javascript::language(), &stage2_query_txt)?;
let mut stage2_qc = QueryCursor::new();
let stage2_matches =
stage2_qc.matches(&stage2_query, stage2_node, state.code_bytes.as_slice());
let stage2_enum_field_idx = stage2_query.capture_index_for_name("enum_field").unwrap();
let stage2_enum_value_idx = stage2_query.capture_index_for_name("enum_value").unwrap();
for m in stage2_matches {
if m.captures.is_empty() {
continue;
}
let mut current_enum_field = String::new();
let mut current_enum_value = String::new();
for capture in m.captures {
let node_text = capture
.node
.utf8_text(state.code_bytes.as_slice())?
.to_string();
if capture.index == stage2_enum_field_idx {
current_enum_field = node_text;
} else if capture.index == stage2_enum_value_idx {
current_enum_value = node_text;
}
}
if current_enum_value.contains('e') {
replace_exp(&mut current_enum_value);
}
}
}
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
}
if state.enums.contains_key(&enum_name) {
state