fix: repeated, map, and edge cases

This commit is contained in:
Gulag 2024-02-03 05:59:41 -05:00
parent b51e203a10
commit 1ad88bd85b
5 changed files with 569 additions and 190 deletions

22
02-ws-js/Cargo.lock generated
View file

@ -38,15 +38,6 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "parse_js"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.10.3" version = "1.10.3"
@ -88,10 +79,19 @@ dependencies = [
[[package]] [[package]]
name = "tree-sitter-javascript" name = "tree-sitter-javascript"
version = "0.20.1" version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7" checksum = "38d1463af5be7052171161db7cfe45c7621ed959ae533972ab47a09b1ed70ec0"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter",
] ]
[[package]]
name = "ws-js-parser"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]

View file

@ -1,9 +1,9 @@
[package] [package]
name = "parse_js" name = "ws-js-parser"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
heck = "0.4.1" heck = "0.4.1"
tree-sitter = "0.20.10" tree-sitter = "0.20.10"
tree-sitter-javascript = "0.20.1" tree-sitter-javascript = "0.20.3"

View file

@ -5,33 +5,47 @@
(identifier) @_n (identifier) @_n
(property_identifier) @enum_name (property_identifier) @enum_name
(#eq? @_n "n")) (#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression right: (parenthesized_expression
left: (parenthesized_expression (sequence_expression
(assignment_expression (parenthesized_expression
left: (identifier) @_var (assignment_expression
right: (object . "{" . "}" . ))) left: (identifier) @_var
right: (sequence_expression right: (object . "{" . "}" . )))
left: (parenthesized_expression [
(assignment_expression (parenthesized_expression
left: (subscript_expression (assignment_expression
object: (parenthesized_expression left: (subscript_expression
(assignment_expression object: (parenthesized_expression
left: (identifier) @object (assignment_expression
"=" left: (identifier) @object
right: (call_expression "="
function: (member_expression right: (call_expression
object: (identifier) @_object function: (member_expression
"." object: (identifier) @_object
property: (property_identifier) @_create "."
(#eq? @_object "Object") property: (property_identifier) @_create
(#eq? @_create "create")) (#eq? @_object "Object")
arguments: (arguments . "(" (identifier) @_var2 . ")" . (#eq? @_create "create"))
(#eq? @_var @_var2))))) arguments: (arguments . "(" (identifier) @_var2 . ")" .
index: (parenthesized_expression (#eq? @_var @_var2)))))
(assignment_expression index: (parenthesized_expression
left: (subscript_expression) (assignment_expression
right: (string (string_fragment) @enum_field)))) left: (subscript_expression)
right: (number) @enum_value) right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
)
))) @stage2 (parenthesized_expression
)))) (assignment_expression
left: (subscript_expression
object: (identifier)
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression
object: ((identifier) @_var2
(#eq? @_var2 @_var)))
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
)
]
)))))

View file

@ -2,75 +2,407 @@
(parenthesized_expression (parenthesized_expression
(assignment_expression (assignment_expression
left: (member_expression left: (member_expression
(identifier) @_n object:
(property_identifier) @message_name [
(#eq? @_n "n")) ; n.Message
right: (parenthesized_expression (sequence_expression ((identifier) @_n
right: (sequence_expression (#eq? @_n "n"))
right: (sequence_expression
left: (parenthesized_expression (assignment_expression
left: (member_expression (identifier) (property_identifier) @_decode (#eq? @_decode "decode"))
right: (function
body: (statement_block (for_statement
body: (statement_block
[
(switch_statement
value: (parenthesized_expression
(binary_expression
left: (identifier)
">>>"
right: (number) @_shift3
(#eq? @_shift3 "3")))
body:
(switch_body (switch_case
value: (number) @field_id
body: (expression_statement
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
(call_expression ; (n = {}).Message
function: (member_expression (parenthesized_expression
object: (member_expression (assignment_expression
object: (member_expression) @_rootakiprotocol left: (identifier) @_n
property: (property_identifier) @field_type "="
(#eq? @_rootakiprotocol "$root.Aki.Protocol")) right: (object . "{" . "}"))
property: (property_identifier) @_decode_field (#eq? @_n "n"))
(#eq? @_decode_field "decode"))) ]
]))))) property: (property_identifier) @message_name)
right: (parenthesized_expression
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (member_expression
(identifier)
(property_identifier) @_decode
(#eq? @_decode "decode"))
right: (function_expression
body: (statement_block (for_statement
body: (statement_block
[
(switch_statement
value: (parenthesized_expression
(binary_expression
left: (identifier)
">>>"
right: (number) @_shift3
(#eq? @_shift3 "3")))
body:
(switch_body (switch_case
value: (number) @field_id
body: [
(expression_statement
[
; Single items
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
; Single primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
(expression_statement ; Single non-primitive
(ternary_expression (call_expression
condition: (binary_expression function: (member_expression
left: (binary_expression object: (member_expression
left: (identifier) object: (member_expression) @_rootakiprotocol
">>>" property: (property_identifier) @field_type
right: (number) @_shift3 (#eq? @_rootakiprotocol "$root.Aki.Protocol"))
(#eq? @_shift3 "3")) property: (property_identifier) @_decode_field
"==" (#eq? @_decode_field "decode")))
right: (number) @field_id) ])
consequence: (parenthesized_expression ; Repeated items
(assignment_expression (sequence_expression
left: (member_expression (identifier) (property_identifier) @field_name) (binary_expression)
right: [ (call_expression
(call_expression function: (member_expression
function: (member_expression object: (member_expression
object: (identifier) object: (identifier)
property: (property_identifier) @field_type)) property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
(call_expression arguments: (arguments
function: (member_expression [
object: (member_expression ; Repeated primitive
object: (member_expression) @_rootakiprotocol (call_expression
property: (property_identifier) @field_type function: (member_expression
(#eq? @_rootakiprotocol "$root.Aki.Protocol")) object: (identifier)
property: (property_identifier) @_decode_field property: (property_identifier) @field_type))
(#eq? @_decode_field "decode")))
])))) ; Repeated non-primitive
] (call_expression
))))))))))))) function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
; Map items
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject")))
])
(if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))))
]
body: (variable_declaration)?
; Map body, if present. The loop body is sometimes a statement block and sometimes a bare switch statement:
; when the assignments are inlined the braces are omitted, e.g. for (s = "", c = null; t.pos < a;) switch ((u = t.uint32()) >>> 3) { ... }
body: (for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; When the assignment is inlined into the switch discriminant, the braces around the switch are dropped, so this pattern is duplicated without the statement_block wrapper
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)?
)))
; Single field ternary
; i >>> 3 == 1 ? (r.Field = t.decodeMethod()) : ...
(expression_statement
(ternary_expression
condition: (binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id)
consequence: (parenthesized_expression
[
; Single items
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
; Single primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Single non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
]
)
; Repeated items
(sequence_expression
(binary_expression)
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
]
)))
; Inlined ternary into if statement for repeated...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
)))))
; Inlined ternary into if statement for maps...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (statement_block
(expression_statement
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject"))))
(for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; When the assignment is inlined into the switch discriminant, the braces around the switch are dropped, so this pattern is duplicated without the statement_block wrapper
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)
))
]
))))
)))))))

View file

@ -7,7 +7,20 @@ use tree_sitter::{Parser, Query, QueryCursor, Tree};
struct Message { struct Message {
name: String, name: String,
id: Option<i32>, id: Option<i32>,
fields: Vec<(i32, String, String)>, fields: Vec<Field>,
}
/// A single field collected for a `Message` by `parse_messages`.
struct Field {
/// Field number, parsed from the `field_id` capture; printed after the `=` when the message is written out.
id: i32,
/// Field name as captured from the source; converted to snake_case when printed.
name: String,
/// Whether the field is single, repeated, or a map, together with the captured type name(s).
_type: FieldType,
}
/// Shape of a message field plus the type name(s) captured for it.
#[derive(Clone, Debug)]
enum FieldType {
/// A plain field; the string is the captured type name and is printed unchanged.
Single(String),
/// A field whose capture included a `push` call; printed as `repeated <type>`.
Repeated(String),
/// A map field holding (key type, value type); printed as `map<key, value>`.
/// In `parse_messages`, `kv_id` 1 fills the first slot and `kv_id` 2 the second.
Map(String, String),
} }
struct Enum { struct Enum {
@ -104,9 +117,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!( let _ = writeln!(
output, output,
" {} {} = {};", " {} {} = {};",
field.2, match &field._type {
field.1.to_snake_case(), FieldType::Single(t) => t.clone(),
field.0 FieldType::Repeated(t) => format!("repeated {t}"),
FieldType::Map(k, v) => format!("map<{k}, {v}>"),
},
field.name.to_snake_case(),
field.id
); );
output output
}) })
@ -125,7 +142,19 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!( let _ = writeln!(
output, output,
" {} = {};", " {} = {};",
field.0.to_shouty_snake_case(), if !field
.0
.to_shouty_snake_case()
.starts_with(&en.name.to_shouty_snake_case())
{
format!(
"{}_{}",
en.name.to_shouty_snake_case(),
field.0.to_shouty_snake_case()
)
} else {
field.0.to_shouty_snake_case()
},
field.1 field.1
); );
output output
@ -176,6 +205,19 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.capture_index_for_name("field_type") .capture_index_for_name("field_type")
.unwrap(); .unwrap();
// repeated fields
let push_idx = state.message_query.capture_index_for_name("push").unwrap();
// map fields
let kv_id_idx = state.message_query.capture_index_for_name("kv_id").unwrap();
let kv_field_type_idx = state
.message_query
.capture_index_for_name("kv_field_type")
.unwrap();
let mut field_type = None;
let mut do_push = true;
for m in matches { for m in matches {
if m.captures.is_empty() { if m.captures.is_empty() {
continue; continue;
@ -184,7 +226,7 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let mut message_name = String::new(); let mut message_name = String::new();
let mut field_id = String::new(); let mut field_id = String::new();
let mut field_name = String::new(); let mut field_name = String::new();
let mut field_type = String::new(); let mut kv_field_id = 0;
for capture in m.captures { for capture in m.captures {
let node_text = capture let node_text = capture
@ -199,25 +241,70 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
} else if capture.index == field_name_idx { } else if capture.index == field_name_idx {
field_name = node_text; field_name = node_text;
} else if capture.index == field_type_idx { } else if capture.index == field_type_idx {
field_type = node_text; if let Some(FieldType::Repeated(ref mut t)) = field_type {
*t = node_text;
} else {
field_type = Some(FieldType::Single(node_text));
}
} else if capture.index == push_idx {
if let Some(FieldType::Single(text)) = field_type {
field_type = Some(FieldType::Repeated(text));
} else {
field_type = Some(FieldType::Repeated(String::new()));
}
} else if capture.index == kv_id_idx {
kv_field_id = node_text.parse()?;
} else if capture.index == kv_field_type_idx {
if let Some(FieldType::Map(ref mut key_type, ref mut value_type)) = field_type {
if kv_field_id == 1 {
*key_type = node_text;
} else if kv_field_id == 2 {
*value_type = node_text;
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
}
do_push = true;
} else {
field_type = if kv_field_id == 1 {
Some(FieldType::Map(node_text, String::new()))
} else if kv_field_id == 2 {
Some(FieldType::Map(String::new(), node_text))
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
};
do_push = false;
}
} }
} }
if state.messages.contains_key(&message_name) { if do_push {
state.messages.get_mut(&message_name).unwrap().fields.push(( if state.messages.contains_key(&message_name) {
field_id.parse()?, state
field_name, .messages
field_type, .get_mut(&message_name)
)); .unwrap()
} else { .fields
state.messages.insert( .push(Field {
message_name.clone(), id: field_id.parse()?,
Message { name: field_name,
name: message_name, _type: field_type.unwrap(),
id: None, });
fields: vec![(field_id.parse()?, field_name, field_type)], } else {
}, state.messages.insert(
); message_name.clone(),
Message {
name: message_name,
id: None,
fields: vec![Field {
id: field_id.parse()?,
name: field_name,
_type: field_type.unwrap(),
}],
},
);
}
field_type = None;
} }
} }
@ -225,7 +312,6 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
} }
fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> { fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let object_idx = state.enum_query.capture_index_for_name("object").unwrap();
let enum_name_idx = state let enum_name_idx = state
.enum_query .enum_query
.capture_index_for_name("enum_name") .capture_index_for_name("enum_name")
@ -238,7 +324,6 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.enum_query .enum_query
.capture_index_for_name("enum_value") .capture_index_for_name("enum_value")
.unwrap(); .unwrap();
let stage2_idx = state.enum_query.capture_index_for_name("stage2").unwrap();
let matches = state.cursor.matches( let matches = state.cursor.matches(
&state.enum_query, &state.enum_query,
@ -246,19 +331,15 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
state.code_bytes.as_slice(), state.code_bytes.as_slice(),
); );
// extract object_name, then execute subquery enum_2.scm on the stage2 node
for m in matches { for m in matches {
if m.captures.is_empty() { if m.captures.is_empty() {
continue; continue;
} }
let mut object_name = String::new();
let mut enum_name = String::new(); let mut enum_name = String::new();
let mut enum_objects: Vec<(String, i32)> = Vec::new(); let mut enum_objects: Vec<(String, i32)> = Vec::new();
let mut current_enum_field = String::new(); let mut current_enum_field = String::new();
let mut current_enum_value = String::new(); let mut current_enum_value = String::new();
let mut stage2_node = None;
for capture in m.captures { for capture in m.captures {
let node_text = capture let node_text = capture
@ -266,68 +347,20 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.utf8_text(state.code_bytes.as_slice())? .utf8_text(state.code_bytes.as_slice())?
.to_string(); .to_string();
if capture.index == object_idx { if capture.index == enum_name_idx {
object_name = node_text;
} else if capture.index == enum_name_idx {
enum_name = node_text; enum_name = node_text;
} else if capture.index == enum_field_idx { } else if capture.index == enum_field_idx {
current_enum_field = node_text; current_enum_field = node_text;
} else if capture.index == enum_value_idx { } else if capture.index == enum_value_idx {
current_enum_value = node_text; current_enum_value = node_text;
} else if capture.index == stage2_idx { if current_enum_value.contains('e') {
stage2_node = Some(capture.node); replace_exp(&mut current_enum_value);
}
} }
} }
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap())); enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
let Some(stage2_node) = stage2_node else {
panic!("stage2 node not found!: {object_name}");
};
let mut stage2_query_txt = std::fs::read_to_string("queries/enum_2.scm")?;
// replace "REPLACEME" with object_name
stage2_query_txt = stage2_query_txt.replace("REPLACEME", &object_name);
let stage2_query = Query::new(tree_sitter_javascript::language(), &stage2_query_txt)?;
let mut stage2_qc = QueryCursor::new();
let stage2_matches =
stage2_qc.matches(&stage2_query, stage2_node, state.code_bytes.as_slice());
let stage2_enum_field_idx = stage2_query.capture_index_for_name("enum_field").unwrap();
let stage2_enum_value_idx = stage2_query.capture_index_for_name("enum_value").unwrap();
for m in stage2_matches {
if m.captures.is_empty() {
continue;
}
let mut current_enum_field = String::new();
let mut current_enum_value = String::new();
for capture in m.captures {
let node_text = capture
.node
.utf8_text(state.code_bytes.as_slice())?
.to_string();
if capture.index == stage2_enum_field_idx {
current_enum_field = node_text;
} else if capture.index == stage2_enum_value_idx {
current_enum_value = node_text;
}
}
if current_enum_value.contains('e') {
replace_exp(&mut current_enum_value);
}
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
}
if state.enums.contains_key(&enum_name) { if state.enums.contains_key(&enum_name) {
state state
.enums .enums