Compare commits

..

No commits in common. "672354ec5acaf207be76407f966f70006ac56598" and "1ad88bd85bf86c52ae1765605a403b5b183252dc" have entirely different histories.

5 changed files with 569 additions and 190 deletions

22
02-ws-js/Cargo.lock generated
View file

@ -38,15 +38,6 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "parse_js"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.10.3" version = "1.10.3"
@ -88,10 +79,19 @@ dependencies = [
[[package]] [[package]]
name = "tree-sitter-javascript" name = "tree-sitter-javascript"
version = "0.20.1" version = "0.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7" checksum = "38d1463af5be7052171161db7cfe45c7621ed959ae533972ab47a09b1ed70ec0"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter",
] ]
[[package]]
name = "ws-js-parser"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]

View file

@ -1,9 +1,9 @@
[package] [package]
name = "parse_js" name = "ws-js-parser"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
heck = "0.4.1" heck = "0.4.1"
tree-sitter = "0.20.10" tree-sitter = "0.20.10"
tree-sitter-javascript = "0.20.1" tree-sitter-javascript = "0.20.3"

View file

@ -5,13 +5,14 @@
(identifier) @_n (identifier) @_n
(property_identifier) @enum_name (property_identifier) @enum_name
(#eq? @_n "n")) (#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression right: (parenthesized_expression
left: (parenthesized_expression (sequence_expression
(parenthesized_expression
(assignment_expression (assignment_expression
left: (identifier) @_var left: (identifier) @_var
right: (object . "{" . "}" . ))) right: (object . "{" . "}" . )))
right: (sequence_expression [
left: (parenthesized_expression (parenthesized_expression
(assignment_expression (assignment_expression
left: (subscript_expression left: (subscript_expression
object: (parenthesized_expression object: (parenthesized_expression
@ -32,6 +33,19 @@
left: (subscript_expression) left: (subscript_expression)
right: (string (string_fragment) @enum_field)))) right: (string (string_fragment) @enum_field))))
right: (number) @enum_value) right: (number) @enum_value)
)
))) @stage2 (parenthesized_expression
)))) (assignment_expression
left: (subscript_expression
object: (identifier)
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression
object: ((identifier) @_var2
(#eq? @_var2 @_var)))
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
)
]
)))))

View file

@ -2,15 +2,30 @@
(parenthesized_expression (parenthesized_expression
(assignment_expression (assignment_expression
left: (member_expression left: (member_expression
(identifier) @_n object:
(property_identifier) @message_name [
; n.Message
((identifier) @_n
(#eq? @_n "n")) (#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression
right: (sequence_expression ; (n = {}).Message
right: (sequence_expression (parenthesized_expression
left: (parenthesized_expression (assignment_expression (assignment_expression
left: (member_expression (identifier) (property_identifier) @_decode (#eq? @_decode "decode")) left: (identifier) @_n
right: (function "="
right: (object . "{" . "}"))
(#eq? @_n "n"))
]
property: (property_identifier) @message_name)
right: (parenthesized_expression
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (member_expression
(identifier)
(property_identifier) @_decode
(#eq? @_decode "decode"))
right: (function_expression
body: (statement_block (for_statement body: (statement_block (for_statement
body: (statement_block body: (statement_block
[ [
@ -24,15 +39,20 @@
body: body:
(switch_body (switch_case (switch_body (switch_case
value: (number) @field_id value: (number) @field_id
body: (expression_statement body: [
(expression_statement
[
; Single items
(assignment_expression (assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name) left: (member_expression (identifier) (property_identifier) @field_name)
right: [ right: [
; Single primitive
(call_expression (call_expression
function: (member_expression function: (member_expression
object: (identifier) object: (identifier)
property: (property_identifier) @field_type)) property: (property_identifier) @field_type))
; Single non-primitive
(call_expression (call_expression
function: (member_expression function: (member_expression
object: (member_expression object: (member_expression
@ -41,28 +61,176 @@
(#eq? @_rootakiprotocol "$root.Aki.Protocol")) (#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode"))) (#eq? @_decode_field "decode")))
]))))) ])
; Repeated items
(sequence_expression
(binary_expression)
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
; Map items
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject")))
])
(if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))))
]
body: (variable_declaration)?
; Map body, if present, in some cases there's a statement block, in some it's a switch statement
; seems like some cases with inlined assigns omit the braces - for (s = "", c = null; t.pos < a;) switch ((u = t.uint32()) >>> 3) { ... }
body: (for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; Due to inlined assignment in the switch case, we need this duplication
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)?
)))
; Single field ternary
; i >>> 3 == 1 ? (r.Field = t.decodeMethod()) : ...
(expression_statement (expression_statement
(ternary_expression (ternary_expression
condition: (binary_expression condition: (binary_expression
left: (binary_expression left: (binary_expression
left: (identifier) left: (identifier)
">>>" operator: ">>>"
right: (number) @_shift3 right: (number) @_shift3
(#eq? @_shift3 "3")) (#eq? @_shift3 "3"))
"==" "=="
right: (number) @field_id) right: (number) @field_id)
consequence: (parenthesized_expression consequence: (parenthesized_expression
[
; Single items
(assignment_expression (assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name) left: (member_expression (identifier) (property_identifier) @field_name)
right: [ right: [
; Single primitive
(call_expression (call_expression
function: (member_expression function: (member_expression
object: (identifier) object: (identifier)
property: (property_identifier) @field_type)) property: (property_identifier) @field_type))
; Single non-primitive
(call_expression (call_expression
function: (member_expression function: (member_expression
object: (member_expression object: (member_expression
@ -71,6 +239,170 @@
(#eq? @_rootakiprotocol "$root.Aki.Protocol")) (#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode"))) (#eq? @_decode_field "decode")))
]))))
] ]
))))))))))))) )
; Repeated items
(sequence_expression
(binary_expression)
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
]
)))
; Inlined ternary into if statement for repeated...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
)))))
; Inlined ternary into if statement for maps...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (statement_block
(expression_statement
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject"))))
(for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; Due to inlined assignment in the switch case, we need this duplication
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)
))
]
))))
)))))))

View file

@ -7,7 +7,20 @@ use tree_sitter::{Parser, Query, QueryCursor, Tree};
struct Message { struct Message {
name: String, name: String,
id: Option<i32>, id: Option<i32>,
fields: Vec<(i32, String, String)>, fields: Vec<Field>,
}
struct Field {
id: i32,
name: String,
_type: FieldType,
}
#[derive(Clone, Debug)]
enum FieldType {
Single(String),
Repeated(String),
Map(String, String),
} }
struct Enum { struct Enum {
@ -104,9 +117,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!( let _ = writeln!(
output, output,
" {} {} = {};", " {} {} = {};",
field.2, match &field._type {
field.1.to_snake_case(), FieldType::Single(t) => t.clone(),
field.0 FieldType::Repeated(t) => format!("repeated {t}"),
FieldType::Map(k, v) => format!("map<{k}, {v}>"),
},
field.name.to_snake_case(),
field.id
); );
output output
}) })
@ -125,7 +142,19 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!( let _ = writeln!(
output, output,
" {} = {};", " {} = {};",
field.0.to_shouty_snake_case(), if !field
.0
.to_shouty_snake_case()
.starts_with(&en.name.to_shouty_snake_case())
{
format!(
"{}_{}",
en.name.to_shouty_snake_case(),
field.0.to_shouty_snake_case()
)
} else {
field.0.to_shouty_snake_case()
},
field.1 field.1
); );
output output
@ -176,6 +205,19 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.capture_index_for_name("field_type") .capture_index_for_name("field_type")
.unwrap(); .unwrap();
// repeated fields
let push_idx = state.message_query.capture_index_for_name("push").unwrap();
// map fields
let kv_id_idx = state.message_query.capture_index_for_name("kv_id").unwrap();
let kv_field_type_idx = state
.message_query
.capture_index_for_name("kv_field_type")
.unwrap();
let mut field_type = None;
let mut do_push = true;
for m in matches { for m in matches {
if m.captures.is_empty() { if m.captures.is_empty() {
continue; continue;
@ -184,7 +226,7 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let mut message_name = String::new(); let mut message_name = String::new();
let mut field_id = String::new(); let mut field_id = String::new();
let mut field_name = String::new(); let mut field_name = String::new();
let mut field_type = String::new(); let mut kv_field_id = 0;
for capture in m.captures { for capture in m.captures {
let node_text = capture let node_text = capture
@ -199,33 +241,77 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
} else if capture.index == field_name_idx { } else if capture.index == field_name_idx {
field_name = node_text; field_name = node_text;
} else if capture.index == field_type_idx { } else if capture.index == field_type_idx {
field_type = node_text; if let Some(FieldType::Repeated(ref mut t)) = field_type {
*t = node_text;
} else {
field_type = Some(FieldType::Single(node_text));
}
} else if capture.index == push_idx {
if let Some(FieldType::Single(text)) = field_type {
field_type = Some(FieldType::Repeated(text));
} else {
field_type = Some(FieldType::Repeated(String::new()));
}
} else if capture.index == kv_id_idx {
kv_field_id = node_text.parse()?;
} else if capture.index == kv_field_type_idx {
if let Some(FieldType::Map(ref mut key_type, ref mut value_type)) = field_type {
if kv_field_id == 1 {
*key_type = node_text;
} else if kv_field_id == 2 {
*value_type = node_text;
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
}
do_push = true;
} else {
field_type = if kv_field_id == 1 {
Some(FieldType::Map(node_text, String::new()))
} else if kv_field_id == 2 {
Some(FieldType::Map(String::new(), node_text))
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
};
do_push = false;
}
} }
} }
if do_push {
if state.messages.contains_key(&message_name) { if state.messages.contains_key(&message_name) {
state.messages.get_mut(&message_name).unwrap().fields.push(( state
field_id.parse()?, .messages
field_name, .get_mut(&message_name)
field_type, .unwrap()
)); .fields
.push(Field {
id: field_id.parse()?,
name: field_name,
_type: field_type.unwrap(),
});
} else { } else {
state.messages.insert( state.messages.insert(
message_name.clone(), message_name.clone(),
Message { Message {
name: message_name, name: message_name,
id: None, id: None,
fields: vec![(field_id.parse()?, field_name, field_type)], fields: vec![Field {
id: field_id.parse()?,
name: field_name,
_type: field_type.unwrap(),
}],
}, },
); );
} }
field_type = None;
}
} }
Ok(()) Ok(())
} }
fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> { fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let object_idx = state.enum_query.capture_index_for_name("object").unwrap();
let enum_name_idx = state let enum_name_idx = state
.enum_query .enum_query
.capture_index_for_name("enum_name") .capture_index_for_name("enum_name")
@ -238,7 +324,6 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.enum_query .enum_query
.capture_index_for_name("enum_value") .capture_index_for_name("enum_value")
.unwrap(); .unwrap();
let stage2_idx = state.enum_query.capture_index_for_name("stage2").unwrap();
let matches = state.cursor.matches( let matches = state.cursor.matches(
&state.enum_query, &state.enum_query,
@ -246,19 +331,15 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
state.code_bytes.as_slice(), state.code_bytes.as_slice(),
); );
// extract object_name, then execute subquery enum_2.scm on the stage2 node
for m in matches { for m in matches {
if m.captures.is_empty() { if m.captures.is_empty() {
continue; continue;
} }
let mut object_name = String::new();
let mut enum_name = String::new(); let mut enum_name = String::new();
let mut enum_objects: Vec<(String, i32)> = Vec::new(); let mut enum_objects: Vec<(String, i32)> = Vec::new();
let mut current_enum_field = String::new(); let mut current_enum_field = String::new();
let mut current_enum_value = String::new(); let mut current_enum_value = String::new();
let mut stage2_node = None;
for capture in m.captures { for capture in m.captures {
let node_text = capture let node_text = capture
@ -266,67 +347,19 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.utf8_text(state.code_bytes.as_slice())? .utf8_text(state.code_bytes.as_slice())?
.to_string(); .to_string();
if capture.index == object_idx { if capture.index == enum_name_idx {
object_name = node_text;
} else if capture.index == enum_name_idx {
enum_name = node_text; enum_name = node_text;
} else if capture.index == enum_field_idx { } else if capture.index == enum_field_idx {
current_enum_field = node_text; current_enum_field = node_text;
} else if capture.index == enum_value_idx { } else if capture.index == enum_value_idx {
current_enum_value = node_text; current_enum_value = node_text;
} else if capture.index == stage2_idx {
stage2_node = Some(capture.node);
}
}
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
let Some(stage2_node) = stage2_node else {
panic!("stage2 node not found!: {object_name}");
};
let mut stage2_query_txt = std::fs::read_to_string("queries/enum_2.scm")?;
// replace "REPLACEME" with object_name
stage2_query_txt = stage2_query_txt.replace("REPLACEME", &object_name);
let stage2_query = Query::new(tree_sitter_javascript::language(), &stage2_query_txt)?;
let mut stage2_qc = QueryCursor::new();
let stage2_matches =
stage2_qc.matches(&stage2_query, stage2_node, state.code_bytes.as_slice());
let stage2_enum_field_idx = stage2_query.capture_index_for_name("enum_field").unwrap();
let stage2_enum_value_idx = stage2_query.capture_index_for_name("enum_value").unwrap();
for m in stage2_matches {
if m.captures.is_empty() {
continue;
}
let mut current_enum_field = String::new();
let mut current_enum_value = String::new();
for capture in m.captures {
let node_text = capture
.node
.utf8_text(state.code_bytes.as_slice())?
.to_string();
if capture.index == stage2_enum_field_idx {
current_enum_field = node_text;
} else if capture.index == stage2_enum_value_idx {
current_enum_value = node_text;
}
}
if current_enum_value.contains('e') { if current_enum_value.contains('e') {
replace_exp(&mut current_enum_value); replace_exp(&mut current_enum_value);
} }
}
}
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap())); enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
}
if state.enums.contains_key(&enum_name) { if state.enums.contains_key(&enum_name) {
state state