Compare commits

...

No commits in common. "1ad88bd85bf86c52ae1765605a403b5b183252dc" and "672354ec5acaf207be76407f966f70006ac56598" have entirely different histories.

5 changed files with 190 additions and 569 deletions

22
02-ws-js/Cargo.lock generated
View file

@ -38,6 +38,15 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "parse_js"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]
[[package]]
name = "regex"
version = "1.10.3"
@ -79,19 +88,10 @@ dependencies = [
[[package]]
name = "tree-sitter-javascript"
version = "0.20.3"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38d1463af5be7052171161db7cfe45c7621ed959ae533972ab47a09b1ed70ec0"
checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "ws-js-parser"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]

View file

@ -1,9 +1,9 @@
[package]
name = "ws-js-parser"
name = "parse_js"
version = "0.1.0"
edition = "2021"
[dependencies]
heck = "0.4.1"
tree-sitter = "0.20.10"
tree-sitter-javascript = "0.20.3"
tree-sitter-javascript = "0.20.1"

View file

@ -5,47 +5,33 @@
(identifier) @_n
(property_identifier) @enum_name
(#eq? @_n "n"))
right: (parenthesized_expression
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (identifier) @_var
right: (object . "{" . "}" . )))
[
(parenthesized_expression
(assignment_expression
left: (subscript_expression
object: (parenthesized_expression
(assignment_expression
left: (identifier) @object
"="
right: (call_expression
function: (member_expression
object: (identifier) @_object
"."
property: (property_identifier) @_create
(#eq? @_object "Object")
(#eq? @_create "create"))
arguments: (arguments . "(" (identifier) @_var2 . ")" .
(#eq? @_var @_var2)))))
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression)
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
)
right: (parenthesized_expression (sequence_expression
left: (parenthesized_expression
(assignment_expression
left: (identifier) @_var
right: (object . "{" . "}" . )))
right: (sequence_expression
left: (parenthesized_expression
(assignment_expression
left: (subscript_expression
object: (parenthesized_expression
(assignment_expression
left: (identifier) @object
"="
right: (call_expression
function: (member_expression
object: (identifier) @_object
"."
property: (property_identifier) @_create
(#eq? @_object "Object")
(#eq? @_create "create"))
arguments: (arguments . "(" (identifier) @_var2 . ")" .
(#eq? @_var @_var2)))))
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression)
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
(parenthesized_expression
(assignment_expression
left: (subscript_expression
object: (identifier)
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression
object: ((identifier) @_var2
(#eq? @_var2 @_var)))
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
)
]
)))))
))) @stage2
))))

View file

@ -2,407 +2,75 @@
(parenthesized_expression
(assignment_expression
left: (member_expression
object:
[
; n.Message
((identifier) @_n
(#eq? @_n "n"))
(identifier) @_n
(property_identifier) @message_name
(#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression
right: (sequence_expression
right: (sequence_expression
left: (parenthesized_expression (assignment_expression
left: (member_expression (identifier) (property_identifier) @_decode (#eq? @_decode "decode"))
right: (function
body: (statement_block (for_statement
body: (statement_block
[
(switch_statement
value: (parenthesized_expression
(binary_expression
left: (identifier)
">>>"
right: (number) @_shift3
(#eq? @_shift3 "3")))
body:
(switch_body (switch_case
value: (number) @field_id
body: (expression_statement
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; (n = {}).Message
(parenthesized_expression
(assignment_expression
left: (identifier) @_n
"="
right: (object . "{" . "}"))
(#eq? @_n "n"))
]
property: (property_identifier) @message_name)
right: (parenthesized_expression
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (member_expression
(identifier)
(property_identifier) @_decode
(#eq? @_decode "decode"))
right: (function_expression
body: (statement_block (for_statement
body: (statement_block
[
(switch_statement
value: (parenthesized_expression
(binary_expression
left: (identifier)
">>>"
right: (number) @_shift3
(#eq? @_shift3 "3")))
body:
(switch_body (switch_case
value: (number) @field_id
body: [
(expression_statement
[
; Single items
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
; Single primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])))))
; Single non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
(expression_statement
(ternary_expression
condition: (binary_expression
left: (binary_expression
left: (identifier)
">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id)
; Repeated items
(sequence_expression
(binary_expression)
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
consequence: (parenthesized_expression
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
; Map items
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject")))
])
(if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))))
]
body: (variable_declaration)?
; Map body, if present, in some cases there's a statement block, in some it's a switch statement
; seems like some cases with inlined assigns omit the braces - for (s = "", c = null; t.pos < a;) switch ((u = t.uint32()) >>> 3) { ... }
body: (for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; Due to inlined assignment in the switch case, we need this duplication
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)?
)))
; Single field ternary
; i >>> 3 == 1 ? (r.Field = t.decodeMethod()) : ...
(expression_statement
(ternary_expression
condition: (binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id)
consequence: (parenthesized_expression
[
; Single items
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
; Single primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Single non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
]
)
; Repeated items
(sequence_expression
(binary_expression)
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
))
]
)))
; Inlined ternary into if statement for repeated...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (if_statement
consequence: (for_statement
body: (expression_statement
(call_expression
function: (member_expression
object: (member_expression
object: (identifier)
property: (property_identifier) @field_name)
property: ((property_identifier) @push
(#eq? @push "push")))
arguments: (arguments
[
; Repeated primitive
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
; Repeated non-primitive
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])
)))))
; Inlined ternary into if statement for maps...
(if_statement
condition: (parenthesized_expression
(binary_expression
left: (binary_expression
left: (identifier)
operator: ">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id))
consequence: (statement_block
(expression_statement
(binary_expression
left: (binary_expression
left: (member_expression
(identifier)
(property_identifier) @field_name)
operator: "==="
right: (member_expression) @_util_emptyobject
(#eq? @_util_emptyobject "$util.emptyObject"))))
(for_statement
body: [
(statement_block
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
)))
; Due to inlined assignment in the switch case, we need this duplication
; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3)
(switch_statement
(switch_body
(switch_case
"case"
value: ((number) @kv_id (#any-of? @kv_id "1" "2"))
body: (expression_statement
[
; Primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @kv_field_type))
)
; Non-primitive map item
(assignment_expression
right: (call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @kv_field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
)
]
))
))
]
)
))
]
))))
)))))))
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
]))))
]
)))))))))))))

View file

@ -7,20 +7,7 @@ use tree_sitter::{Parser, Query, QueryCursor, Tree};
struct Message {
name: String,
id: Option<i32>,
fields: Vec<Field>,
}
struct Field {
id: i32,
name: String,
_type: FieldType,
}
#[derive(Clone, Debug)]
enum FieldType {
Single(String),
Repeated(String),
Map(String, String),
fields: Vec<(i32, String, String)>,
}
struct Enum {
@ -117,13 +104,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!(
output,
" {} {} = {};",
match &field._type {
FieldType::Single(t) => t.clone(),
FieldType::Repeated(t) => format!("repeated {t}"),
FieldType::Map(k, v) => format!("map<{k}, {v}>"),
},
field.name.to_snake_case(),
field.id
field.2,
field.1.to_snake_case(),
field.0
);
output
})
@ -142,19 +125,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = writeln!(
output,
" {} = {};",
if !field
.0
.to_shouty_snake_case()
.starts_with(&en.name.to_shouty_snake_case())
{
format!(
"{}_{}",
en.name.to_shouty_snake_case(),
field.0.to_shouty_snake_case()
)
} else {
field.0.to_shouty_snake_case()
},
field.0.to_shouty_snake_case(),
field.1
);
output
@ -205,19 +176,6 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.capture_index_for_name("field_type")
.unwrap();
// repeated fields
let push_idx = state.message_query.capture_index_for_name("push").unwrap();
// map fields
let kv_id_idx = state.message_query.capture_index_for_name("kv_id").unwrap();
let kv_field_type_idx = state
.message_query
.capture_index_for_name("kv_field_type")
.unwrap();
let mut field_type = None;
let mut do_push = true;
for m in matches {
if m.captures.is_empty() {
continue;
@ -226,7 +184,7 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let mut message_name = String::new();
let mut field_id = String::new();
let mut field_name = String::new();
let mut kv_field_id = 0;
let mut field_type = String::new();
for capture in m.captures {
let node_text = capture
@ -241,70 +199,25 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
} else if capture.index == field_name_idx {
field_name = node_text;
} else if capture.index == field_type_idx {
if let Some(FieldType::Repeated(ref mut t)) = field_type {
*t = node_text;
} else {
field_type = Some(FieldType::Single(node_text));
}
} else if capture.index == push_idx {
if let Some(FieldType::Single(text)) = field_type {
field_type = Some(FieldType::Repeated(text));
} else {
field_type = Some(FieldType::Repeated(String::new()));
}
} else if capture.index == kv_id_idx {
kv_field_id = node_text.parse()?;
} else if capture.index == kv_field_type_idx {
if let Some(FieldType::Map(ref mut key_type, ref mut value_type)) = field_type {
if kv_field_id == 1 {
*key_type = node_text;
} else if kv_field_id == 2 {
*value_type = node_text;
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
}
do_push = true;
} else {
field_type = if kv_field_id == 1 {
Some(FieldType::Map(node_text, String::new()))
} else if kv_field_id == 2 {
Some(FieldType::Map(String::new(), node_text))
} else {
panic!("Unexpected kv_field_id: {kv_field_id}");
};
do_push = false;
}
field_type = node_text;
}
}
if do_push {
if state.messages.contains_key(&message_name) {
state
.messages
.get_mut(&message_name)
.unwrap()
.fields
.push(Field {
id: field_id.parse()?,
name: field_name,
_type: field_type.unwrap(),
});
} else {
state.messages.insert(
message_name.clone(),
Message {
name: message_name,
id: None,
fields: vec![Field {
id: field_id.parse()?,
name: field_name,
_type: field_type.unwrap(),
}],
},
);
}
field_type = None;
if state.messages.contains_key(&message_name) {
state.messages.get_mut(&message_name).unwrap().fields.push((
field_id.parse()?,
field_name,
field_type,
));
} else {
state.messages.insert(
message_name.clone(),
Message {
name: message_name,
id: None,
fields: vec![(field_id.parse()?, field_name, field_type)],
},
);
}
}
@ -312,6 +225,7 @@ fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
}
fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
let object_idx = state.enum_query.capture_index_for_name("object").unwrap();
let enum_name_idx = state
.enum_query
.capture_index_for_name("enum_name")
@ -324,6 +238,7 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.enum_query
.capture_index_for_name("enum_value")
.unwrap();
let stage2_idx = state.enum_query.capture_index_for_name("stage2").unwrap();
let matches = state.cursor.matches(
&state.enum_query,
@ -331,15 +246,19 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
state.code_bytes.as_slice(),
);
// extract object_name, then execute subquery enum_2.scm on the stage2 node
for m in matches {
if m.captures.is_empty() {
continue;
}
let mut object_name = String::new();
let mut enum_name = String::new();
let mut enum_objects: Vec<(String, i32)> = Vec::new();
let mut current_enum_field = String::new();
let mut current_enum_value = String::new();
let mut stage2_node = None;
for capture in m.captures {
let node_text = capture
@ -347,20 +266,68 @@ fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
.utf8_text(state.code_bytes.as_slice())?
.to_string();
if capture.index == enum_name_idx {
if capture.index == object_idx {
object_name = node_text;
} else if capture.index == enum_name_idx {
enum_name = node_text;
} else if capture.index == enum_field_idx {
current_enum_field = node_text;
} else if capture.index == enum_value_idx {
current_enum_value = node_text;
if current_enum_value.contains('e') {
replace_exp(&mut current_enum_value);
}
} else if capture.index == stage2_idx {
stage2_node = Some(capture.node);
}
}
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
let Some(stage2_node) = stage2_node else {
panic!("stage2 node not found!: {object_name}");
};
let mut stage2_query_txt = std::fs::read_to_string("queries/enum_2.scm")?;
// replace "REPLACEME" with object_name
stage2_query_txt = stage2_query_txt.replace("REPLACEME", &object_name);
let stage2_query = Query::new(tree_sitter_javascript::language(), &stage2_query_txt)?;
let mut stage2_qc = QueryCursor::new();
let stage2_matches =
stage2_qc.matches(&stage2_query, stage2_node, state.code_bytes.as_slice());
let stage2_enum_field_idx = stage2_query.capture_index_for_name("enum_field").unwrap();
let stage2_enum_value_idx = stage2_query.capture_index_for_name("enum_value").unwrap();
for m in stage2_matches {
if m.captures.is_empty() {
continue;
}
let mut current_enum_field = String::new();
let mut current_enum_value = String::new();
for capture in m.captures {
let node_text = capture
.node
.utf8_text(state.code_bytes.as_slice())?
.to_string();
if capture.index == stage2_enum_field_idx {
current_enum_field = node_text;
} else if capture.index == stage2_enum_value_idx {
current_enum_value = node_text;
}
}
if current_enum_value.contains('e') {
replace_exp(&mut current_enum_value);
}
enum_objects.push((current_enum_field, current_enum_value.parse().unwrap()));
}
if state.enums.contains_key(&enum_name) {
state
.enums