diff --git a/02-ws-js/Cargo.lock b/02-ws-js/Cargo.lock index 819edaf..69abf4f 100644 --- a/02-ws-js/Cargo.lock +++ b/02-ws-js/Cargo.lock @@ -38,15 +38,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "parse_js" -version = "0.1.0" -dependencies = [ - "heck", - "tree-sitter", - "tree-sitter-javascript", -] - [[package]] name = "regex" version = "1.10.3" @@ -88,10 +79,19 @@ dependencies = [ [[package]] name = "tree-sitter-javascript" -version = "0.20.1" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7" +checksum = "38d1463af5be7052171161db7cfe45c7621ed959ae533972ab47a09b1ed70ec0" dependencies = [ "cc", "tree-sitter", ] + +[[package]] +name = "ws-js-parser" +version = "0.1.0" +dependencies = [ + "heck", + "tree-sitter", + "tree-sitter-javascript", +] diff --git a/02-ws-js/Cargo.toml b/02-ws-js/Cargo.toml index c8cca4c..f5a3088 100644 --- a/02-ws-js/Cargo.toml +++ b/02-ws-js/Cargo.toml @@ -1,9 +1,9 @@ [package] -name = "parse_js" +name = "ws-js-parser" version = "0.1.0" edition = "2021" [dependencies] heck = "0.4.1" tree-sitter = "0.20.10" -tree-sitter-javascript = "0.20.1" +tree-sitter-javascript = "0.20.3" diff --git a/02-ws-js/queries/enum.scm b/02-ws-js/queries/enum.scm index bcda980..a40da5b 100644 --- a/02-ws-js/queries/enum.scm +++ b/02-ws-js/queries/enum.scm @@ -5,33 +5,47 @@ (identifier) @_n (property_identifier) @enum_name (#eq? @_n "n")) - right: (parenthesized_expression (sequence_expression - left: (parenthesized_expression - (assignment_expression - left: (identifier) @_var - right: (object . "{" . "}" . ))) - right: (sequence_expression - left: (parenthesized_expression - (assignment_expression - left: (subscript_expression - object: (parenthesized_expression - (assignment_expression - left: (identifier) @object - "=" - right: (call_expression - function: (member_expression - object: (identifier) @_object - "." - property: (property_identifier) @_create - (#eq? @_object "Object") - (#eq? @_create "create")) - arguments: (arguments . "(" (identifier) @_var2 . ")" . - (#eq? @_var @_var2))))) - index: (parenthesized_expression - (assignment_expression - left: (subscript_expression) - right: (string (string_fragment) @enum_field)))) - right: (number) @enum_value) + right: (parenthesized_expression + (sequence_expression + (parenthesized_expression + (assignment_expression + left: (identifier) @_var + right: (object . "{" . "}" . ))) + [ + (parenthesized_expression + (assignment_expression + left: (subscript_expression + object: (parenthesized_expression + (assignment_expression + left: (identifier) @object + "=" + right: (call_expression + function: (member_expression + object: (identifier) @_object + "." + property: (property_identifier) @_create + (#eq? @_object "Object") + (#eq? @_create "create")) + arguments: (arguments . "(" (identifier) @_var2 . ")" . + (#eq? @_var @_var2))))) + index: (parenthesized_expression + (assignment_expression + left: (subscript_expression) + right: (string (string_fragment) @enum_field)))) + right: (number) @enum_value) + ) -))) @stage2 -)))) + (parenthesized_expression + (assignment_expression + left: (subscript_expression + object: (identifier) + index: (parenthesized_expression + (assignment_expression + left: (subscript_expression + object: ((identifier) @_var2 + (#eq? @_var2 @_var))) + right: (string (string_fragment) @enum_field)))) + right: (number) @enum_value) + ) + ] +))))) diff --git a/02-ws-js/queries/message.scm b/02-ws-js/queries/message.scm index 4bc8b2e..e6cf6db 100644 --- a/02-ws-js/queries/message.scm +++ b/02-ws-js/queries/message.scm @@ -2,75 +2,407 @@ (parenthesized_expression (assignment_expression left: (member_expression - (identifier) @_n - (property_identifier) @message_name - (#eq? @_n "n")) - right: (parenthesized_expression (sequence_expression - right: (sequence_expression - right: (sequence_expression - left: (parenthesized_expression (assignment_expression - left: (member_expression (identifier) (property_identifier) @_decode (#eq? @_decode "decode")) - right: (function - body: (statement_block (for_statement - body: (statement_block - [ - (switch_statement - value: (parenthesized_expression - (binary_expression - left: (identifier) - ">>>" - right: (number) @_shift3 - (#eq? @_shift3 "3"))) - body: - (switch_body (switch_case - value: (number) @field_id - body: (expression_statement - (assignment_expression - left: (member_expression (identifier) (property_identifier) @field_name) - right: [ - (call_expression - function: (member_expression - object: (identifier) - property: (property_identifier) @field_type)) + object: + [ + ; n.Message + ((identifier) @_n + (#eq? @_n "n")) - (call_expression - function: (member_expression - object: (member_expression - object: (member_expression) @_rootakiprotocol - property: (property_identifier) @field_type - (#eq? @_rootakiprotocol "$root.Aki.Protocol")) - property: (property_identifier) @_decode_field - (#eq? @_decode_field "decode"))) - ]))))) + ; (n = {}).Message + (parenthesized_expression + (assignment_expression + left: (identifier) @_n + "=" + right: (object . "{" . "}")) + (#eq? @_n "n")) + ] + property: (property_identifier) @message_name) + right: (parenthesized_expression + (sequence_expression + (parenthesized_expression + (assignment_expression + left: (member_expression + (identifier) + (property_identifier) @_decode + (#eq? @_decode "decode")) + right: (function_expression + body: (statement_block (for_statement + body: (statement_block + [ + (switch_statement + value: (parenthesized_expression + (binary_expression + left: (identifier) + ">>>" + right: (number) @_shift3 + (#eq? @_shift3 "3"))) + body: + (switch_body (switch_case + value: (number) @field_id + body: [ + (expression_statement + [ + ; Single items + (assignment_expression + left: (member_expression (identifier) (property_identifier) @field_name) + right: [ + ; Single primitive + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) - (expression_statement - (ternary_expression - condition: (binary_expression - left: (binary_expression - left: (identifier) - ">>>" - right: (number) @_shift3 - (#eq? @_shift3 "3")) - "==" - right: (number) @field_id) + ; Single non-primitive + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ]) - consequence: (parenthesized_expression - (assignment_expression - left: (member_expression (identifier) (property_identifier) @field_name) - right: [ - (call_expression - function: (member_expression - object: (identifier) - property: (property_identifier) @field_type)) + ; Repeated items + (sequence_expression + (binary_expression) + (call_expression + function: (member_expression + object: (member_expression + object: (identifier) + property: (property_identifier) @field_name) + property: ((property_identifier) @push + (#eq? @push "push"))) - (call_expression - function: (member_expression - object: (member_expression - object: (member_expression) @_rootakiprotocol - property: (property_identifier) @field_type - (#eq? @_rootakiprotocol "$root.Aki.Protocol")) - property: (property_identifier) @_decode_field - (#eq? @_decode_field "decode"))) - ])))) - ] -))))))))))))) + arguments: (arguments + [ + ; Repeated primitive + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) + + ; Repeated non-primitive + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ]) + )) + + ; Map items + (binary_expression + left: (binary_expression + left: (member_expression + (identifier) + (property_identifier) @field_name) + operator: "===" + right: (member_expression) @_util_emptyobject + (#eq? @_util_emptyobject "$util.emptyObject"))) + ]) + + (if_statement + consequence: (for_statement + body: (expression_statement + (call_expression + function: (member_expression + object: (member_expression + object: (identifier) + property: (property_identifier) @field_name) + property: ((property_identifier) @push + (#eq? @push "push"))) + arguments: (arguments + [ + ; Repeated primitive + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) + ; Repeated non-primitive + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ]) + )))) + ] + body: (variable_declaration)? + ; Map body, if present, in some cases there's a statement block, in some it's a switch statement + ; seems like some cases with inlined assigns omit the braces - for (s = "", c = null; t.pos < a;) switch ((u = t.uint32()) >>> 3) { ... } + body: (for_statement + body: [ + (statement_block + (switch_statement + (switch_body + (switch_case + "case" + value: ((number) @kv_id (#any-of? @kv_id "1" "2")) + body: (expression_statement + [ + ; Primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @kv_field_type)) + ) + + ; Non-primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @kv_field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ) + ] + )) + ))) + + ; Due to inlined assignment in the switch case, we need this duplication + ; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3) + (switch_statement + (switch_body + (switch_case + "case" + value: ((number) @kv_id (#any-of? @kv_id "1" "2")) + body: (expression_statement + [ + ; Primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @kv_field_type)) + ) + + ; Non-primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @kv_field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ) + ] + )) + )) + ] + )? + ))) + + ; Single field ternary + ; i >>> 3 == 1 ? (r.Field = t.decodeMethod()) : ... + (expression_statement + (ternary_expression + condition: (binary_expression + left: (binary_expression + left: (identifier) + operator: ">>>" + right: (number) @_shift3 + (#eq? @_shift3 "3")) + "==" + right: (number) @field_id) + + consequence: (parenthesized_expression + [ + ; Single items + (assignment_expression + left: (member_expression (identifier) (property_identifier) @field_name) + right: [ + ; Single primitive + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) + + ; Single non-primitive + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ] + ) + + ; Repeated items + (sequence_expression + (binary_expression) + (call_expression + function: (member_expression + object: (member_expression + object: (identifier) + property: (property_identifier) @field_name) + property: ((property_identifier) @push + (#eq? @push "push"))) + + arguments: (arguments + [ + ; Repeated primitive + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) + + ; Repeated non-primitive + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ]) + )) + ] + ))) + + ; Inlined ternary into if statement for repeated... + (if_statement + condition: (parenthesized_expression + (binary_expression + left: (binary_expression + left: (identifier) + operator: ">>>" + right: (number) @_shift3 + (#eq? @_shift3 "3")) + "==" + right: (number) @field_id)) + consequence: (if_statement + consequence: (for_statement + body: (expression_statement + (call_expression + function: (member_expression + object: (member_expression + object: (identifier) + property: (property_identifier) @field_name) + property: ((property_identifier) @push + (#eq? @push "push"))) + arguments: (arguments + [ + ; Repeated primitive + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) + ; Repeated non-primitive + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ]) + ))))) + + ; Inlined ternary into if statement for maps... + (if_statement + condition: (parenthesized_expression + (binary_expression + left: (binary_expression + left: (identifier) + operator: ">>>" + right: (number) @_shift3 + (#eq? @_shift3 "3")) + "==" + right: (number) @field_id)) + consequence: (statement_block + (expression_statement + (binary_expression + left: (binary_expression + left: (member_expression + (identifier) + (property_identifier) @field_name) + operator: "===" + right: (member_expression) @_util_emptyobject + (#eq? @_util_emptyobject "$util.emptyObject")))) + (for_statement + body: [ + (statement_block + (switch_statement + (switch_body + (switch_case + "case" + value: ((number) @kv_id (#any-of? @kv_id "1" "2")) + body: (expression_statement + [ + ; Primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @kv_field_type)) + ) + + ; Non-primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @kv_field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ) + ] + )) + ))) + + ; Due to inlined assignment in the switch case, we need this duplication + ; e.g. switch ((u = t.uint32()) >>> 3) instead of var u = t.uint32(); switch (u >>> 3) + (switch_statement + (switch_body + (switch_case + "case" + value: ((number) @kv_id (#any-of? @kv_id "1" "2")) + body: (expression_statement + [ + ; Primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @kv_field_type)) + ) + + ; Non-primitive map item + (assignment_expression + right: (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @kv_field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ) + ] + )) + )) + ] + ) + )) + ] + )))) +))))))) diff --git a/02-ws-js/src/main.rs b/02-ws-js/src/main.rs index 54d3665..f60ad53 100644 --- a/02-ws-js/src/main.rs +++ b/02-ws-js/src/main.rs @@ -7,7 +7,20 @@ use tree_sitter::{Parser, Query, QueryCursor, Tree}; struct Message { name: String, id: Option, - fields: Vec<(i32, String, String)>, + fields: Vec, +} + +struct Field { + id: i32, + name: String, + _type: FieldType, +} + +#[derive(Clone, Debug)] +enum FieldType { + Single(String), + Repeated(String), + Map(String, String), } struct Enum { @@ -104,9 +117,13 @@ fn main() -> Result<(), Box> { let _ = writeln!( output, " {} {} = {};", - field.2, - field.1.to_snake_case(), - field.0 + match &field._type { + FieldType::Single(t) => t.clone(), + FieldType::Repeated(t) => format!("repeated {t}"), + FieldType::Map(k, v) => format!("map<{k}, {v}>"), + }, + field.name.to_snake_case(), + field.id ); output }) @@ -125,7 +142,19 @@ fn main() -> Result<(), Box> { let _ = writeln!( output, " {} = {};", - field.0.to_shouty_snake_case(), + if !field + .0 + .to_shouty_snake_case() + .starts_with(&en.name.to_shouty_snake_case()) + { + format!( + "{}_{}", + en.name.to_shouty_snake_case(), + field.0.to_shouty_snake_case() + ) + } else { + field.0.to_shouty_snake_case() + }, field.1 ); output @@ -176,6 +205,19 @@ fn parse_messages(state: &mut State) -> Result<(), Box> { .capture_index_for_name("field_type") .unwrap(); + // repeated fields + let push_idx = state.message_query.capture_index_for_name("push").unwrap(); + + // map fields + let kv_id_idx = state.message_query.capture_index_for_name("kv_id").unwrap(); + let kv_field_type_idx = state + .message_query + .capture_index_for_name("kv_field_type") + .unwrap(); + + let mut field_type = None; + let mut do_push = true; + for m in matches { if m.captures.is_empty() { continue; @@ -184,7 +226,7 @@ fn parse_messages(state: &mut State) -> Result<(), Box> { let mut message_name = String::new(); let mut field_id = String::new(); let mut field_name = String::new(); - let mut field_type = String::new(); + let mut kv_field_id = 0; for capture in m.captures { let node_text = capture @@ -199,25 +241,70 @@ fn parse_messages(state: &mut State) -> Result<(), Box> { } else if capture.index == field_name_idx { field_name = node_text; } else if capture.index == field_type_idx { - field_type = node_text; + if let Some(FieldType::Repeated(ref mut t)) = field_type { + *t = node_text; + } else { + field_type = Some(FieldType::Single(node_text)); + } + } else if capture.index == push_idx { + if let Some(FieldType::Single(text)) = field_type { + field_type = Some(FieldType::Repeated(text)); + } else { + field_type = Some(FieldType::Repeated(String::new())); + } + } else if capture.index == kv_id_idx { + kv_field_id = node_text.parse()?; + } else if capture.index == kv_field_type_idx { + if let Some(FieldType::Map(ref mut key_type, ref mut value_type)) = field_type { + if kv_field_id == 1 { + *key_type = node_text; + } else if kv_field_id == 2 { + *value_type = node_text; + } else { + panic!("Unexpected kv_field_id: {kv_field_id}"); + } + do_push = true; + } else { + field_type = if kv_field_id == 1 { + Some(FieldType::Map(node_text, String::new())) + } else if kv_field_id == 2 { + Some(FieldType::Map(String::new(), node_text)) + } else { + panic!("Unexpected kv_field_id: {kv_field_id}"); + }; + do_push = false; + } } } - if state.messages.contains_key(&message_name) { - state.messages.get_mut(&message_name).unwrap().fields.push(( - field_id.parse()?, - field_name, - field_type, - )); - } else { - state.messages.insert( - message_name.clone(), - Message { - name: message_name, - id: None, - fields: vec![(field_id.parse()?, field_name, field_type)], - }, - ); + if do_push { + if state.messages.contains_key(&message_name) { + state + .messages + .get_mut(&message_name) + .unwrap() + .fields + .push(Field { + id: field_id.parse()?, + name: field_name, + _type: field_type.unwrap(), + }); + } else { + state.messages.insert( + message_name.clone(), + Message { + name: message_name, + id: None, + fields: vec![Field { + id: field_id.parse()?, + name: field_name, + _type: field_type.unwrap(), + }], + }, + ); + } + + field_type = None; } } @@ -225,7 +312,6 @@ fn parse_messages(state: &mut State) -> Result<(), Box> { } fn parse_enums(state: &mut State) -> Result<(), Box> { - let object_idx = state.enum_query.capture_index_for_name("object").unwrap(); let enum_name_idx = state .enum_query .capture_index_for_name("enum_name") @@ -238,7 +324,6 @@ fn parse_enums(state: &mut State) -> Result<(), Box> { .enum_query .capture_index_for_name("enum_value") .unwrap(); - let stage2_idx = state.enum_query.capture_index_for_name("stage2").unwrap(); let matches = state.cursor.matches( &state.enum_query, @@ -246,19 +331,15 @@ fn parse_enums(state: &mut State) -> Result<(), Box> { state.code_bytes.as_slice(), ); - // extract object_name, then execute subquery enum_2.scm on the stage2 node - for m in matches { if m.captures.is_empty() { continue; } - let mut object_name = String::new(); let mut enum_name = String::new(); let mut enum_objects: Vec<(String, i32)> = Vec::new(); let mut current_enum_field = String::new(); let mut current_enum_value = String::new(); - let mut stage2_node = None; for capture in m.captures { let node_text = capture @@ -266,68 +347,20 @@ fn parse_enums(state: &mut State) -> Result<(), Box> { .utf8_text(state.code_bytes.as_slice())? .to_string(); - if capture.index == object_idx { - object_name = node_text; - } else if capture.index == enum_name_idx { + if capture.index == enum_name_idx { enum_name = node_text; } else if capture.index == enum_field_idx { current_enum_field = node_text; } else if capture.index == enum_value_idx { current_enum_value = node_text; - } else if capture.index == stage2_idx { - stage2_node = Some(capture.node); + if current_enum_value.contains('e') { + replace_exp(&mut current_enum_value); + } } } enum_objects.push((current_enum_field, current_enum_value.parse().unwrap())); - let Some(stage2_node) = stage2_node else { - panic!("stage2 node not found!: {object_name}"); - }; - - let mut stage2_query_txt = std::fs::read_to_string("queries/enum_2.scm")?; - - // replace "REPLACEME" with object_name - - stage2_query_txt = stage2_query_txt.replace("REPLACEME", &object_name); - - let stage2_query = Query::new(tree_sitter_javascript::language(), &stage2_query_txt)?; - let mut stage2_qc = QueryCursor::new(); - - let stage2_matches = - stage2_qc.matches(&stage2_query, stage2_node, state.code_bytes.as_slice()); - - let stage2_enum_field_idx = stage2_query.capture_index_for_name("enum_field").unwrap(); - let stage2_enum_value_idx = stage2_query.capture_index_for_name("enum_value").unwrap(); - - for m in stage2_matches { - if m.captures.is_empty() { - continue; - } - - let mut current_enum_field = String::new(); - let mut current_enum_value = String::new(); - - for capture in m.captures { - let node_text = capture - .node - .utf8_text(state.code_bytes.as_slice())? - .to_string(); - - if capture.index == stage2_enum_field_idx { - current_enum_field = node_text; - } else if capture.index == stage2_enum_value_idx { - current_enum_value = node_text; - } - } - - if current_enum_value.contains('e') { - replace_exp(&mut current_enum_value); - } - - enum_objects.push((current_enum_field, current_enum_value.parse().unwrap())); - } - if state.enums.contains_key(&enum_name) { state .enums