From b51e203a10034184a26be83d755b85668f984be1 Mon Sep 17 00:00:00 2001 From: Gulag Date: Tue, 30 Jan 2024 19:21:05 -0500 Subject: [PATCH] Initial commit --- .gitignore | 14 ++ 02-ws-js/Cargo.lock | 97 +++++++++ 02-ws-js/Cargo.toml | 9 + 02-ws-js/README.md | 24 +++ 02-ws-js/example_define.js | 4 + 02-ws-js/example_protocol.js | 69 ++++++ 02-ws-js/parse.py | 71 +++++++ 02-ws-js/queries/enum.scm | 37 ++++ 02-ws-js/queries/enum_2.scm | 8 + 02-ws-js/queries/id.scm | 6 + 02-ws-js/queries/message.scm | 76 +++++++ 02-ws-js/src/main.rs | 397 +++++++++++++++++++++++++++++++++++ README.md | 7 + 13 files changed, 819 insertions(+) create mode 100644 .gitignore create mode 100644 02-ws-js/Cargo.lock create mode 100644 02-ws-js/Cargo.toml create mode 100644 02-ws-js/README.md create mode 100644 02-ws-js/example_define.js create mode 100644 02-ws-js/example_protocol.js create mode 100644 02-ws-js/parse.py create mode 100644 02-ws-js/queries/enum.scm create mode 100644 02-ws-js/queries/enum_2.scm create mode 100644 02-ws-js/queries/id.scm create mode 100644 02-ws-js/queries/message.scm create mode 100644 02-ws-js/src/main.rs create mode 100644 README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a4b61f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +02-ws-js/Config +02-ws-js/ConfigCommon +02-ws-js/ConfigQuery +02-ws-js/Net +02-ws-js/target +02-ws-js/ClassDefine.js +02-ws-js/CommonDefine.js +02-ws-js/ConfigDefine.js +02-ws-js/EditorDefine.js +02-ws-js/HttpCodeDefine.js +02-ws-js/LogDefine.js +02-ws-js/QueryTypeDefine.js +02-ws-js/message.proto +**/*.so diff --git a/02-ws-js/Cargo.lock b/02-ws-js/Cargo.lock new file mode 100644 index 0000000..819edaf --- /dev/null +++ b/02-ws-js/Cargo.lock @@ -0,0 +1,97 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "libc" +version = "0.2.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "parse_js" +version = "0.1.0" +dependencies = [ + "heck", + "tree-sitter", + "tree-sitter-javascript", +] + +[[package]] +name = "regex" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "tree-sitter" +version = 
"0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e747b1f9b7b931ed39a548c1fae149101497de3c1fc8d9e18c62c1a66c683d3d" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-javascript" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7" +dependencies = [ + "cc", + "tree-sitter", +] diff --git a/02-ws-js/Cargo.toml b/02-ws-js/Cargo.toml new file mode 100644 index 0000000..c8cca4c --- /dev/null +++ b/02-ws-js/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "parse_js" +version = "0.1.0" +edition = "2021" + +[dependencies] +heck = "0.4.1" +tree-sitter = "0.20.10" +tree-sitter-javascript = "0.20.1" diff --git a/02-ws-js/README.md b/02-ws-js/README.md new file mode 100644 index 0000000..955d42e --- /dev/null +++ b/02-ws-js/README.md @@ -0,0 +1,24 @@ +# WW Proto Dumper (Tree-Sitter ❤️ Reverse Engineering) + +This is a PoC and demo to show how one can use tree-sitter on dumped javascript +netcode files for a certain game to extract fully intact proto files. To see its +potential, you can run the following (replace `"REPLACEME"` in enum_2.scm with `"e"`): + +The Python code was my initial attempt at this, but my partially-implemented +matches API had a bug, so I just opted to go with Rust. 
+ +```bash +tree-sitter query queries/message.scm example_protocol.js +``` + +```bash +tree-sitter query queries/enum.scm example_protocol.js +``` + +```bash +tree-sitter query queries/enum_2.scm example_protocol.js +``` + +```bash +tree-sitter query queries/id.scm example_define.js +``` diff --git a/02-ws-js/example_define.js b/02-ws-js/example_define.js new file mode 100644 index 0000000..337c323 --- /dev/null +++ b/02-ws-js/example_define.js @@ -0,0 +1,4 @@ +(function(e) { + (e[(e.ExitGamePush = 1100)] = "ExitGamePush"), + (e[(e.ChangeSceneClockPush = 1199)] = "ChangeSceneClockPush") +}); diff --git a/02-ws-js/example_protocol.js b/02-ws-js/example_protocol.js new file mode 100644 index 0000000..a49186d --- /dev/null +++ b/02-ws-js/example_protocol.js @@ -0,0 +1,69 @@ +return ( + (B.Protocol = + (((n = {}).AceAntiDataPush = + ((W.create = function e(t) { + return new W(t); + }), + (W.encode = function e(t, o) { + return (o = o || $Writer.create()), t.AntiData && o.uint32(10).bytes(t.AntiData), o; + }), + (W.decode = function e(t, o) { + t instanceof $Reader || (t = $Reader.create(t)); + for (var n = void 0 === o ? t.len : t.pos + o, r = new $root.Aki.Protocol.AceAntiDataPush(); t.pos < n;) { + var i = t.uint32(); + i >>> 3 == 1 ? (r.AntiData = t.bytes()) : t.skipType(7 & i); + } + return r; + }), + W)), + (n.AchievementProgress = + ((j.create = function e(t) { + return new j(t); + }), + (j.encode = function e(t, o) { + return (o = o || $Writer.create()), t.CurProgress && o.uint32(8).int32(t.CurProgress), t.TotalProgress && o.uint32(16).int32(t.TotalProgress), o; + }), + (j.decode = function e(t, o) { + t instanceof $Reader || (t = $Reader.create(t)); + for (var n = void 0 === o ? 
"""Dump protobuf message definitions from a JS netcode file using tree-sitter.

Runs ``queries/message.scm`` against ``Net/Protocol.js``, groups the captured
fields by message name and writes the result to ``message.proto``.
"""
from dataclasses import dataclass
from typing import Dict, List, Tuple

from tree_sitter import Language, Node, Parser

js = "./js.so"

JS = Language(js, "javascript")
parser = Parser()
parser.set_language(JS)

# Context managers so the file handles are closed as soon as they are read.
with open("queries/message.scm") as query_file:
    q = query_file.read()
query = JS.query(q)

with open("Net/Protocol.js", "rb") as source_file:
    tree = parser.parse(source_file.read())

matches = query.matches(tree.root_node)


@dataclass
class Proto:
    # Message name, taken from the @message_name capture.
    name: str
    # (field number, field name, field type) triples, in match order.
    fields: List[Tuple[int, str, str]]


protos: Dict[str, Proto] = {}

# Captures that feed the generated .proto file. The query also yields helper
# captures ('_n', '_decode', '_shift3', ...) which are ignored here.
WANTED_CAPTURES = ("message_name", "field_id", "field_name", "field_type")

# NOTE(review): the loop assumes each match is (pattern_index, captures) with
# captures being (node, capture_name) pairs - confirm against the installed
# py-tree-sitter version.
captures: List[Tuple[Node, str]]
for _, captures in matches:
    if len(captures) == 0:
        continue

    # Capture order may differ between matches, so collect by name instead of
    # by position. Every capture of the match must be visited: leaving the
    # loop early would drop the remaining fields.
    values = dict.fromkeys(WANTED_CAPTURES, "")
    for node, capture_name in captures:
        if capture_name in values:
            values[capture_name] = node.text.decode("utf-8")

    message_name = values["message_name"]
    field_id = values["field_id"]
    field_name = values["field_name"]
    field_type = values["field_type"]

    print(f"{message_name} {field_id} {field_name} {field_type}")

    # A match without a message name or field number cannot become a proto
    # field; report it and move on instead of crashing in int("").
    if not message_name or not field_id:
        print(f"skipping incomplete match: {values}")
        continue

    field = (int(field_id), field_name, field_type)
    if message_name not in protos:
        protos[message_name] = Proto(message_name, [field])
    else:
        protos[message_name].fields.append(field)

# Render every message as a proto block followed by a blank line.
chunks: List[str] = []
for message in protos.values():
    chunks.append(f"message {message.name} {{\n")
    for field in message.fields:
        chunks.append(f" {field[2]} {field[1]} = {field[0]};\n")
    chunks.append("}\n\n")
proto_dump = "".join(chunks)


with open("message.proto", "w") as f:
    f.write(proto_dump)
@_object "Object") + (#eq? @_create "create")) + arguments: (arguments . "(" (identifier) @_var2 . ")" . + (#eq? @_var @_var2))))) + index: (parenthesized_expression + (assignment_expression + left: (subscript_expression) + right: (string (string_fragment) @enum_field)))) + right: (number) @enum_value) + +))) @stage2 +)))) diff --git a/02-ws-js/queries/enum_2.scm b/02-ws-js/queries/enum_2.scm new file mode 100644 index 0000000..ed00e9e --- /dev/null +++ b/02-ws-js/queries/enum_2.scm @@ -0,0 +1,8 @@ +(assignment_expression + left: (subscript_expression + object: ((identifier) @object_name (#eq? @object_name "REPLACEME")) + index: (parenthesized_expression + (assignment_expression + left: (subscript_expression) + right: (string (string_fragment) @enum_field)))) + right: (number) @enum_value) diff --git a/02-ws-js/queries/id.scm b/02-ws-js/queries/id.scm new file mode 100644 index 0000000..e159dbd --- /dev/null +++ b/02-ws-js/queries/id.scm @@ -0,0 +1,6 @@ +(assignment_expression + left: (member_expression + object: (identifier) + property: (property_identifier) @message_name) + right: (number) @message_id) + diff --git a/02-ws-js/queries/message.scm b/02-ws-js/queries/message.scm new file mode 100644 index 0000000..4bc8b2e --- /dev/null +++ b/02-ws-js/queries/message.scm @@ -0,0 +1,76 @@ +(sequence_expression + (parenthesized_expression + (assignment_expression + left: (member_expression + (identifier) @_n + (property_identifier) @message_name + (#eq? @_n "n")) + right: (parenthesized_expression (sequence_expression + right: (sequence_expression + right: (sequence_expression + left: (parenthesized_expression (assignment_expression + left: (member_expression (identifier) (property_identifier) @_decode (#eq? @_decode "decode")) + right: (function + body: (statement_block (for_statement + body: (statement_block + [ + (switch_statement + value: (parenthesized_expression + (binary_expression + left: (identifier) + ">>>" + right: (number) @_shift3 + (#eq? 
@_shift3 "3"))) + body: + (switch_body (switch_case + value: (number) @field_id + body: (expression_statement + (assignment_expression + left: (member_expression (identifier) (property_identifier) @field_name) + right: [ + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) + + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? @_decode_field "decode"))) + ]))))) + + (expression_statement + (ternary_expression + condition: (binary_expression + left: (binary_expression + left: (identifier) + ">>>" + right: (number) @_shift3 + (#eq? @_shift3 "3")) + "==" + right: (number) @field_id) + + consequence: (parenthesized_expression + (assignment_expression + left: (member_expression (identifier) (property_identifier) @field_name) + right: [ + (call_expression + function: (member_expression + object: (identifier) + property: (property_identifier) @field_type)) + + (call_expression + function: (member_expression + object: (member_expression + object: (member_expression) @_rootakiprotocol + property: (property_identifier) @field_type + (#eq? @_rootakiprotocol "$root.Aki.Protocol")) + property: (property_identifier) @_decode_field + (#eq? 
@_decode_field "decode"))) + ])))) + ] +))))))))))))) diff --git a/02-ws-js/src/main.rs b/02-ws-js/src/main.rs new file mode 100644 index 0000000..54d3665 --- /dev/null +++ b/02-ws-js/src/main.rs @@ -0,0 +1,397 @@ +use std::collections::BTreeMap; +use std::fmt::Write; + +use heck::{ToShoutySnakeCase, ToSnakeCase}; +use tree_sitter::{Parser, Query, QueryCursor, Tree}; + +struct Message { + name: String, + id: Option, + fields: Vec<(i32, String, String)>, +} + +struct Enum { + name: String, + fields: Vec<(String, i32)>, +} + +type MessageDump = BTreeMap; +type EnumDump = BTreeMap; + +struct State { + parser: Parser, + + cursor: QueryCursor, + + message_tree: Option, + + code_bytes: Vec, + + messages: MessageDump, + message_query: Query, + + enums: EnumDump, + enum_query: Query, + + id_query: Query, + + protodump_str: String, +} + +fn main() -> Result<(), Box> { + let mut state = State { + parser: { + let mut parser = Parser::new(); + parser.set_language(tree_sitter_javascript::language())?; + parser + }, + + cursor: QueryCursor::new(), + + message_tree: None, + + code_bytes: std::fs::read("Net/Protocol.js")?, + + messages: MessageDump::new(), + message_query: Query::new( + tree_sitter_javascript::language(), + &std::fs::read_to_string("queries/message.scm")?, + )?, + + enums: EnumDump::new(), + enum_query: Query::new( + tree_sitter_javascript::language(), + &std::fs::read_to_string("queries/enum.scm")?, + )?, + + id_query: Query::new( + tree_sitter_javascript::language(), + &std::fs::read_to_string("queries/id.scm")?, + )?, + + protodump_str: String::new(), + }; + + state.message_tree = Some(state.parser.parse(&state.code_bytes, None).unwrap()); + + state.protodump_str.push_str("syntax = \"proto3\";\n\n"); + + parse_messages(&mut state)?; + + parse_enums(&mut state)?; + + map_ids_to_messages(&mut state)?; + + for message in state.messages.values() { + writeln!( + state.protodump_str, + "message {} {{{}", + message.name, + if let Some(id) = message.id { + format!(" // 
/// Expands a JS exponent literal to the `i32` it denotes.
///
/// Returns `None` when `literal` has no exponent marker, is malformed, does
/// not denote a whole number, or does not fit in an `i32`.
fn expand_exponent(literal: &str) -> Option<i32> {
    // JS accepts both `1e5` and `1E5`.
    let (mantissa, exponent) = literal.split_once(|c: char| matches!(c, 'e' | 'E'))?;
    let exponent: i32 = exponent.parse().ok()?;

    // Fold a fractional mantissa (`1.5e3`) into plain digits plus a smaller
    // power of ten; trailing zeros of the fraction carry no value.
    let (int_part, frac_part) = mantissa.split_once('.').unwrap_or((mantissa, ""));
    let frac_part = frac_part.trim_end_matches('0');
    let shift = exponent.checked_sub(i32::try_from(frac_part.len()).ok()?)?;
    // A negative shift would need a division; such literals are not expanded.
    let shift = u32::try_from(shift).ok()?;

    let digits: i32 = format!("{int_part}{frac_part}").parse().ok()?;
    digits.checked_mul(10i32.checked_pow(shift)?)
}

/// Handle js's `1e5` case, sigh.
///
/// Rewrites `exp` in place from exponent notation (`1e5`, `1E5`, `1e+5`,
/// `1.5e3`) to plain decimal digits (`100000`, ..., `1500`).
///
/// If `exp` is not an exponent literal that expands to a whole number fitting
/// in an `i32`, it is left untouched so that the caller's own numeric parsing
/// reports the problem; this function never panics.
fn replace_exp(exp: &mut String) {
    if let Some(value) = expand_exponent(exp) {
        *exp = value.to_string();
    }
}
+ .utf8_text(state.code_bytes.as_slice())? + .to_string(); + + if capture.index == message_name_idx { + message_name = node_text; + } else if capture.index == field_id_idx { + field_id = node_text; + } else if capture.index == field_name_idx { + field_name = node_text; + } else if capture.index == field_type_idx { + field_type = node_text; + } + } + + if state.messages.contains_key(&message_name) { + state.messages.get_mut(&message_name).unwrap().fields.push(( + field_id.parse()?, + field_name, + field_type, + )); + } else { + state.messages.insert( + message_name.clone(), + Message { + name: message_name, + id: None, + fields: vec![(field_id.parse()?, field_name, field_type)], + }, + ); + } + } + + Ok(()) +} + +fn parse_enums(state: &mut State) -> Result<(), Box> { + let object_idx = state.enum_query.capture_index_for_name("object").unwrap(); + let enum_name_idx = state + .enum_query + .capture_index_for_name("enum_name") + .unwrap(); + let enum_field_idx = state + .enum_query + .capture_index_for_name("enum_field") + .unwrap(); + let enum_value_idx = state + .enum_query + .capture_index_for_name("enum_value") + .unwrap(); + let stage2_idx = state.enum_query.capture_index_for_name("stage2").unwrap(); + + let matches = state.cursor.matches( + &state.enum_query, + state.message_tree.as_ref().unwrap().root_node(), + state.code_bytes.as_slice(), + ); + + // extract object_name, then execute subquery enum_2.scm on the stage2 node + + for m in matches { + if m.captures.is_empty() { + continue; + } + + let mut object_name = String::new(); + let mut enum_name = String::new(); + let mut enum_objects: Vec<(String, i32)> = Vec::new(); + let mut current_enum_field = String::new(); + let mut current_enum_value = String::new(); + let mut stage2_node = None; + + for capture in m.captures { + let node_text = capture + .node + .utf8_text(state.code_bytes.as_slice())? 
+ .to_string(); + + if capture.index == object_idx { + object_name = node_text; + } else if capture.index == enum_name_idx { + enum_name = node_text; + } else if capture.index == enum_field_idx { + current_enum_field = node_text; + } else if capture.index == enum_value_idx { + current_enum_value = node_text; + } else if capture.index == stage2_idx { + stage2_node = Some(capture.node); + } + } + + enum_objects.push((current_enum_field, current_enum_value.parse().unwrap())); + + let Some(stage2_node) = stage2_node else { + panic!("stage2 node not found!: {object_name}"); + }; + + let mut stage2_query_txt = std::fs::read_to_string("queries/enum_2.scm")?; + + // replace "REPLACEME" with object_name + + stage2_query_txt = stage2_query_txt.replace("REPLACEME", &object_name); + + let stage2_query = Query::new(tree_sitter_javascript::language(), &stage2_query_txt)?; + let mut stage2_qc = QueryCursor::new(); + + let stage2_matches = + stage2_qc.matches(&stage2_query, stage2_node, state.code_bytes.as_slice()); + + let stage2_enum_field_idx = stage2_query.capture_index_for_name("enum_field").unwrap(); + let stage2_enum_value_idx = stage2_query.capture_index_for_name("enum_value").unwrap(); + + for m in stage2_matches { + if m.captures.is_empty() { + continue; + } + + let mut current_enum_field = String::new(); + let mut current_enum_value = String::new(); + + for capture in m.captures { + let node_text = capture + .node + .utf8_text(state.code_bytes.as_slice())? 
+ .to_string(); + + if capture.index == stage2_enum_field_idx { + current_enum_field = node_text; + } else if capture.index == stage2_enum_value_idx { + current_enum_value = node_text; + } + } + + if current_enum_value.contains('e') { + replace_exp(&mut current_enum_value); + } + + enum_objects.push((current_enum_field, current_enum_value.parse().unwrap())); + } + + if state.enums.contains_key(&enum_name) { + state + .enums + .get_mut(&enum_name) + .unwrap() + .fields + .extend(enum_objects); + } else { + state.enums.insert( + enum_name.clone(), + Enum { + name: enum_name, + fields: enum_objects, + }, + ); + } + } + + Ok(()) +} + +fn map_ids_to_messages(state: &mut State) -> Result<(), Box> { + let id_query = Query::new( + tree_sitter_javascript::language(), + &std::fs::read_to_string("queries/id.scm")?, + )?; + + let message_name_idx = id_query.capture_index_for_name("message_name").unwrap(); + let message_id_idx = id_query.capture_index_for_name("message_id").unwrap(); + + let code = std::fs::read_to_string("Net/NetDefine.js")?; + + let tree = state.parser.parse(&code, None).unwrap(); + + let mut qc = QueryCursor::new(); + + let matches = qc.matches(&state.id_query, tree.root_node(), code.as_bytes()); + + for m in matches { + if m.captures.is_empty() { + continue; + } + + let mut message_name = String::new(); + let mut message_id = String::new(); + + for capture in m.captures { + let node_text = capture.node.utf8_text(code.as_bytes())?.to_string(); + + if capture.index == message_name_idx { + message_name = node_text; + } else if capture.index == message_id_idx { + message_id = node_text; + } + } + + if state.messages.contains_key(&message_name) { + if message_id.contains('e') { + replace_exp(&mut message_id); + } + + state.messages.get_mut(&message_name).unwrap().id = Some(message_id.parse().unwrap()); + } + } + + Ok(()) +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..eb5c129 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# Tree-Sitter 
❤️ Reverse Engineering + +This is a repo that will be filled, as I see fit, with examples and PoCs of instances +where I've leveraged the power of tree-sitter to aid with reverse engineering. + +1. [ZZZ Proto Dumper (TODO!)](./src/branch/master/01-zzz-ida) +2. [WW Proto Dumper](./src/branch/master/02-ws-js)