Initial commit

This commit is contained in:
Gulag 2024-01-30 19:21:05 -05:00 committed by Gulag
commit b51e203a10
13 changed files with 819 additions and 0 deletions

14
.gitignore vendored Normal file
View file

@ -0,0 +1,14 @@
02-ws-js/Config
02-ws-js/ConfigCommon
02-ws-js/ConfigQuery
02-ws-js/Net
02-ws-js/target
02-ws-js/ClassDefine.js
02-ws-js/CommonDefine.js
02-ws-js/ConfigDefine.js
02-ws-js/EditorDefine.js
02-ws-js/HttpCodeDefine.js
02-ws-js/LogDefine.js
02-ws-js/QueryTypeDefine.js
02-ws-js/message.proto
**/*.so

97
02-ws-js/Cargo.lock generated Normal file
View file

@ -0,0 +1,97 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "libc"
version = "0.2.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
[[package]]
name = "memchr"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "parse_js"
version = "0.1.0"
dependencies = [
"heck",
"tree-sitter",
"tree-sitter-javascript",
]
[[package]]
name = "regex"
version = "1.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "tree-sitter"
version = "0.20.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e747b1f9b7b931ed39a548c1fae149101497de3c1fc8d9e18c62c1a66c683d3d"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-javascript"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edbc663376bdd294bd1f0a6daf859aedb9aa5bdb72217d7ad8ba2d5314102cf7"
dependencies = [
"cc",
"tree-sitter",
]

9
02-ws-js/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "parse_js"
version = "0.1.0"
edition = "2021"
[dependencies]
heck = "0.4.1"
tree-sitter = "0.20.10"
tree-sitter-javascript = "0.20.1"

24
02-ws-js/README.md Normal file
View file

@ -0,0 +1,24 @@
# WW Proto Dumper (Tree-Sitter ❤️ Reverse Engineering)
This is a PoC and demo showing how tree-sitter can be used on dumped JavaScript
netcode files from a certain game to extract fully intact proto files. To see its
potential, you can run the following commands (first replace `"REPLACEME"` in `queries/enum_2.scm` with `"e"`):
The Python code was my initial attempt at this, but my partially-implemented
matches API had a bug, so I just opted to go with Rust.
```bash
tree-sitter query queries/message.scm example_protocol.js
```
```bash
tree-sitter query queries/enum.scm example_protocol.js
```
```bash
tree-sitter query queries/enum_2.scm example_protocol.js
```
```bash
tree-sitter query queries/id.scm example_define.js
```

View file

@ -0,0 +1,4 @@
// Sample of the dumped define file used to try queries/id.scm (see README).
// Each entry assigns a numeric id to a message name on `e` and, in the same
// expression, stores the reverse lookup (id -> name) on `e`.
(function(e) {
(e[(e.ExitGamePush = 1100)] = "ExitGamePush"),
(e[(e.ChangeSceneClockPush = 1199)] = "ChangeSceneClockPush")
});

View file

@ -0,0 +1,69 @@
// Trimmed sample of the dumped protocol module used to try the message and
// enum queries (see README). It is a fragment of a larger function body, hence
// the bare `return`. It contains three message definitions (AceAntiDataPush,
// AchievementProgress, ActionFinishResponse), each with create/encode/decode,
// and one enum (SlientFirstAwardState) built through Object.create.
return (
(B.Protocol =
(((n = {}).AceAntiDataPush =
((W.create = function e(t) {
return new W(t);
}),
(W.encode = function e(t, o) {
return (o = o || $Writer.create()), t.AntiData && o.uint32(10).bytes(t.AntiData), o;
}),
(W.decode = function e(t, o) {
t instanceof $Reader || (t = $Reader.create(t));
for (var n = void 0 === o ? t.len : t.pos + o, r = new $root.Aki.Protocol.AceAntiDataPush(); t.pos < n;) {
var i = t.uint32();
// Single-field messages use the ternary form: `tag >>> 3 == <field id>`.
i >>> 3 == 1 ? (r.AntiData = t.bytes()) : t.skipType(7 & i);
}
return r;
}),
W)),
(n.AchievementProgress =
((j.create = function e(t) {
return new j(t);
}),
(j.encode = function e(t, o) {
return (o = o || $Writer.create()), t.CurProgress && o.uint32(8).int32(t.CurProgress), t.TotalProgress && o.uint32(16).int32(t.TotalProgress), o;
}),
(j.decode = function e(t, o) {
t instanceof $Reader || (t = $Reader.create(t));
for (var n = void 0 === o ? t.len : t.pos + o, r = new $root.Aki.Protocol.AchievementProgress(); t.pos < n;) {
var i = t.uint32();
// Multi-field messages use the switch form: one `case <field id>` per field.
switch (i >>> 3) {
case 1:
r.CurProgress = t.int32();
break;
case 2:
r.TotalProgress = t.int32();
break;
case 27:
r.ActionGroupNodeAction = $root.Aki.Protocol.ActionGroupNodeActionCtxPb.decode(t, t.uint32());
break;
default:
t.skipType(7 & i);
}
}
return r;
}),
j)),
(n.ActionFinishResponse =
((oe.create = function e(t) {
return new oe(t);
}),
(oe.encode = function e(t, o) {
return (o = o || $Writer.create()), t.Code && o.uint32(8).int32(t.Code), o;
}),
(oe.decode = function e(t, o) {
t instanceof $Reader || (t = $Reader.create(t));
for (var n = void 0 === o ? t.len : t.pos + o, r = new $root.Aki.Protocol.ActionFinishResponse(); t.pos < n;) {
var i = t.uint32();
i >>> 3 == 1 ? (r.EntityCtx = $root.Aki.Protocol.EntityCtxPb.decode(t, t.uint32())) : t.skipType(7 & i);
}
return r;
}),
oe)),
// Enum form: `t` maps value -> name, `e` (created from `t`) maps name -> value.
(n.SlientFirstAwardState = ((t = {}), ((e = Object.create(t))[(t[0] = "NotUnlock")] = 0),
(e[(t[1] = "NotFinish")] = 1),
(e[(t[2] = "IsFinish")] = 2),
(e[(t[3] = "IsReceive")] = 3), e)),
n)),
B
);

71
02-ws-js/parse.py Normal file
View file

@ -0,0 +1,71 @@
from typing import Dict, List, Tuple
from tree_sitter import Language, Node, Parser
from dataclasses import dataclass
js = "./js.so"
JS = Language(js, "javascript")
parser = Parser()
parser.set_language(JS)
q = open("queries/message.scm").read()
query = JS.query(q)
tree = parser.parse(open("Net/Protocol.js", "rb").read())
matches = query.matches(tree.root_node)
@dataclass
class Proto:
name: str
fields: List[Tuple[int, str, str]]
protos: Dict[str, Proto] = {}
# [(<Node type=identifier, start_point=(54940, 13), end_point=(54940, 14)>, '_n'), (<Node type=property_identifier, start_point=(54940, 15), end_point=(54940, 40)>, 'message_name'), (<Node type=property_identifier, start_point=(54953, 20), end_point=(54953, 26)>, '_decode'), (<Node type=number, start_point=(54957, 38), end_point=
# (54957, 39)>, '_shift3'), (<Node type=number, start_point=(54961, 33), end_point=(54961, 34)>, 'field_id'), (<Node type=property_identifier, start_point=(54962, 34), end_point=(54962, 47)>, 'field_name'), (<Node type=property_identifier, start_point=(54962, 52), end_point=(54962, 57)>, 'field_type')]
match: List[Tuple[Node, str]]
for e, (_, match) in enumerate(matches):
if len(match) == 0:
continue
m: Node
message_name, field_id, field_name, field_type = "", "", "", ""
for node, capture_name in match:
# offsets for the items might vary from each one,
match capture_name:
case "message_name":
message_name = node.text.decode("utf-8")
break
case "field_id":
field_id = node.text.decode("utf-8")
break
case "field_name":
field_name = node.text.decode("utf-8")
break
case "field_type":
field_type = node.text.decode("utf-8")
break
case _:
continue
print(f"{message_name} {field_id} {field_name} {field_type}")
if message_name not in protos:
protos[message_name] = Proto(e, message_name, [(int(field_id), field_name, field_type)])
else:
protos[message_name].fields.append((int(field_id), field_name, field_type))
proto_dump = ""
for message in protos.values():
proto_dump += f"message {message.name} {{\n"
for field in message.fields:
proto_dump += f" {field[2]} {field[1]} = {field[0]};\n"
proto_dump += "}\n\n"
with open("message.proto", "w") as f:
f.write(proto_dump)

37
02-ws-js/queries/enum.scm Normal file
View file

@ -0,0 +1,37 @@
; Stage 1 of enum extraction.
; Matches an enum definition of the form
;   n.<EnumName> = ((t = {}), ((e = Object.create(t))[(t[0] = "First")] = 0), ..., e)
; and captures:
;   @enum_name   - property assigned on `n`
;   @object      - identifier bound to Object.create(...) (the name -> value object)
;   @enum_field  - name of the first enum entry
;   @enum_value  - value of the first enum entry
;   @stage2      - sequence expression holding the enum body
; Captures starting with `_` are only used by the #eq? predicates.
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (member_expression
(identifier) @_n
(property_identifier) @enum_name
(#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression
; `t = {}`: the value -> name object
left: (parenthesized_expression
(assignment_expression
left: (identifier) @_var
right: (object . "{" . "}" . )))
right: (sequence_expression
left: (parenthesized_expression
(assignment_expression
left: (subscript_expression
; `(e = Object.create(t))`: the argument must be the object bound above
object: (parenthesized_expression
(assignment_expression
left: (identifier) @object
"="
right: (call_expression
function: (member_expression
object: (identifier) @_object
"."
property: (property_identifier) @_create
(#eq? @_object "Object")
(#eq? @_create "create"))
arguments: (arguments . "(" (identifier) @_var2 . ")" .
(#eq? @_var @_var2)))))
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression)
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)
))) @stage2
))))

View file

@ -0,0 +1,8 @@
; Stage 2 of enum extraction.
; Matches every `<object>[(<x>[<i>] = "Name")] = <value>` assignment whose
; subscripted object is a plain identifier equal to the placeholder string
; below; the placeholder is substituted with the real identifier before the
; query is compiled.
; Captures: @enum_field (entry name) and @enum_value (entry value).
(assignment_expression
left: (subscript_expression
object: ((identifier) @object_name (#eq? @object_name "REPLACEME"))
index: (parenthesized_expression
(assignment_expression
left: (subscript_expression)
right: (string (string_fragment) @enum_field))))
right: (number) @enum_value)

6
02-ws-js/queries/id.scm Normal file
View file

@ -0,0 +1,6 @@
; Matches `<identifier>.<Name> = <number>` assignments, e.g.
; `e.ExitGamePush = 1100`, capturing the property as @message_name and the
; number as @message_id.
(assignment_expression
left: (member_expression
object: (identifier)
property: (property_identifier) @message_name)
right: (number) @message_id)

View file

@ -0,0 +1,76 @@
; Extracts message fields from the `decode` function of each message.
; Matches `n.<MessageName> = (<create>, <encode>, <decode>, ...)` and, inside
; the `for` loop of `decode`, either of the two field-dispatch shapes:
;   * switch form:  switch (tag >>> 3) { case <id>: r.<name> = <reader call>; ... }
;   * ternary form: tag >>> 3 == <id> ? (r.<name> = <reader call>) : ...
; Captures per match:
;   @message_name - property assigned on `n`
;   @field_id     - field number
;   @field_name   - property assigned on the result object
;   @field_type   - reader method name (e.g. `int32`), or the type name in
;                   `$root.Aki.Protocol.<Type>.decode(...)`
; Captures starting with `_` are only used by the #eq? predicates.
(sequence_expression
(parenthesized_expression
(assignment_expression
left: (member_expression
(identifier) @_n
(property_identifier) @message_name
(#eq? @_n "n"))
right: (parenthesized_expression (sequence_expression
right: (sequence_expression
right: (sequence_expression
left: (parenthesized_expression (assignment_expression
left: (member_expression (identifier) (property_identifier) @_decode (#eq? @_decode "decode"))
right: (function
body: (statement_block (for_statement
body: (statement_block
[
; switch form
(switch_statement
value: (parenthesized_expression
(binary_expression
left: (identifier)
">>>"
right: (number) @_shift3
(#eq? @_shift3 "3")))
body:
(switch_body (switch_case
value: (number) @field_id
body: (expression_statement
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
])))))
; ternary form
(expression_statement
(ternary_expression
condition: (binary_expression
left: (binary_expression
left: (identifier)
">>>"
right: (number) @_shift3
(#eq? @_shift3 "3"))
"=="
right: (number) @field_id)
consequence: (parenthesized_expression
(assignment_expression
left: (member_expression (identifier) (property_identifier) @field_name)
right: [
(call_expression
function: (member_expression
object: (identifier)
property: (property_identifier) @field_type))
(call_expression
function: (member_expression
object: (member_expression
object: (member_expression) @_rootakiprotocol
property: (property_identifier) @field_type
(#eq? @_rootakiprotocol "$root.Aki.Protocol"))
property: (property_identifier) @_decode_field
(#eq? @_decode_field "decode")))
]))))
]
)))))))))))))

397
02-ws-js/src/main.rs Normal file
View file

@ -0,0 +1,397 @@
use std::collections::BTreeMap;
use std::fmt::Write;
use heck::{ToShoutySnakeCase, ToSnakeCase};
use tree_sitter::{Parser, Query, QueryCursor, Tree};
/// A protobuf message recovered from the dumped protocol source.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Message {
    /// Message name as it appears in the source.
    name: String,
    /// Numeric message id, when one was found for this message name.
    id: Option<i32>,
    /// Fields as `(field number, field name, field type)` tuples.
    fields: Vec<(i32, String, String)>,
}
/// A protobuf enum recovered from the dumped protocol source.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Enum {
    /// Enum name as it appears in the source.
    name: String,
    /// Entries as `(entry name, entry value)` tuples.
    fields: Vec<(String, i32)>,
}
/// Messages keyed by name; a `BTreeMap` iterates in key order, so the dump is sorted by name.
type MessageDump = BTreeMap<String, Message>;
/// Enums keyed by name; a `BTreeMap` iterates in key order, so the dump is sorted by name.
type EnumDump = BTreeMap<String, Enum>;
/// Everything the dumper needs across its passes: parser, queries, inputs and results.
struct State {
/// Parser configured for JavaScript in `main`; used for both input files.
parser: Parser,
/// Query cursor used by `parse_enums` to run `enum_query`.
cursor: QueryCursor,
/// Parse tree of `code_bytes`; `None` until `main` has parsed the source.
message_tree: Option<Tree>,
/// Raw contents of `Net/Protocol.js`.
code_bytes: Vec<u8>,
/// Messages collected by `parse_messages`; ids are filled in by `map_ids_to_messages`.
messages: MessageDump,
/// Query compiled from `queries/message.scm`.
message_query: Query,
/// Enums collected by `parse_enums`.
enums: EnumDump,
/// Query compiled from `queries/enum.scm`.
enum_query: Query,
/// Query compiled from `queries/id.scm`.
id_query: Query,
/// Text of the generated proto file, written to `message.proto` by `main`.
protodump_str: String,
}
/// Parses `Net/Protocol.js`, extracts messages, enums and message ids with the
/// tree-sitter queries under `queries/`, and writes the result to `message.proto`.
///
/// # Errors
/// Returns an error if an input file cannot be read, a query fails to compile,
/// the source cannot be parsed, one of the extraction passes fails, or the
/// output file cannot be written.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut state = State {
        parser: {
            let mut parser = Parser::new();
            parser.set_language(tree_sitter_javascript::language())?;
            parser
        },
        cursor: QueryCursor::new(),
        message_tree: None,
        code_bytes: std::fs::read("Net/Protocol.js")?,
        messages: MessageDump::new(),
        message_query: Query::new(
            tree_sitter_javascript::language(),
            &std::fs::read_to_string("queries/message.scm")?,
        )?,
        enums: EnumDump::new(),
        enum_query: Query::new(
            tree_sitter_javascript::language(),
            &std::fs::read_to_string("queries/enum.scm")?,
        )?,
        id_query: Query::new(
            tree_sitter_javascript::language(),
            &std::fs::read_to_string("queries/id.scm")?,
        )?,
        protodump_str: String::from("syntax = \"proto3\";\n\n"),
    };

    // `Parser::parse` yields `None` when parsing could not be completed;
    // surface that as an error instead of panicking.
    let tree = state
        .parser
        .parse(&state.code_bytes, None)
        .ok_or("tree-sitter failed to parse Net/Protocol.js")?;
    state.message_tree = Some(tree);

    parse_messages(&mut state)?;
    parse_enums(&mut state)?;
    map_ids_to_messages(&mut state)?;

    // Write straight into the output buffer rather than building a temporary
    // string per message/enum.
    for message in state.messages.values() {
        write!(state.protodump_str, "message {} {{", message.name)?;
        if let Some(id) = message.id {
            write!(state.protodump_str, " // MessageId: {id}")?;
        }
        state.protodump_str.push('\n');
        for (field_id, field_name, field_type) in &message.fields {
            writeln!(
                state.protodump_str,
                " {} {} = {};",
                field_type,
                field_name.to_snake_case(),
                field_id
            )?;
        }
        state.protodump_str.push_str("}\n\n");
    }

    for en in state.enums.values() {
        writeln!(state.protodump_str, "enum {} {{", en.name)?;
        for (entry_name, entry_value) in &en.fields {
            writeln!(
                state.protodump_str,
                " {} = {};",
                entry_name.to_shouty_snake_case(),
                entry_value
            )?;
        }
        state.protodump_str.push_str("}\n\n");
    }

    // Drop the trailing blank lines without copying the whole buffer.
    std::fs::write("message.proto", state.protodump_str.trim_end())?;
    Ok(())
}
/// Handle js's `1e5` case, sigh
fn replace_exp(exp: &mut String) {
let idx_of_e = exp.find('e').unwrap();
*exp = (exp[..idx_of_e].parse::<i32>().unwrap()
* 10i32.pow(exp[idx_of_e + 1..].parse().unwrap()))
.to_string();
}
/// Runs `state.message_query` over the parsed protocol source and records one
/// field per query match in `state.messages`, creating the message entry the
/// first time its name is seen.
///
/// # Errors
/// Returns an error if the query lacks one of the `message_name`, `field_id`,
/// `field_name` or `field_type` captures, if `state.message_tree` has not been
/// set, if captured text is not valid UTF-8, or if a field id is not a valid `i32`.
fn parse_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
    let capture_index = |name: &str| {
        state
            .message_query
            .capture_index_for_name(name)
            .ok_or_else(|| format!("message query has no @{name} capture"))
    };
    let message_name_idx = capture_index("message_name")?;
    let field_id_idx = capture_index("field_id")?;
    let field_name_idx = capture_index("field_name")?;
    let field_type_idx = capture_index("field_type")?;

    let tree = state
        .message_tree
        .as_ref()
        .ok_or("protocol source has not been parsed yet")?;
    let source = state.code_bytes.as_slice();

    let mut cursor = QueryCursor::new();
    for m in cursor.matches(&state.message_query, tree.root_node(), source) {
        if m.captures.is_empty() {
            continue;
        }
        // Borrow the captured text from the source; only the values that are
        // actually stored get copied into owned strings below.
        let (mut message_name, mut field_id, mut field_name, mut field_type) = ("", "", "", "");
        for capture in m.captures {
            let text = capture.node.utf8_text(source)?;
            if capture.index == message_name_idx {
                message_name = text;
            } else if capture.index == field_id_idx {
                field_id = text;
            } else if capture.index == field_name_idx {
                field_name = text;
            } else if capture.index == field_type_idx {
                field_type = text;
            }
        }

        let field = (
            field_id.parse::<i32>()?,
            field_name.to_string(),
            field_type.to_string(),
        );
        // Single map lookup: fetch the message or create it on first sight.
        state
            .messages
            .entry(message_name.to_string())
            .or_insert_with_key(|name| Message {
                name: name.clone(),
                id: None,
                fields: Vec::new(),
            })
            .fields
            .push(field);
    }
    Ok(())
}
/// Extracts enums from the parsed protocol source into `state.enums`.
///
/// Extraction is two-staged: `state.enum_query` finds each enum definition and
/// its first entry, then `queries/enum_2.scm` (with its `REPLACEME` placeholder
/// substituted by the enum's object identifier) is run on the captured
/// `stage2` node to collect the remaining entries.
///
/// # Errors
/// Returns an error if a query lacks an expected capture, if
/// `state.message_tree` has not been set, if `queries/enum_2.scm` cannot be
/// read or compiled, if a match has no `stage2` capture, if captured text is
/// not valid UTF-8, or if an enum value is not a valid `i32`.
fn parse_enums(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
    /// Parses a captured number literal, expanding `1e5`-style exponents first.
    fn parse_enum_value(text: &str) -> Result<i32, std::num::ParseIntError> {
        if text.contains('e') {
            let mut expanded = text.to_string();
            replace_exp(&mut expanded);
            expanded.parse()
        } else {
            text.parse()
        }
    }

    let capture_index = |name: &str| {
        state
            .enum_query
            .capture_index_for_name(name)
            .ok_or_else(|| format!("enum query has no @{name} capture"))
    };
    let object_idx = capture_index("object")?;
    let enum_name_idx = capture_index("enum_name")?;
    let enum_field_idx = capture_index("enum_field")?;
    let enum_value_idx = capture_index("enum_value")?;
    let stage2_idx = capture_index("stage2")?;

    // The template is the same for every enum, so read it once up front.
    let stage2_template = std::fs::read_to_string("queries/enum_2.scm")?;

    let tree = state
        .message_tree
        .as_ref()
        .ok_or("protocol source has not been parsed yet")?;
    let source = state.code_bytes.as_slice();

    for m in state
        .cursor
        .matches(&state.enum_query, tree.root_node(), source)
    {
        if m.captures.is_empty() {
            continue;
        }
        let (mut object_name, mut enum_name, mut first_field, mut first_value) = ("", "", "", "");
        let mut stage2_node = None;
        for capture in m.captures {
            let text = capture.node.utf8_text(source)?;
            if capture.index == object_idx {
                object_name = text;
            } else if capture.index == enum_name_idx {
                enum_name = text;
            } else if capture.index == enum_field_idx {
                first_field = text;
            } else if capture.index == enum_value_idx {
                first_value = text;
            } else if capture.index == stage2_idx {
                stage2_node = Some(capture.node);
            }
        }

        // The first entry goes through the same exponent handling as the rest.
        let mut entries = vec![(first_field.to_string(), parse_enum_value(first_value)?)];

        let Some(stage2_node) = stage2_node else {
            return Err(format!("stage2 node not found!: {object_name}").into());
        };

        // Specialise the stage-2 query for this enum's object identifier.
        let stage2_query = Query::new(
            tree_sitter_javascript::language(),
            &stage2_template.replace("REPLACEME", object_name),
        )?;
        let stage2_field_idx = stage2_query
            .capture_index_for_name("enum_field")
            .ok_or("stage2 query has no @enum_field capture")?;
        let stage2_value_idx = stage2_query
            .capture_index_for_name("enum_value")
            .ok_or("stage2 query has no @enum_value capture")?;

        let mut stage2_cursor = QueryCursor::new();
        for stage2_match in stage2_cursor.matches(&stage2_query, stage2_node, source) {
            if stage2_match.captures.is_empty() {
                continue;
            }
            let (mut entry_name, mut entry_value) = ("", "");
            for capture in stage2_match.captures {
                let text = capture.node.utf8_text(source)?;
                if capture.index == stage2_field_idx {
                    entry_name = text;
                } else if capture.index == stage2_value_idx {
                    entry_value = text;
                }
            }
            entries.push((entry_name.to_string(), parse_enum_value(entry_value)?));
        }

        // Single map lookup: extend an existing enum or create it on first sight.
        state
            .enums
            .entry(enum_name.to_string())
            .or_insert_with_key(|name| Enum {
                name: name.clone(),
                fields: Vec::new(),
            })
            .fields
            .extend(entries);
    }
    Ok(())
}
/// Reads `Net/NetDefine.js`, runs `state.id_query` over it and stores each
/// captured id on the already-collected message with the same name. Names that
/// do not correspond to a collected message are ignored.
///
/// # Errors
/// Returns an error if the query lacks the `message_name` or `message_id`
/// capture, if `Net/NetDefine.js` cannot be read or parsed, if captured text
/// is not valid UTF-8, or if an id is not a valid `i32`.
fn map_ids_to_messages(state: &mut State) -> Result<(), Box<dyn std::error::Error>> {
    // Take the capture indices from the query that is actually executed,
    // rather than re-reading and re-compiling queries/id.scm.
    let capture_index = |name: &str| {
        state
            .id_query
            .capture_index_for_name(name)
            .ok_or_else(|| format!("id query has no @{name} capture"))
    };
    let message_name_idx = capture_index("message_name")?;
    let message_id_idx = capture_index("message_id")?;

    let code = std::fs::read_to_string("Net/NetDefine.js")?;
    let tree = state
        .parser
        .parse(&code, None)
        .ok_or("tree-sitter failed to parse Net/NetDefine.js")?;

    let mut cursor = QueryCursor::new();
    for m in cursor.matches(&state.id_query, tree.root_node(), code.as_bytes()) {
        if m.captures.is_empty() {
            continue;
        }
        let (mut message_name, mut message_id) = ("", "");
        for capture in m.captures {
            let text = capture.node.utf8_text(code.as_bytes())?;
            if capture.index == message_name_idx {
                message_name = text;
            } else if capture.index == message_id_idx {
                message_id = text;
            }
        }

        let Some(message) = state.messages.get_mut(message_name) else {
            continue;
        };
        let mut id_text = message_id.to_string();
        if id_text.contains('e') {
            replace_exp(&mut id_text);
        }
        message.id = Some(id_text.parse::<i32>()?);
    }
    Ok(())
}

7
README.md Normal file
View file

@ -0,0 +1,7 @@
# Tree-Sitter ❤️ Reverse Engineering
This repo collects examples and PoCs, added as I see fit, of cases where I've
leveraged the power of tree-sitter to aid reverse engineering.
1. [ZZZ Proto Dumper (TODO!)](./src/branch/master/01-zzz-ida)
2. [WW Proto Dumper](./src/branch/master/02-ws-js)