Add geosite protobuf parser and tests for it

This commit is contained in:
namilsk 2026-03-18 21:21:21 +03:00
parent da8e70f2e3
commit 8887a775f5
No known key found for this signature in database
GPG key ID: 2B2F0A4D870B4F9F
10 changed files with 548 additions and 18 deletions

190
Cargo.lock generated
View file

@ -449,6 +449,9 @@ name = "bytes"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
dependencies = [
"serde",
]
[[package]]
name = "c2rust-bitfields"
@ -1251,15 +1254,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "etherparse"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b119b9796ff800751a220394b8b3613f26dd30c48f254f6837e64c464872d1c7"
dependencies = [
"arrayvec",
]
[[package]]
name = "event-listener"
version = "5.4.1"
@ -1345,6 +1339,12 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
name = "fixedbitset"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flate2"
version = "1.1.9"
@ -2207,6 +2207,12 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "multimap"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084"
[[package]]
name = "native-tls"
version = "0.2.18"
@ -2343,15 +2349,19 @@ name = "nsc"
version = "0.1.0"
dependencies = [
"arti-client",
"etherparse",
"bytes",
"ipnet",
"iptables",
"maxminddb",
"prost",
"prost-build",
"prost-types",
"rtnetlink",
"serde",
"tokio",
"toml 1.0.6+spec-1.1.0",
"tun",
"ureq",
]
[[package]]
@ -2662,6 +2672,17 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "petgraph"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455"
dependencies = [
"fixedbitset",
"hashbrown 0.15.5",
"indexmap 2.13.0",
]
[[package]]
name = "phf"
version = "0.13.1"
@ -2912,6 +2933,57 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "prost"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
dependencies = [
"bytes",
"prost-derive",
]
[[package]]
name = "prost-build"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
dependencies = [
"heck",
"itertools 0.14.0",
"log",
"multimap",
"petgraph",
"prettyplease",
"prost",
"prost-types",
"regex",
"syn 2.0.117",
"tempfile",
]
[[package]]
name = "prost-derive"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
dependencies = [
"anyhow",
"itertools 0.14.0",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "prost-types"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7"
dependencies = [
"prost",
]
[[package]]
name = "pwd-grp"
version = "1.0.2"
@ -3146,6 +3218,20 @@ dependencies = [
"subtle",
]
[[package]]
name = "ring"
version = "0.17.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
"cc",
"cfg-if",
"getrandom 0.2.17",
"libc",
"untrusted",
"windows-sys 0.52.0",
]
[[package]]
name = "rsa"
version = "0.9.10"
@ -3242,6 +3328,41 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "rustls"
version = "0.23.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4"
dependencies = [
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki",
"subtle",
"zeroize",
]
[[package]]
name = "rustls-pki-types"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
dependencies = [
"zeroize",
]
[[package]]
name = "rustls-webpki"
version = "0.103.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
dependencies = [
"ring",
"rustls-pki-types",
"untrusted",
]
[[package]]
name = "rustversion"
version = "1.0.22"
@ -5166,12 +5287,34 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "untrusted"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "unty"
version = "0.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae"
[[package]]
name = "ureq"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d"
dependencies = [
"base64",
"flate2",
"log",
"once_cell",
"rustls",
"rustls-pki-types",
"url",
"webpki-roots 0.26.11",
]
[[package]]
name = "url"
version = "2.5.8"
@ -5363,6 +5506,24 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"
dependencies = [
"webpki-roots 1.0.6",
]
[[package]]
name = "webpki-roots"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "winapi"
version = "0.3.9"
@ -5533,6 +5694,15 @@ dependencies = [
"windows-link 0.2.1",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
name = "windows-sys"
version = "0.59.0"

View file

@ -4,14 +4,19 @@ version = "0.1.0"
edition = "2024"
description = "Tun-in interface for Mesh networks like Tor/I2P with traffic-routing support (DIRECT/PROXY/BLOCK etc.)"
repository = "https://codeberg.org/NamelessTeam/nsc"
build = "build.rs"
[build-dependencies]
prost-build = "0.14.3"
[dependencies]
arti-client = "0.40.0"
etherparse = "0.19.0"
bytes = { version = "1.11.1", features = ["serde"] }
ipnet = "2.12.0"
iptables = "0.6.0"
maxminddb = "0.27.3"
prost = "0.14.3"
prost-types = "0.14.3"
rtnetlink = "0.20.0"
serde = { version = "1.0.228", features = ["derive"] }
tokio = { version = "1.50.0", features = ["full"] }
@ -24,4 +29,7 @@ lto = true
codegen-units = 1
opt-level = 3
[dev-dependencies]
ureq = "2.12"

16
build.rs Normal file
View file

@ -0,0 +1,16 @@
use std::io::Result;
/// Build script: compiles the geosite protobuf schema into Rust types with `prost-build`.
///
/// The generated code is written into `src/geoparsers/v2ray/` (next to the hand-written
/// parser) rather than into Cargo's `OUT_DIR`.
///
/// # Errors
/// Propagates any I/O error reported by `prost_build` while compiling the schema.
fn main() -> Result<()> {
    const PROTO_FILE: &str = "src/geoparsers/v2ray/proto_src/geosite.proto";
    const PROTO_INCLUDE_DIR: &str = "src/geoparsers/v2ray/proto_src/";
    const GENERATED_DIR: &str = "src/geoparsers/v2ray/";

    // Without an explicit `rerun-if-changed`, Cargo re-runs this script (and therefore the
    // protobuf code generation) whenever any file in the package changes. Only the schema
    // and the script itself are real inputs.
    println!("cargo:rerun-if-changed={PROTO_FILE}");
    println!("cargo:rerun-if-changed=build.rs");

    let out_dir = std::path::PathBuf::from(GENERATED_DIR);
    prost_build::Config::new()
        .out_dir(&out_dir)
        .compile_protos(&[PROTO_FILE], &[PROTO_INCLUDE_DIR])?;
    Ok(())
}

View file

@ -1,2 +1,3 @@
pub mod geoip2;
pub mod toml;
pub mod v2ray;

View file

@ -0,0 +1,2 @@
pub mod parsing;
pub mod types;

View file

@ -0,0 +1,79 @@
use crate::geoparsers::v2ray::types::{Domain, GeoSite, GeoSiteList};
use prost::bytes::Buf;
use prost::Message;
use std::fs;
pub struct GeoSiteService {
index: GeoSiteList,
}
impl GeoSiteService {
// TODO: Make more smart memory mapping; geosite files can be > 70MB
pub fn new(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
let bytes = fs::read(path)?;
let geosite_list = decode_geosite_stream(&bytes)?;
Ok(Self {
index: geosite_list,
})
}
// Idk but i think it can work
pub fn lookup(&self, value: &str) -> Option<&GeoSite> {
self.index
.entry
.iter()
.find(|site| site.domain.iter().any(|d| d.value == value))
}
/// Returns the number of GeoSite entries in the list
pub fn len(&self) -> usize {
self.index.entry.len()
}
/// Returns true if the GeoSite list is empty
pub fn is_empty(&self) -> bool {
self.index.entry.is_empty()
}
}
/// Decodes the raw contents of a `geosite.dat` file into a [`GeoSiteList`].
///
/// The buffer is walked as a sequence of records. Each record consists of:
/// 1. one tag byte, which must be `0x0a` (field 1, wire type 2 = length-delimited),
/// 2. a varint holding the payload length in bytes,
/// 3. the payload itself, decoded as a single [`GeoSite`] message.
///
/// # Errors
/// Returns an error (never panics) when:
/// - a record starts with a tag other than `0x0a`,
/// - the buffer ends in the middle of a varint, or the varint is longer than 10 bytes
///   or does not fit into 64 bits,
/// - a record declares more payload bytes than remain in the buffer,
/// - a payload is not a valid `GeoSite` message.
fn decode_geosite_stream(bytes: &[u8]) -> Result<GeoSiteList, Box<dyn std::error::Error>> {
    let mut buf = bytes;
    let mut entries = Vec::new();

    while buf.has_remaining() {
        // Read tag (0x0a field 1, wire type 2)
        let tag = buf.get_u8();
        if tag != 0x0a {
            return Err(format!("Unexpected tag: {:#04x}", tag).into());
        }

        let declared_len = read_varint_u64(&mut buf)?;

        // The length comes straight from the file: validate it against what is actually
        // left instead of slicing blindly, so truncated/corrupt input yields an error.
        let len = usize::try_from(declared_len)
            .ok()
            .filter(|&n| n <= buf.len())
            .ok_or_else(|| {
                format!(
                    "GeoSite entry declares {declared_len} bytes but only {} remain",
                    buf.len()
                )
            })?;

        let (entry_bytes, rest) = buf.split_at(len);
        entries.push(GeoSite::decode(entry_bytes)?);
        buf = rest;
    }

    Ok(GeoSiteList { entry: entries })
}

/// Reads one base-128 varint from the front of `buf`, advancing `buf` past it.
fn read_varint_u64(buf: &mut &[u8]) -> Result<u64, Box<dyn std::error::Error>> {
    let mut value = 0u64;

    // Shifts 0, 7, ..., 63: at most ten bytes can contribute to a 64-bit value.
    for shift in (0..64u32).step_by(7) {
        let Some((&byte, rest)) = buf.split_first() else {
            return Err("Unexpected end of buffer while reading varint".into());
        };
        *buf = rest;

        let payload = u64::from(byte & 0x7f);
        // The tenth byte only has room for a single bit; anything more would be lost.
        if shift == 63 && payload > 1 {
            return Err("Varint does not fit into 64 bits".into());
        }
        value |= payload << shift;

        if byte & 0x80 == 0 {
            return Ok(value);
        }
    }

    Err("Varint too long".into())
}

View file

@ -0,0 +1,66 @@
syntax = "proto3";
package types;
// Domain for routing decision.
// Carries a matching type, the value to match and an optional list of attributes.
message Domain {
// Type of domain value.
enum Type {
// The value is used as is.
Plain = 0;
// The value is used as a regular expression.
Regex = 1;
// The value is a root domain.
Domain = 2;
// The value is a domain.
// NOTE(review): presumably an exact full-domain match, in contrast to the
// root-domain `Domain` type above — confirm against the upstream schema.
Full = 3;
}
// Domain matching type.
Type type = 1;
// Domain value.
string value = 2;
// Attribute of the domain.
// A key plus at most one typed value: either a boolean or a 64-bit integer.
message Attribute {
string key = 1;
oneof typed_value {
bool bool_value = 2;
int64 int_value = 3;
}
}
// Attributes of this domain. May be used for filtering.
repeated Attribute attribute = 3;
}
// IP for routing decision, in CIDR form.
// Pairs a raw address with the length of its network prefix.
message CIDR {
// IP address, should be either 4 or 16 bytes.
bytes ip = 1;
// Number of leading ones in the network mask.
uint32 prefix = 2;
}
// A list of CIDR ranges grouped under one country code.
message GeoIP {
// Code identifying the group the ranges below belong to.
string country_code = 1;
// CIDR ranges belonging to this group.
repeated CIDR cidr = 2;
}
// Top-level container holding every GeoIP entry.
message GeoIPList {
repeated GeoIP entry = 1;
}
// A list of domains grouped under one code.
message GeoSite {
// Code identifying this group of domains.
// NOTE(review): presumably a list/category name rather than strictly a country
// code — confirm against the data files.
string country_code = 1;
// Domains belonging to this group.
repeated Domain domain = 2;
// resource_hash instruct simplified config converter to load domain from geo file.
bytes resource_hash = 3;
// NOTE(review): purpose not evident from this schema — confirm against upstream.
string code = 4;
}
// Top-level container holding every GeoSite entry.
message GeoSiteList {
repeated GeoSite entry = 1;
}

View file

@ -0,0 +1,121 @@
// This file is @generated by prost-build.
/// Domain for routing decision.
///
/// Carries a matching type, the value to match and an optional list of attributes.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Domain {
/// Domain matching type.
///
/// Stored as the raw `i32` value of a [`domain::Type`] variant.
#[prost(enumeration = "domain::Type", tag = "1")]
pub r#type: i32,
/// Domain value.
#[prost(string, tag = "2")]
pub value: ::prost::alloc::string::String,
/// Attributes of this domain. May be used for filtering.
#[prost(message, repeated, tag = "3")]
pub attribute: ::prost::alloc::vec::Vec<domain::Attribute>,
}
/// Nested message and enum types in `Domain`.
pub mod domain {
/// Attribute of the domain.
///
/// A key plus at most one typed value (see [`attribute::TypedValue`]).
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Attribute {
/// Name of the attribute.
#[prost(string, tag = "1")]
pub key: ::prost::alloc::string::String,
/// Value of the attribute; `None` when neither alternative is set.
#[prost(oneof = "attribute::TypedValue", tags = "2, 3")]
pub typed_value: ::core::option::Option<attribute::TypedValue>,
}
/// Nested message and enum types in `Attribute`.
pub mod attribute {
/// The value carried by an `Attribute`: either a boolean or a 64-bit integer.
#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Oneof)]
pub enum TypedValue {
/// Boolean alternative (field tag 2).
#[prost(bool, tag = "2")]
BoolValue(bool),
/// Signed 64-bit integer alternative (field tag 3).
#[prost(int64, tag = "3")]
IntValue(i64),
}
}
/// Type of domain value.
#[derive(
Clone,
Copy,
Debug,
PartialEq,
Eq,
Hash,
PartialOrd,
Ord,
::prost::Enumeration
)]
#[repr(i32)]
pub enum Type {
/// The value is used as is.
Plain = 0,
/// The value is used as a regular expression.
Regex = 1,
/// The value is a root domain.
Domain = 2,
/// The value is a domain.
Full = 3,
}
impl Type {
/// String value of the enum field names used in the ProtoBuf definition.
///
/// The values are not transformed in any way and thus are considered stable
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
pub fn as_str_name(&self) -> &'static str {
match self {
Self::Plain => "Plain",
Self::Regex => "Regex",
Self::Domain => "Domain",
Self::Full => "Full",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
///
/// Returns `None` for any string that is not exactly one of the variant names.
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
match value {
"Plain" => Some(Self::Plain),
"Regex" => Some(Self::Regex),
"Domain" => Some(Self::Domain),
"Full" => Some(Self::Full),
_ => None,
}
}
}
}
/// IP for routing decision, in CIDR form.
///
/// Pairs a raw address with the length of its network prefix.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Cidr {
/// IP address, should be either 4 or 16 bytes.
#[prost(bytes = "vec", tag = "1")]
pub ip: ::prost::alloc::vec::Vec<u8>,
/// Number of leading ones in the network mask.
#[prost(uint32, tag = "2")]
pub prefix: u32,
}
/// A list of CIDR ranges grouped under one country code.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoIp {
/// Code identifying the group the ranges in `cidr` belong to.
#[prost(string, tag = "1")]
pub country_code: ::prost::alloc::string::String,
/// CIDR ranges belonging to this group.
#[prost(message, repeated, tag = "2")]
pub cidr: ::prost::alloc::vec::Vec<Cidr>,
}
/// Top-level container holding every [`GeoIp`] entry.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoIpList {
/// All entries, encoded as repeated field 1.
#[prost(message, repeated, tag = "1")]
pub entry: ::prost::alloc::vec::Vec<GeoIp>,
}
/// A list of domains grouped under one code.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoSite {
/// Code identifying this group of domains.
///
/// NOTE(review): presumably a list/category name rather than strictly a country
/// code — confirm against the data files.
#[prost(string, tag = "1")]
pub country_code: ::prost::alloc::string::String,
/// Domains belonging to this group.
#[prost(message, repeated, tag = "2")]
pub domain: ::prost::alloc::vec::Vec<Domain>,
/// resource_hash instruct simplified config converter to load domain from geo file.
#[prost(bytes = "vec", tag = "3")]
pub resource_hash: ::prost::alloc::vec::Vec<u8>,
/// NOTE(review): purpose not evident from the schema — confirm against upstream.
#[prost(string, tag = "4")]
pub code: ::prost::alloc::string::String,
}
/// Top-level container holding every [`GeoSite`] entry.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoSiteList {
/// All entries, encoded as repeated field 1.
#[prost(message, repeated, tag = "1")]
pub entry: ::prost::alloc::vec::Vec<GeoSite>,
}

View file

@ -1,13 +1,12 @@
mod routing;
mod config;
mod geoparsers;
pub mod sniffing;
mod startup;
//mod routing;
//mod config;
//mod geoparsers;
//pub mod sniffing;
//mod startup;
use nsc::startup::init;
use std::io::Read;
/// Binary entry point: delegates to `nsc::startup::init` and returns its result unchanged.
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
init()
}

68
tests/v2ray_geosite.rs Normal file
View file

@ -0,0 +1,68 @@
use nsc::geoparsers::v2ray::parsing::GeoSiteService;
use nsc::geoparsers::v2ray::types::Domain;
use std::fs;
use std::path::PathBuf;
/// Ensures a geosite data file is available locally and returns its path.
///
/// The file is cached as `<temp dir>/seccontrol_test/geosite.dat`; it is downloaded only
/// when it does not exist yet.
///
/// Tests in this file run in parallel and all call this helper, so:
/// - a process-wide lock makes sure only one thread downloads at a time, and
/// - the download goes to a temporary file that is renamed into place once complete,
///   so no reader can ever observe a half-written `geosite.dat`, and a failed download
///   does not leave a truncated cache file behind.
///
/// # Errors
/// Returns an error when the cache directory cannot be created, the HTTP request fails,
/// or the file cannot be written or renamed.
fn download_geosite() -> Result<PathBuf, Box<dyn std::error::Error>> {
    static DOWNLOAD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
    // A poisoned lock only means another test panicked while holding it; the guarded
    // state lives on disk, so it is safe to keep going.
    let _guard = DOWNLOAD_LOCK
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    let tmp_dir = std::env::temp_dir().join("seccontrol_test");
    fs::create_dir_all(&tmp_dir)?;
    let geosite_path = tmp_dir.join("geosite.dat");

    if !geosite_path.exists() {
        // Use v2fly domain-list-community which has standard protobuf format
        let url = "https://github.com/v2fly/domain-list-community/releases/latest/download/dlc.dat";
        // The process id keeps concurrently running test binaries from sharing a temp file.
        let partial_path = tmp_dir.join(format!("geosite.dat.{}.part", std::process::id()));

        let download = (|| -> Result<(), Box<dyn std::error::Error>> {
            let response = ureq::get(url).call()?;
            let mut file = fs::File::create(&partial_path)?;
            let mut reader = response.into_reader();
            std::io::copy(&mut reader, &mut file)?;
            file.sync_all()?;
            Ok(())
        })();

        if let Err(err) = download {
            // Best effort: do not leave the incomplete temporary file around.
            let _ = fs::remove_file(&partial_path);
            return Err(err);
        }

        fs::rename(&partial_path, &geosite_path)?;
    }

    Ok(geosite_path)
}
/// Builds a `GeoSiteService` from the locally cached geosite file, downloading the file
/// first if necessary.
///
/// # Errors
/// Returns an error when the download fails, when the cache path is not valid UTF-8
/// (the service constructor takes a `&str`), or when the file cannot be parsed.
fn get_geosite_service() -> Result<GeoSiteService, Box<dyn std::error::Error>> {
    let geosite_path = download_geosite()?;
    // Report a non-UTF-8 temp path as an error instead of panicking.
    let path_str = geosite_path.to_str().ok_or_else(|| {
        format!(
            "geosite path is not valid UTF-8: {}",
            geosite_path.display()
        )
    })?;
    GeoSiteService::new(path_str)
}
/// The service can be constructed from the downloaded geosite file.
#[test]
fn geosite_service_creation() {
    if let Err(err) = get_geosite_service() {
        panic!("Failed to create GeoSiteService: {:?}", Some(err));
    }
}
/// Looking up a domain that is present in the list returns the entry containing it.
#[test]
fn lookup_existing_domain() {
    let service = get_geosite_service().expect("Failed to create service");
    assert!(!service.is_empty(), "Service should have entries");
    println!("Loaded {} GeoSite entries", service.len());

    // NOTE(review): assumes the downloaded domain list contains the value `google.com`
    // — adjust if the upstream data set ever drops it.
    let known_domain = "google.com";
    let site = service.lookup(known_domain);
    assert!(
        site.is_some(),
        "Expected `{known_domain}` to be present in the downloaded geosite list"
    );
}
/// Looking up a value that no entry contains yields `None`.
#[test]
fn lookup_nonexistent_domain() {
    let service = get_geosite_service().expect("Failed to create service");

    let probe = Domain {
        r#type: nsc::geoparsers::v2ray::types::domain::Type::Full as i32,
        value: String::from("nfaklsfjlasfvjkcnjnasxcjsas-not-existing-domain.com"),
        attribute: Vec::new(),
    };

    let found = service.lookup(&probe.value);
    assert!(found.is_none(), "Should return none for not existing domain");
    println!("{:?}", found);
}
/// The downloaded geosite file decodes to at least one entry.
#[test]
fn geosite_list_not_empty() {
    let service = get_geosite_service().expect("Failed to create service");
    let has_entries = !service.is_empty();
    assert!(has_entries, "GeoSiteList should not be empty");
    println!("Loaded {} GeoSite entries", service.len());
}