Wrote geosite protobuf parser and tests for it

This commit is contained in:
namilsk 2026-03-18 21:21:21 +03:00
parent da8e70f2e3
commit 8887a775f5
No known key found for this signature in database
GPG key ID: 2B2F0A4D870B4F9F
10 changed files with 548 additions and 18 deletions

View file

@ -1,2 +1,3 @@
pub mod geoip2;
pub mod toml;
pub mod v2ray;

View file

@ -0,0 +1,2 @@
pub mod parsing;
pub mod types;

View file

@ -0,0 +1,79 @@
use crate::geoparsers::v2ray::types::{Domain, GeoSite, GeoSiteList};
use prost::bytes::Buf;
use prost::Message;
use std::fs;
/// Holds a decoded [`GeoSiteList`] and answers queries against it.
pub struct GeoSiteService {
/// Every `GeoSite` entry decoded from the file passed to `new`.
index: GeoSiteList,
}
impl GeoSiteService {
    /// Reads the whole file at `path` into memory and decodes it into the index.
    ///
    /// # Errors
    /// Propagates the error from reading the file or from decoding its contents.
    // TODO: Make more smart memory mapping; geosite files can be > 70MB
    pub fn new(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
        let raw = fs::read(path)?;
        let index = decode_geosite_stream(&raw)?;
        Ok(Self { index })
    }

    /// Returns the first entry that contains a domain whose `value` is exactly
    /// equal to `value`, or `None` when no entry has one.
    ///
    /// Only the string is compared; the domain's matching `type` is not consulted.
    pub fn lookup(&self, value: &str) -> Option<&GeoSite> {
        for site in &self.index.entry {
            for domain in &site.domain {
                if domain.value == value {
                    return Some(site);
                }
            }
        }
        None
    }

    /// Number of `GeoSite` entries held in the index.
    pub fn len(&self) -> usize {
        let entries = &self.index.entry;
        entries.len()
    }

    /// Whether the index holds no `GeoSite` entries at all.
    pub fn is_empty(&self) -> bool {
        let entries = &self.index.entry;
        entries.is_empty()
    }
}
/// Decodes the contents of a `geosite.dat` file into a [`GeoSiteList`].
///
/// The input is read as a sequence of records, each consisting of:
/// 1. the tag byte `0x0a` (field 1, wire type 2 = length-delimited),
/// 2. a varint giving the payload length,
/// 3. that many bytes holding one encoded [`GeoSite`].
///
/// # Errors
/// Returns an error when a record starts with any other tag, when the length
/// varint is truncated, too long, or does not fit in `usize`, when the declared
/// length exceeds the bytes that are left, or when `GeoSite::decode` rejects
/// the payload. Malformed input never panics.
///
/// NOTE(review): this framing looks identical to the wire encoding of
/// `repeated GeoSite entry = 1`, so `GeoSiteList::decode` may be able to
/// replace this helper — confirm before switching.
fn decode_geosite_stream(bytes: &[u8]) -> Result<GeoSiteList, Box<dyn std::error::Error>> {
    let mut buf = bytes;
    let mut entries = Vec::new();
    while buf.has_remaining() {
        // Read tag (0x0a: field 1, wire type 2)
        let tag = buf.get_u8();
        if tag != 0x0a {
            return Err(format!("Unexpected tag: {:#04x}", tag).into());
        }
        let len = read_varint_len(&mut buf)?;
        // A truncated or corrupt file can declare more bytes than remain;
        // slicing unchecked would panic instead of reporting the problem.
        if len > buf.len() {
            return Err(format!(
                "GeoSite entry length {} exceeds remaining {} bytes",
                len,
                buf.len()
            )
            .into());
        }
        let (entry_bytes, rest) = buf.split_at(len);
        entries.push(GeoSite::decode(entry_bytes)?);
        buf = rest;
    }
    Ok(GeoSiteList { entry: entries })
}

/// Reads one protobuf varint from the front of `buf`, advances `buf` past it,
/// and returns the value as a `usize` length.
fn read_varint_len(buf: &mut &[u8]) -> Result<usize, Box<dyn std::error::Error>> {
    let mut value: u64 = 0;
    // A 64-bit varint occupies at most 10 bytes: shifts 0, 7, ..., 63.
    for shift in (0u32..64).step_by(7) {
        let current: &[u8] = *buf;
        let (byte, rest) = match current.split_first() {
            Some((&byte, rest)) => (byte, rest),
            None => return Err("Unexpected end of buffer while reading varint".into()),
        };
        *buf = rest;
        let chunk = u64::from(byte & 0x7f);
        // Only one payload bit of the 10th byte fits into 64 bits.
        if shift == 63 && chunk > 1 {
            return Err("Varint too long".into());
        }
        value |= chunk << shift;
        if byte & 0x80 == 0 {
            return match usize::try_from(value) {
                Ok(len) => Ok(len),
                Err(_) => Err("Varint length does not fit in usize".into()),
            };
        }
    }
    // Continuation bit still set after 10 bytes.
    Err("Varint too long".into())
}

View file

@ -0,0 +1,66 @@
syntax = "proto3";
package types;
// Domain for routing decision.
message Domain {
// Type of domain value.
enum Type {
// The value is used as is.
Plain = 0;
// The value is used as a regular expression.
Regex = 1;
// The value is a root domain.
Domain = 2;
// The value is a domain.
Full = 3;
}
// Domain matching type.
Type type = 1;
// Domain value.
string value = 2;
// Attribute of the domain.
message Attribute {
// Name of the attribute.
string key = 1;
// Value of the attribute: at most one of a boolean or a 64-bit integer.
oneof typed_value {
bool bool_value = 2;
int64 int_value = 3;
}
}
// Attributes of this domain. May be used for filtering.
repeated Attribute attribute = 3;
}
// IP network for routing decision, in CIDR form (address bytes plus prefix length).
message CIDR {
// IP address, should be either 4 or 16 bytes.
bytes ip = 1;
// Number of leading ones in the network mask.
uint32 prefix = 2;
}
// A set of CIDR ranges grouped under one country code.
message GeoIP {
string country_code = 1;
repeated CIDR cidr = 2;
}
// Container message holding a list of GeoIP entries.
message GeoIPList {
repeated GeoIP entry = 1;
}
// A set of domains grouped under one country code.
message GeoSite {
string country_code = 1;
repeated Domain domain = 2;
// resource_hash instruct simplified config converter to load domain from geo file.
bytes resource_hash = 3;
// NOTE(review): the purpose of `code` is not described in this file — confirm against upstream.
string code = 4;
}
// Container message holding a list of GeoSite entries.
message GeoSiteList {
repeated GeoSite entry = 1;
}

View file

@ -0,0 +1,121 @@
// This file is @generated by prost-build.
/// Domain for routing decision.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Domain {
/// Domain matching type, stored as the raw `i32` value of a `domain::Type`.
#[prost(enumeration = "domain::Type", tag = "1")]
pub r#type: i32,
/// Domain value.
#[prost(string, tag = "2")]
pub value: ::prost::alloc::string::String,
/// Attributes of this domain. May be used for filtering.
#[prost(message, repeated, tag = "3")]
pub attribute: ::prost::alloc::vec::Vec<domain::Attribute>,
}
/// Nested message and enum types in `Domain`.
pub mod domain {
/// Attribute of the domain.
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Attribute {
/// Name of the attribute.
#[prost(string, tag = "1")]
pub key: ::prost::alloc::string::String,
/// Value of the attribute; `None` when neither variant is set.
#[prost(oneof = "attribute::TypedValue", tags = "2, 3")]
pub typed_value: ::core::option::Option<attribute::TypedValue>,
}
/// Nested message and enum types in `Attribute`.
pub mod attribute {
/// The value carried by an `Attribute`: a boolean or a 64-bit integer.
#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Oneof)]
pub enum TypedValue {
/// Boolean attribute value (proto field 2).
#[prost(bool, tag = "2")]
BoolValue(bool),
/// Integer attribute value (proto field 3).
#[prost(int64, tag = "3")]
IntValue(i64),
}
}
/// Type of domain value.
#[derive(
Clone,
Copy,
Debug,
PartialEq,
Eq,
Hash,
PartialOrd,
Ord,
::prost::Enumeration
)]
#[repr(i32)]
pub enum Type {
/// The value is used as is.
Plain = 0,
/// The value is used as a regular expression.
Regex = 1,
/// The value is a root domain.
Domain = 2,
/// The value is a domain.
Full = 3,
}
impl Type {
/// String value of the enum field names used in the ProtoBuf definition.
///
/// The values are not transformed in any way and thus are considered stable
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
pub fn as_str_name(&self) -> &'static str {
match self {
Self::Plain => "Plain",
Self::Regex => "Regex",
Self::Domain => "Domain",
Self::Full => "Full",
}
}
/// Creates an enum from field names used in the ProtoBuf definition.
///
/// Returns `None` for any string that is not one of the four variant names.
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
match value {
"Plain" => Some(Self::Plain),
"Regex" => Some(Self::Regex),
"Domain" => Some(Self::Domain),
"Full" => Some(Self::Full),
_ => None,
}
}
}
}
/// IP network for routing decision, in CIDR form (address bytes plus prefix length).
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
pub struct Cidr {
/// IP address, should be either 4 or 16 bytes.
#[prost(bytes = "vec", tag = "1")]
pub ip: ::prost::alloc::vec::Vec<u8>,
/// Number of leading ones in the network mask.
#[prost(uint32, tag = "2")]
pub prefix: u32,
}
/// A set of CIDR ranges grouped under one country code.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoIp {
#[prost(string, tag = "1")]
pub country_code: ::prost::alloc::string::String,
#[prost(message, repeated, tag = "2")]
pub cidr: ::prost::alloc::vec::Vec<Cidr>,
}
/// Container message holding a list of `GeoIp` entries.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoIpList {
#[prost(message, repeated, tag = "1")]
pub entry: ::prost::alloc::vec::Vec<GeoIp>,
}
/// A set of domains grouped under one country code.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoSite {
#[prost(string, tag = "1")]
pub country_code: ::prost::alloc::string::String,
#[prost(message, repeated, tag = "2")]
pub domain: ::prost::alloc::vec::Vec<Domain>,
/// resource_hash instruct simplified config converter to load domain from geo file.
#[prost(bytes = "vec", tag = "3")]
pub resource_hash: ::prost::alloc::vec::Vec<u8>,
// NOTE(review): the purpose of `code` is not described in this file — confirm against upstream.
#[prost(string, tag = "4")]
pub code: ::prost::alloc::string::String,
}
/// Container message holding a list of `GeoSite` entries.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoSiteList {
#[prost(message, repeated, tag = "1")]
pub entry: ::prost::alloc::vec::Vec<GeoSite>,
}

View file

@ -1,13 +1,12 @@
mod routing;
mod config;
mod geoparsers;
pub mod sniffing;
mod startup;
//mod routing;
//mod config;
//mod geoparsers;
//pub mod sniffing;
//mod startup;
use nsc::startup::init;
use std::io::Read;
/// Program entry point: delegates to `nsc::startup::init` and returns its result.
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
init()
}