Wrote the geosite protobuf parser and tests for it
This commit is contained in:
parent
da8e70f2e3
commit
8887a775f5
10 changed files with 548 additions and 18 deletions
|
|
@ -1,2 +1,3 @@
|
|||
pub mod geoip2;
|
||||
pub mod toml;
|
||||
pub mod v2ray;
|
||||
2
src/geoparsers/v2ray/mod.rs
Normal file
2
src/geoparsers/v2ray/mod.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod parsing;
|
||||
pub mod types;
|
||||
79
src/geoparsers/v2ray/parsing.rs
Normal file
79
src/geoparsers/v2ray/parsing.rs
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
use crate::geoparsers::v2ray::types::{Domain, GeoSite, GeoSiteList};
|
||||
use prost::bytes::Buf;
|
||||
use prost::Message;
|
||||
use std::fs;
|
||||
|
||||
/// In-memory lookup service over the entries decoded from a geosite file.
pub struct GeoSiteService {
|
||||
/// Every `GeoSite` entry decoded from the file passed to `new`.
index: GeoSiteList,
|
||||
}
|
||||
|
||||
impl GeoSiteService {
|
||||
// TODO: Make more smart memory mapping; geosite files can be > 70MB
|
||||
pub fn new(path: &str) -> Result<Self, Box<dyn std::error::Error>> {
|
||||
let bytes = fs::read(path)?;
|
||||
let geosite_list = decode_geosite_stream(&bytes)?;
|
||||
|
||||
Ok(Self {
|
||||
index: geosite_list,
|
||||
})
|
||||
}
|
||||
|
||||
// Idk but i think it can work
|
||||
pub fn lookup(&self, value: &str) -> Option<&GeoSite> {
|
||||
self.index
|
||||
.entry
|
||||
.iter()
|
||||
.find(|site| site.domain.iter().any(|d| d.value == value))
|
||||
}
|
||||
|
||||
/// Returns the number of GeoSite entries in the list
|
||||
pub fn len(&self) -> usize {
|
||||
self.index.entry.len()
|
||||
}
|
||||
|
||||
/// Returns true if the GeoSite list is empty
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.index.entry.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode a stream of length-delimited GeoSite messages
|
||||
/// `geosite.dat` ts is not one protobuf-message, stream of length-delimited messages
|
||||
/// so we need ts helper
|
||||
fn decode_geosite_stream(bytes: &[u8]) -> Result<GeoSiteList, Box<dyn std::error::Error>> {
|
||||
let mut buf = bytes;
|
||||
let mut entries = Vec::new();
|
||||
|
||||
while buf.has_remaining() {
|
||||
// Read tag (0x0a field 1, wire type 2)
|
||||
let tag = buf.get_u8();
|
||||
if tag != 0x0a {
|
||||
return Err(format!("Unexpected tag: {:#04x}", tag).into());
|
||||
}
|
||||
// varint
|
||||
let mut len = 0usize;
|
||||
let mut shift = 0;
|
||||
loop {
|
||||
if !buf.has_remaining() {
|
||||
return Err("Unexpected end of buffer while reading varint".into());
|
||||
}
|
||||
let b = buf.get_u8();
|
||||
len |= ((b & 0x7f) as usize) << shift;
|
||||
if b & 0x80 == 0 {
|
||||
break;
|
||||
}
|
||||
shift += 7;
|
||||
if shift >= 70 {
|
||||
return Err("Varint too long".into());
|
||||
}
|
||||
}
|
||||
|
||||
let entry_bytes = &buf[..len];
|
||||
let site = GeoSite::decode(entry_bytes)?;
|
||||
entries.push(site);
|
||||
|
||||
buf.advance(len);
|
||||
}
|
||||
|
||||
Ok(GeoSiteList { entry: entries })
|
||||
}
|
||||
66
src/geoparsers/v2ray/proto_src/geosite.proto
Normal file
66
src/geoparsers/v2ray/proto_src/geosite.proto
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
syntax = "proto3";
|
||||
|
||||
package types;
|
||||
|
||||
// Domain for routing decision.
|
||||
message Domain {
|
||||
// Type of domain value; selects how `value` is interpreted.
|
||||
enum Type {
|
||||
// The value is used as is.
|
||||
Plain = 0;
|
||||
// The value is used as a regular expression.
|
||||
Regex = 1;
|
||||
// The value is a root domain.
|
||||
Domain = 2;
|
||||
// The value is a domain.
// NOTE(review): presumably an exact, full-domain match, as the name suggests — confirm.
|
||||
Full = 3;
|
||||
}
|
||||
|
||||
// Domain matching type.
|
||||
Type type = 1;
|
||||
|
||||
// Domain value.
|
||||
string value = 2;
|
||||
|
||||
// Attribute of the domain: a key with an optional typed value.
|
||||
message Attribute {
|
||||
// Name of the attribute.
string key = 1;
|
||||
// Value of the attribute; at most one of the members below is set.
oneof typed_value {
|
||||
// Boolean value of the attribute.
bool bool_value = 2;
|
||||
// 64-bit signed integer value of the attribute.
int64 int_value = 3;
|
||||
}
|
||||
}
|
||||
|
||||
// Attributes of this domain. May be used for filtering.
|
||||
repeated Attribute attribute = 3;
|
||||
}
|
||||
|
||||
// IP for routing decision, in CIDR form.
|
||||
message CIDR {
|
||||
// Raw IP address bytes; should be either 4 or 16 bytes long
// (presumably IPv4 and IPv6 respectively).
|
||||
bytes ip = 1;
|
||||
|
||||
// Number of leading ones in the network mask, i.e. the prefix length.
|
||||
uint32 prefix = 2;
|
||||
}
|
||||
|
||||
// A group of CIDR ranges stored under one country code.
message GeoIP {
|
||||
// Code that identifies this group of ranges.
string country_code = 1;
|
||||
// CIDR ranges that belong to this group.
repeated CIDR cidr = 2;
|
||||
}
|
||||
|
||||
// Container holding a list of GeoIP entries.
message GeoIPList {
|
||||
// The GeoIP entries, one per group.
repeated GeoIP entry = 1;
|
||||
}
|
||||
|
||||
// A group of domains stored under one country code.
message GeoSite {
|
||||
// Code that identifies this group of domains.
string country_code = 1;
|
||||
// Domains that belong to this group.
repeated Domain domain = 2;
|
||||
// resource_hash instructs the simplified config converter to load domains from the geo file.
|
||||
bytes resource_hash = 3;
|
||||
// NOTE(review): the purpose of `code` is not evident from this file — confirm against the upstream schema.
string code = 4;
|
||||
}
|
||||
|
||||
// Container holding a list of GeoSite entries.
message GeoSiteList {
|
||||
// The GeoSite entries, one per group.
repeated GeoSite entry = 1;
|
||||
}
|
||||
121
src/geoparsers/v2ray/types.rs
Normal file
121
src/geoparsers/v2ray/types.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
// This file is @generated by prost-build.
|
||||
/// Domain for routing decision.
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct Domain {
|
||||
/// Domain matching type.
|
||||
#[prost(enumeration = "domain::Type", tag = "1")]
|
||||
pub r#type: i32,
|
||||
/// Domain value.
|
||||
#[prost(string, tag = "2")]
|
||||
pub value: ::prost::alloc::string::String,
|
||||
/// Attributes of this domain. May be used for filtering.
|
||||
#[prost(message, repeated, tag = "3")]
|
||||
pub attribute: ::prost::alloc::vec::Vec<domain::Attribute>,
|
||||
}
|
||||
/// Nested message and enum types in `Domain`.
|
||||
pub mod domain {
|
||||
/// Attribute of the domain.
|
||||
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
|
||||
pub struct Attribute {
|
||||
#[prost(string, tag = "1")]
|
||||
pub key: ::prost::alloc::string::String,
|
||||
#[prost(oneof = "attribute::TypedValue", tags = "2, 3")]
|
||||
pub typed_value: ::core::option::Option<attribute::TypedValue>,
|
||||
}
|
||||
/// Nested message and enum types in `Attribute`.
|
||||
pub mod attribute {
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Oneof)]
|
||||
pub enum TypedValue {
|
||||
#[prost(bool, tag = "2")]
|
||||
BoolValue(bool),
|
||||
#[prost(int64, tag = "3")]
|
||||
IntValue(i64),
|
||||
}
|
||||
}
|
||||
/// Type of domain value.
|
||||
#[derive(
|
||||
Clone,
|
||||
Copy,
|
||||
Debug,
|
||||
PartialEq,
|
||||
Eq,
|
||||
Hash,
|
||||
PartialOrd,
|
||||
Ord,
|
||||
::prost::Enumeration
|
||||
)]
|
||||
#[repr(i32)]
|
||||
pub enum Type {
|
||||
/// The value is used as is.
|
||||
Plain = 0,
|
||||
/// The value is used as a regular expression.
|
||||
Regex = 1,
|
||||
/// The value is a root domain.
|
||||
Domain = 2,
|
||||
/// The value is a domain.
|
||||
Full = 3,
|
||||
}
|
||||
impl Type {
|
||||
/// String value of the enum field names used in the ProtoBuf definition.
|
||||
///
|
||||
/// The values are not transformed in any way and thus are considered stable
|
||||
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
|
||||
pub fn as_str_name(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Plain => "Plain",
|
||||
Self::Regex => "Regex",
|
||||
Self::Domain => "Domain",
|
||||
Self::Full => "Full",
|
||||
}
|
||||
}
|
||||
/// Creates an enum from field names used in the ProtoBuf definition.
|
||||
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
|
||||
match value {
|
||||
"Plain" => Some(Self::Plain),
|
||||
"Regex" => Some(Self::Regex),
|
||||
"Domain" => Some(Self::Domain),
|
||||
"Full" => Some(Self::Full),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/// IP for routing decision, in CIDR form.
|
||||
#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)]
|
||||
pub struct Cidr {
|
||||
/// IP address, should be either 4 or 16 bytes.
|
||||
#[prost(bytes = "vec", tag = "1")]
|
||||
pub ip: ::prost::alloc::vec::Vec<u8>,
|
||||
/// Number of leading ones in the network mask.
|
||||
#[prost(uint32, tag = "2")]
|
||||
pub prefix: u32,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GeoIp {
|
||||
#[prost(string, tag = "1")]
|
||||
pub country_code: ::prost::alloc::string::String,
|
||||
#[prost(message, repeated, tag = "2")]
|
||||
pub cidr: ::prost::alloc::vec::Vec<Cidr>,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GeoIpList {
|
||||
#[prost(message, repeated, tag = "1")]
|
||||
pub entry: ::prost::alloc::vec::Vec<GeoIp>,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GeoSite {
|
||||
#[prost(string, tag = "1")]
|
||||
pub country_code: ::prost::alloc::string::String,
|
||||
#[prost(message, repeated, tag = "2")]
|
||||
pub domain: ::prost::alloc::vec::Vec<Domain>,
|
||||
/// resource_hash instruct simplified config converter to load domain from geo file.
|
||||
#[prost(bytes = "vec", tag = "3")]
|
||||
pub resource_hash: ::prost::alloc::vec::Vec<u8>,
|
||||
#[prost(string, tag = "4")]
|
||||
pub code: ::prost::alloc::string::String,
|
||||
}
|
||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
||||
pub struct GeoSiteList {
|
||||
#[prost(message, repeated, tag = "1")]
|
||||
pub entry: ::prost::alloc::vec::Vec<GeoSite>,
|
||||
}
|
||||
13
src/main.rs
13
src/main.rs
|
|
@ -1,13 +1,12 @@
|
|||
mod routing;
|
||||
mod config;
|
||||
mod geoparsers;
|
||||
pub mod sniffing;
|
||||
mod startup;
|
||||
//mod routing;
|
||||
//mod config;
|
||||
//mod geoparsers;
|
||||
//pub mod sniffing;
|
||||
//mod startup;
|
||||
|
||||
use nsc::startup::init;
|
||||
|
||||
use std::io::Read;
|
||||
|
||||
/// Program entry point: delegates to `nsc::startup::init` and returns its result.
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
|
||||
|
||||
init()
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue