目的
之前就考虑评估过 函数式解析器 在dotnet这些面向对象语言上有着一些损耗,虽然表意性很强,与ABNF范式结合使用,维护性大大提升
不过由于性能考虑(以及之前认为也或许没有太多机会实现解析器),就没打算继续深究
不过现实中确实多次需要实现解析器,每次从0手写太费时间了,毕竟很多场景对解析并不需要极致性能(特别是现在 ai code 疯狂的时代,老板哪会给那么多时间让你慢慢抠性能,老板们都常说 ai 几秒钟的事情,你个辣鸡还花几天)
所以为了血压和身体健康,只要性能将就,对于不太在意解析性能的场景,以后就这样偷懒吧 (比如今年搞的 VKProxy 里面的动态条件和简单模板替换,路子可行的话也许后面还是换掉吧,维护多累呀)
当然也有高手搞过类似的库,比如 Parlot , 不过其不支持 stream, 理想情况还是想stream 也能支持, 所以现在先简单搞一波:参考 Parlot 简单实现 string 解析器底层以及 json path 简单版
然后和 Newtonsoft.json 在简单的 json path 场景比较,如果打不过,那恐怕就没有继续的必要了
简要说明函数式解析器
这里以最简单 json path "$.Name" 举例,
我们所需要解析即为 $ 开头, . 之后字符为属性名
换成代码大致为 Parser = Char('$').And(Char('.')).And(AnyExclude("[]().,\" '\r\n@$!=<\\?&|*:")).Eof()
像 Char And 这些方法在面向对象中我们会让方法生成解析算子实例,以达到与函数式相同效果, 这也是性能损耗的一个大点
比如 Char 方法
csharp
/// <summary>
/// Creates a parser operator that matches the single character <paramref name="c"/>.
/// </summary>
/// <param name="c">The character the returned parser accepts.</param>
/// <returns>A new <see cref="CharLiteral"/> instance built for <paramref name="c"/>.</returns>
public static Parser<char> Char(char c)
{
    return new CharLiteral(c);
}
具体算子实现
csharp
/// <summary>
/// Parser operator that consumes one character from the input when that character
/// belongs to a fixed set of accepted characters.
/// </summary>
public class CharLiteral : Parser<char>
{
    // Lookup structure holding the accepted characters.
    private SearchValues<char> accepted;

    /// <summary>
    /// The text the accepted set was built from (a single character or the whole string).
    /// </summary>
    public string Value { get; private set; }

    /// <summary>Builds a parser that accepts exactly the character <paramref name="c"/>.</summary>
    public CharLiteral(char c)
    {
        accepted = SearchValues.Create(new[] { c });
        Value = c.ToString();
    }

    /// <summary>Builds a parser that accepts any one character contained in <paramref name="c"/>.</summary>
    public CharLiteral(string c)
    {
        accepted = SearchValues.Create(c);
        Value = c;
    }

    /// <summary>
    /// Tries to consume one accepted character at the current cursor position.
    /// </summary>
    /// <param name="context">Parse context supplying the cursor; notified on enter and exit.</param>
    /// <param name="result">Receives the start offset, end offset and matched character on success.</param>
    /// <returns><c>true</c> if a character was consumed; otherwise <c>false</c> and the cursor is not advanced.</returns>
    public override bool Parse(CharParseContext context, ref ParseResult<char> result)
    {
        context.EnterParser(this);
        var cursor = context.Cursor;

        // Guard: nothing left to read, or the next character is not in the accepted set.
        if (cursor.Eof || !accepted.Contains(cursor.Current))
        {
            context.ExitParser(this);
            return false;
        }

        var matched = cursor.Current;
        var begin = cursor.Offset;
        cursor.Advance();
        result.Set(begin, cursor.Offset, matched);
        context.ExitParser(this);
        return true;
    }
}
好了,借用经典话语:这是攻,这是防,这是boss,去吧
按照 [rfc9535](https://www.rfc-editor.org/rfc/rfc9535.html) 标准, 最终 json path 解析器大致如下:
csharp
/// <summary>
/// JSONPath grammar (modelled on RFC 9535) assembled from parser combinators.
/// Every grammar rule is a static parser instance; <see cref="Parser"/> is the entry point
/// that parses a complete query up to end of input.
/// </summary>
public class JsonPathParser
{
    //public static Parser<char> B = Char(new char[]
    //{ (char)0x20, // Space
    //  (char)0x09, //Horizontal tab
    //  (char)0x0A, // Line feed or New line
    //  (char)0x0D // Carriage return
    //});

    // ---- Tokens and simple selectors ----
    public static readonly Parser<char> RootIdentifier = Char('$').Name(nameof(RootIdentifier));
    public static readonly Parser<int> Int = Int().Name(nameof(Int));
    public static readonly Parser<char> DoubleQuoted = Char('"').Name(nameof(DoubleQuoted));
    public static readonly Parser<char> SingleQuoted = Char('\'').Name(nameof(SingleQuoted));
    public static readonly Parser<IStatement> WildcardSelector = Char('*').Then<IStatement>(static x => new WildcardSelectorStatment()).Name(nameof(WildcardSelector));
    public static readonly Parser<IStatement> IndexSelector = Int.Then<IStatement>(static x => new IndexSelectorStatment() { Index = x }).Name(nameof(IndexSelector));
    public static readonly Parser<TextSpan> StringLiteral = Between(DoubleQuoted, ZeroOrOne(Any("\"", mustHasEnd: true, escape: '\\')), DoubleQuoted).Or(Between(SingleQuoted, ZeroOrOne(Any("'", mustHasEnd: true, escape: '\\')), SingleQuoted)).Name(nameof(StringLiteral));
    public static readonly Parser<IStatement> NameSelector = StringLiteral.Then<IStatement>(static x => new Member() { Name = x.Span.ToString() }).Name(nameof(NameSelector));
    public static readonly Parser<int> Start = Int;
    public static readonly Parser<int> End = Int;
    public static readonly Parser<int> Step = Int;

    // Whitespace that is skipped between tokens.
    public static readonly Parser<Nothing> S = IgnoreChar(new char[]
    { (char)0x20, // Space
      (char)0x09, //Horizontal tab
      (char)0x0A, // Line feed or New line
      (char)0x0D // Carriage return
    }).Name(nameof(S));
    public static readonly Parser<char> CurrentNodeIdentifier = Char('@').Name(nameof(CurrentNodeIdentifier));
    public static readonly Parser<char> LogicalNotOp = Char('!').Name(nameof(LogicalNotOp));

    // Two-character operators are listed before their one-character prefixes so "<=" is not read as "<".
    public static readonly Parser<string> ComparisonOp = Text("==").Or(Text("!=")).Or(Text("<=")).Or(Text(">=")).Or(Text("<")).Or(Text(">")).Name(nameof(ComparisonOp));
    public static readonly Parser<IStatement> Num = Decimal(NumberOptions.Float).Then<IStatement>(static x => new NumberValue(x)).Name(nameof(Num));
    public static readonly Parser<IStatement> True = Text("true").Then<IStatement>(static x => BoolValue.True).Name(nameof(True));
    public static readonly Parser<IStatement> False = Text("false").Then<IStatement>(static x => BoolValue.False).Name(nameof(False));
    public static readonly Parser<IStatement> Null = Text("null").Then<IStatement>(static x => NullValue.Value).Name(nameof(Null));

    // Characters excluded from shorthand member names and function names.
    private const string name = "[]().,\" '\r\n@$!=<\\?&|*:";
    //public static Parser<char> LCALPHA = Char('a', 'z');
    //public static Parser<char> DIGIT = Char('0', '9');
    //public static Parser<char> ALPHA = Char((char)0x41, (char)0x5A).Or(Char((char)0x61, (char)0x7A));
    public static readonly Parser<IStatement> MemberNameShorthand = AnyExclude(name).Then<IStatement>(static x => new Member { Name = x.Span.ToString() }).Name(nameof(MemberNameShorthand));
    //public static Parser<char> NameFirst = ALPHA.Or(Char('_')).Or(Char((char)0x80, (char)0xD7FF)).Or(Char((char)0xE000, (char)0xFFFF));
    //public static Parser<char> NameChar = NameFirst.Or(DIGIT);
    //public static Parser<char> FunctionNameFirst = LCALPHA;
    //public static Parser<char> FunctionNameChar = FunctionNameFirst.Or(Char('_')).Or(DIGIT);
    //public static Parser<string> FunctionName = FunctionNameFirst.And(ZeroOrMany(FunctionNameChar)).Then<string>(static x => throw new NotImplementedException());
    public static readonly Parser<string> FunctionName = AnyExclude(name).Then<string>(static x => x.Span.ToString()).Name(nameof(FunctionName));

    // Tuple layout after flattening: Item1 = start, Item2 = ':', Item3 = whitespace, Item4 = end, Item5 = step.
    public static readonly Parser<IStatement> SliceSelector = Optional<int?>(Start.And(S).Then<int?>(static x => x.Item1), null).And(Char(':')).And(S).And(Optional<int?>(End.And(S).Then<int?>(static x => x.Item1), null)).And(Optional<int?>(Char(':').And(Optional<int?>(S.And(Step).Then<int?>(static x => x.Item2), null)).Then<int?>(static x => x.Item2))).Then<IStatement>(static x => new SliceStatement() { Start = x.Item1, End = x.Item4, Step = x.Item5 })
        .Name(nameof(SliceSelector));

    // ---- Filter expressions (mutually recursive rules go through Deferred placeholders) ----
    public static readonly Deferred<IStatement> LogicalExpr = Deferred<IStatement>(nameof(LogicalExpr));
    public static readonly Parser<IStatement> FilterSelector = Char('?').And(S).And(LogicalExpr).Then<IStatement>(static x => new FilterSelectorStatement()
    {
        Statement = x.Item3
    }).Name(nameof(FilterSelector));
    public static readonly Parser<IStatement> Selector = NameSelector.Or(WildcardSelector).Or(SliceSelector).Or(IndexSelector).Or(FilterSelector).Name(nameof(Selector));
    public static readonly Parser<IStatement> ParenExpr = Optional(LogicalNotOp.And(S)).And(Char('(')).And(S).And(LogicalExpr).And(S).And(Char(')'))
        .Then<IStatement>(static x => new UnaryOperaterStatement()
        {
            Operator = x.Item1.Item1 == '!' ? "!" : "(",
            Statement = x.Item4
        }).Name(nameof(ParenExpr));
    public static readonly Deferred<IReadOnlyList<(Nothing, IStatement)>> Segments = Deferred<IReadOnlyList<(Nothing, IStatement)>>(nameof(Segments));
    public static readonly Deferred<IStatement> FunctionExpr = Deferred<IStatement>(nameof(FunctionExpr));
    public static readonly Deferred<IStatement> JsonPathQuery = Deferred<IStatement>(nameof(JsonPathQuery));
    public static readonly Parser<IStatement> RelQuery = CurrentNodeIdentifier.And(Segments).Then<IStatement>(static x => new CurrentNode() { Child = ConvertSegments(x.Item2) }).Name(nameof(RelQuery));
    public static readonly Parser<IStatement> Literal = Num.Or(StringLiteral.Then<IStatement>(static x => new StringValue(x.Span.ToString()))).Or(True).Or(False).Or(Null).Name(nameof(Literal));
    public static readonly Parser<IStatement> NameSegment = Char('[').And(NameSelector).And(Char(']')).Then<IStatement>(static x => x.Item2).Or(Char('.').And(MemberNameShorthand).Then<IStatement>(static x => x.Item2)).Name(nameof(NameSegment));
    public static readonly Parser<IStatement> IndexSegment = Char('[').And(IndexSelector).And(Char(']')).Then<IStatement>(static x => x.Item2).Name(nameof(IndexSegment));
    public static readonly Parser<IStatement> SingularQuerySegments = ZeroOrMany(S.And(NameSegment.Or(IndexSegment))).Then<IStatement>(ConvertSegments).Name(nameof(SingularQuerySegments));
    public static readonly Parser<IStatement> RelSingularQuery = CurrentNodeIdentifier.And(SingularQuerySegments).Then<IStatement>(static x => new CurrentNode() { Child = x.Item2 }).Name(nameof(RelSingularQuery));
    public static readonly Parser<IStatement> AbsSingularQuery = RootIdentifier.And(SingularQuerySegments).Then<IStatement>(static x => new RootNode() { Child = x.Item2 }).Name(nameof(AbsSingularQuery));
    public static readonly Parser<IStatement> SingularQuery = RelSingularQuery.Or(AbsSingularQuery).Name(nameof(SingularQuery));
    public static readonly Parser<IStatement> Comparable = Literal.Or(SingularQuery).Or(FunctionExpr).Name(nameof(Comparable));
    public static readonly Parser<IStatement> ComparisonExpr = Comparable.And(S).And(ComparisonOp).And(S).And(Comparable).Then<IStatement>(static x => new OperatorStatement() { Left = x.Item1, Operator = x.Item3, Right = x.Item5 }).Name(nameof(ComparisonExpr));
    public static readonly Parser<IStatement> FilterQuery = RelQuery.Or(JsonPathQuery).Name(nameof(FilterQuery));
    public static readonly Parser<IStatement> FunctionArgument = FilterQuery.Or(LogicalExpr).Or(FunctionExpr).Or(Literal).Name(nameof(FunctionArgument));
    public static readonly Parser<IStatement> TestExpr = Optional(LogicalNotOp.And(S)).And(FilterQuery.Or(FunctionExpr)).Then<IStatement>(static x => x.Item1.Item1 == '!' ? new UnaryOperaterStatement() { Operator = "!", Statement = x.Item2 } : x.Item2).Name(nameof(TestExpr));
    public static readonly Parser<IStatement> BasicExpr = ParenExpr.Or(ComparisonExpr).Or(TestExpr).Name(nameof(BasicExpr));

    // "a && b && c" is folded left-to-right into nested AndStatement nodes.
    public static readonly Parser<IStatement> LogicalAndExpr = BasicExpr.And(ZeroOrMany(S.And(Text("&&")).And(S).And(BasicExpr))).Then<IStatement>(static x =>
    {
        IStatement current = x.Item1;
        if (x.Item2 != null && x.Item2.Count > 0)
        {
            foreach (var item in x.Item2)
            {
                current = new AndStatement() { Left = current, Right = item.Item4 };
            }
        }
        return current;
    }).Name(nameof(LogicalAndExpr));

    // "a || b || c" is folded left-to-right into nested OrStatement nodes.
    public static readonly Parser<IStatement> LogicalOrExpr = LogicalAndExpr.And(ZeroOrMany(S.And(Text("||")).And(S).And(LogicalAndExpr))).Then<IStatement>(static x =>
    {
        IStatement current = x.Item1;
        if (x.Item2 != null && x.Item2.Count > 0)
        {
            foreach (var item in x.Item2)
            {
                current = new OrStatement() { Left = current, Right = item.Item4 };
            }
        }
        return current;
    }).Name(nameof(LogicalOrExpr));

    // ---- Segments ----
    // A single selector is returned as-is; several selectors are wrapped in a UnionSelectionStatement.
    public static readonly Parser<IStatement> BracketedSelection = Char('[').And(S).And(Selector).And(ZeroOrMany(S.And(Char(',')).And(S).And(Selector))).And(S).And(Char(']'))
        .Then<IStatement>(static x =>
        {
            var list = new List<IStatement> { x.Item3 };
            if (x.Item4 != null)
                list.AddRange(x.Item4.Select(y => y.Item4));
            if (list.Count == 0)
                return null;
            return list.Count == 1 ? list[0] : new UnionSelectionStatement(list);
        }).Name(nameof(BracketedSelection));
    public static readonly Parser<IStatement> ChildSegment = BracketedSelection.Or(Char('.').And(WildcardSelector.Or(MemberNameShorthand)).Then<IStatement>(static x => x.Item2)).Name(nameof(ChildSegment));
    public static readonly Parser<IStatement> DescendantSegment = Char('.').And(Char('.')).And(BracketedSelection.Or(WildcardSelector).Or(MemberNameShorthand)).Then<IStatement>(static x => new WildcardSelectorStatment() { Child = x.Item3 }).Name(nameof(DescendantSegment));
    public static readonly Parser<IStatement> Segment = ChildSegment.Or(DescendantSegment).Name(nameof(Segment));

    /// <summary>Entry point: a full JSONPath query followed by end of input.</summary>
    public static readonly Parser<IStatement> Parser;

    // Binds the Deferred placeholders declared above to their concrete rules, then builds the entry point.
    static JsonPathParser()
    {
        LogicalExpr.Parser = LogicalOrExpr;
        Segments.Parser = ZeroOrMany(S.And(Segment));
        //MemberNameShorthand.Parser = NameFirst.And(ZeroOrMany(NameChar)).Then<IStatement>(static x => new Member { Name = x.Item1 + new string(x.Item2.ToArray()) });
        FunctionExpr.Parser = FunctionName.And(Char('(')).And(S).And(Optional(FunctionArgument.And(ZeroOrMany(S.And(Char(',')).And(S).And(FunctionArgument))))).And(S).And(Char(')')).Then<IStatement>(static x =>
        {
            var args = new List<IStatement>();
            if (x.Item4.Item1 != null)
            {
                args.Add(x.Item4.Item1);
            }
            if (x.Item4.Item2 != null)
            {
                args.AddRange(x.Item4.Item2.Select(y => y.Item4));
            }
            var func = new FunctionStatement()
            {
                Name = x.Item1,
                Arguments = args.Count == 0 ? Array.Empty<IStatement>() : args.ToArray()
            };
            return func;
        });
        JsonPathQuery.Parser = RootIdentifier.And(Segments).Then<IStatement>(static x => new RootNode() { Child = ConvertSegments(x.Item2) });
        Parser = JsonPathQuery.Eof().Name(nameof(Parser));
    }

    /// <summary>
    /// Links a flat list of parsed segments into a single chain: working from the last segment
    /// backwards, the chain built so far is attached to the deepest empty <c>Child</c> slot of
    /// the preceding segment.
    /// </summary>
    /// <param name="x">Parsed segments in source order; only <c>Item2</c> of each entry is used.</param>
    /// <returns>
    /// <c>null</c> for a null or empty list, the only statement for a one-element list,
    /// otherwise the first segment with the remaining segments chained beneath it.
    /// </returns>
    /// <exception cref="NotSupportedException">
    /// A segment other than the last one, or a node on its existing child chain,
    /// does not implement <see cref="IParentStatement"/>.
    /// </exception>
    private static IStatement ConvertSegments(IReadOnlyList<(Nothing, IStatement)> x)
    {
        if (x == null || x.Count == 0)
        {
            return null;
        }

        if (x.Count == 1)
        {
            return x[0].Item2;
        }

        var current = x[x.Count - 1].Item2;
        for (int i = x.Count - 2; i >= 0; i--)
        {
            if (x[i].Item2 is not IParentStatement p)
            {
                throw new NotSupportedException($"Cannot set child for statement of type {x[i].Item2.GetType().FullName}");
            }

            // Walk to the deepest node that has no child yet. The step must follow the
            // moving cursor (pp.Child); following p.Child never advances past the first
            // level and loops forever once the chain is two or more levels deep.
            var pp = p;
            while (pp.Child != null)
            {
                if (pp.Child is not IParentStatement pc)
                {
                    throw new NotSupportedException($"Cannot set child for statement of type {pp.Child.GetType().FullName}");
                }
                pp = pc;
            }

            pp.Child = current;
            current = p;
        }
        return current;
    }
}
性能测试
测试代码
csharp
/// <summary>
/// BenchmarkDotNet suite that runs the same JSONPath query through <c>JsonPathParser</c>,
/// Newtonsoft.Json (<c>JToken.SelectTokens</c>) and the <c>JsonPath</c>/<c>JsonNode</c> API,
/// with and without re-parsing the path on every call.
/// </summary>
[MemoryDiagnoser, GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)]
public class JsonPathBenchmarks
{
    // Sample document; serialized to JSON once in the constructor.
    private object data = new
    {
        Num = -3.4,
        Nu = null as string,
        Array = new object[]
        {
            new { Name = "Alice", Age = 30 },
            new { Name = "Bob", Age = 25 },
            new { Name = "Charlie", Age = 35 }
        },
    };
    private string path = "$.Array[1]['Name','Age']";
    private string json;

    // Pre-parsed query for the "cache" benchmarks of this library.
    private IStatement cache;

    // Pre-parsed query for the JsonPath "cache" benchmark.
    private readonly JsonPath pc;

    public JsonPathBenchmarks()
    {
        json = JsonSerializer.Serialize(data);
        JsonPathParser.Parser.TryParseResult(path, out var result, out var error);
        cache = result.Value;
        pc = JsonPath.Parse(path);
    }

    /// <summary>Evaluates the pre-parsed query against the JSON text.</summary>
    [Benchmark]
    public object CacheTest()
    {
        return cache.EvaluateJson(json);
    }

    /// <summary>Parses the path and evaluates it against the JSON text on every call.</summary>
    [Benchmark]
    public object NoCacheTest()
    {
        JsonPathParser.Parser.TryParseResult(path, out var result, out var error);
        return result.Value.EvaluateJson(json);
    }

    // Token parsed from the JSON literal "null", used only by the parse-only benchmark below.
    private Newtonsoft.Json.Linq.JToken testTo = Newtonsoft.Json.Linq.JToken.Parse("null");

    /// <summary>
    /// Newtonsoft path parsing only: the sequence returned by SelectTokens is deliberately
    /// left un-enumerated.
    /// </summary>
    [Benchmark]
    public object NewtonsoftOnlyParseTest()
    {
        return testTo.SelectTokens(path);
    }

    /// <summary>Newtonsoft: parse the JSON, parse the path and evaluate the query.</summary>
    [Benchmark]
    public object NewtonsoftTest()
    {
        Newtonsoft.Json.Linq.JToken token = Newtonsoft.Json.Linq.JToken.Parse(json);
        // SelectTokens returns a deferred sequence; without enumerating it the query is never
        // executed and only JSON + path parsing is timed. Materialize it so evaluation is
        // measured too. Timings recorded before this change did not include enumeration.
        return System.Linq.Enumerable.ToList(token.SelectTokens(path));
    }

    /// <summary>JsonPath: parse the path, parse the JSON into a node and evaluate.</summary>
    [Benchmark]
    public object JsonPathNetTest()
    {
        var p = JsonPath.Parse(path);
        var instance = JsonNode.Parse(json);
        return p.Evaluate(instance);
    }

    /// <summary>JsonPath: reuse the pre-parsed path, parse the JSON into a node and evaluate.</summary>
    [Benchmark]
    public object JsonPathNetCacheTest()
    {
        var instance = JsonNode.Parse(json);
        return pc.Evaluate(instance);
    }

    /// <summary>JsonPath: path parsing only.</summary>
    [Benchmark]
    public object JsonPathNetOnlyParseTest()
    {
        return JsonPath.Parse(path);
    }

    /// <summary>This library: path parsing only.</summary>
    [Benchmark]
    public object OnlyParseTest()
    {
        JsonPathParser.Parser.TryParseResult(path, out var result, out var error);
        return result.Value;
    }
}
效果
BenchmarkDotNet v0.15.4, Windows 11 (10.0.26100.6584/24H2/2024Update/HudsonValley)
Intel Core i7-10700 CPU 2.90GHz, 1 CPU, 16 logical and 8 physical cores
.NET SDK 9.0.304
[Host] : .NET 9.0.8 (9.0.8, 9.0.825.36511), X64 RyuJIT x86-64-v3
DefaultJob : .NET 9.0.8 (9.0.8, 9.0.825.36511), X64 RyuJIT x86-64-v3
| Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated |
|---|---|---|---|---|---|---|
| CacheTest | 1,488.1 ns | 15.42 ns | 14.42 ns | 0.2537 | - | 2136 B |
| NoCacheTest | 2,650.5 ns | 11.44 ns | 10.14 ns | 0.4196 | - | 3528 B |
| NewtonsoftOnlyParseTest | 204.0 ns | 2.65 ns | 2.35 ns | 0.1128 | - | 944 B |
| NewtonsoftTest | 3,039.1 ns | 33.40 ns | 26.07 ns | 0.9651 | 0.0191 | 8088 B |
| JsonPathNetTest | 1,266.9 ns | 25.03 ns | 35.90 ns | 0.2728 | - | 2288 B |
| JsonPathNetCacheTest | 855.0 ns | 9.16 ns | 7.65 ns | 0.1469 | - | 1232 B |
| JsonPathNetOnlyParseTest | 346.3 ns | 3.19 ns | 2.66 ns | 0.1259 | - | 1056 B |
| OnlyParseTest | 1,057.0 ns | 5.24 ns | 4.90 ns | 0.1659 | - | 1392 B |
嗯,基于 System.Text.Json 的解析比 Newtonsoft.json 还是快了点;具体实现都是最基本的,没有像 jsonpath 那些库做优化设计,性能感觉还是可以的
所有 实现放在 https://github.com/fs7744/Lmzzz
后续看什么时候有空搞搞 stream ,能行以后就这样偷懒搞吧