Repository URL to install this package:
|
Version:
2023.12.1 ▾
|
/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
using System;
using System.Collections.Generic;
using System.Text;
using Antlr4.Runtime;
using Antlr4.Runtime.Misc;
using Antlr4.Runtime.Sharpen;
using Antlr4.Runtime.Tree;
using Antlr4.Runtime.Tree.Pattern;
namespace Antlr4.Runtime.Tree.Pattern
{
/// <summary>
/// A tree pattern matching mechanism for ANTLR
/// <see cref="Antlr4.Runtime.Tree.IParseTree"/>
/// s.
/// <p>Patterns are strings of source input text with special tags representing
/// token or rule references such as:</p>
/// <p>
/// <c><ID> = <expr>;</c>
/// </p>
/// <p>Given a pattern start rule such as
/// <c>statement</c>
/// , this object constructs
/// a
/// <see cref="Antlr4.Runtime.Tree.IParseTree"/>
/// with placeholders for the
/// <c>ID</c>
/// and
/// <c>expr</c>
/// subtree. Then the
/// <see cref="Match(Antlr4.Runtime.Tree.IParseTree, ParseTreePattern)"/>
/// routines can compare an actual
/// <see cref="Antlr4.Runtime.Tree.IParseTree"/>
/// from a parse with this pattern. Tag
/// <c><ID></c>
/// matches
/// any
/// <c>ID</c>
/// token and tag
/// <c><expr></c>
/// references the result of the
/// <c>expr</c>
/// rule (generally an instance of
/// <c>ExprContext</c>
/// .</p>
/// <p>Pattern
/// <c>x = 0;</c>
/// is a similar pattern that matches the same pattern
/// except that it requires the identifier to be
/// <c>x</c>
/// and the expression to
/// be
/// <c>0</c>
/// .</p>
/// <p>The
/// <see cref="Matches(Antlr4.Runtime.Tree.IParseTree, ParseTreePattern)"/>
/// routines return
/// <see langword="true"/>
/// or
/// <see langword="false"/>
/// based
/// upon a match for the tree rooted at the parameter sent in. The
/// <see cref="Match(Antlr4.Runtime.Tree.IParseTree, ParseTreePattern)"/>
/// routines return a
/// <see cref="ParseTreeMatch"/>
/// object that
/// contains the parse tree, the parse tree pattern, and a map from tag name to
/// matched nodes (more below). A subtree that fails to match, returns with
/// <see cref="ParseTreeMatch.MismatchedNode"/>
/// set to the first tree node that did not
/// match.</p>
/// <p>For efficiency, you can compile a tree pattern in string form to a
/// <see cref="ParseTreePattern"/>
/// object.</p>
/// <p>See
/// <c>TestParseTreeMatcher</c>
/// for lots of examples.
/// <see cref="ParseTreePattern"/>
/// has two static helper methods:
/// <see cref="ParseTreePattern.FindAll(Antlr4.Runtime.Tree.IParseTree, string)"/>
/// and
/// <see cref="ParseTreePattern.Match(Antlr4.Runtime.Tree.IParseTree)"/>
/// that
/// are easy to use but not super efficient because they create new
/// <see cref="ParseTreePatternMatcher"/>
/// objects each time and have to compile the
/// pattern in string form before using it.</p>
/// <p>The lexer and parser that you pass into the
/// <see cref="ParseTreePatternMatcher"/>
/// constructor are used to parse the pattern in string form. The lexer converts
/// the
/// <c><ID> = <expr>;</c>
/// into a sequence of four tokens (assuming lexer
/// throws out whitespace or puts it on a hidden channel). Be aware that the
/// input stream is reset for the lexer (but not the parser; a
/// <see cref="Antlr4.Runtime.ParserInterpreter"/>
/// is created to parse the input.). Any user-defined
/// fields you have put into the lexer might get changed when this mechanism asks
/// it to scan the pattern string.</p>
/// <p>Normally a parser does not accept token
/// <c><expr></c>
/// as a valid
/// <c>expr</c>
/// but, from the parser passed in, we create a special version of
/// the underlying grammar representation (an
/// <see cref="Antlr4.Runtime.Atn.ATN"/>
/// ) that allows imaginary
/// tokens representing rules (
/// <c><expr></c>
/// ) to match entire rules. We call
/// these <em>bypass alternatives</em>.</p>
/// <p>Delimiters are
/// <c><</c>
/// and
/// <c>></c>
/// , with
/// <c>\</c>
/// as the escape string
/// by default, but you can set them to whatever you want using
/// <see cref="SetDelimiters(string, string, string)"/>
/// . You must escape both start and stop strings
/// <c>\<</c>
/// and
/// <c>\></c>
/// .</p>
/// </summary>
public class ParseTreePatternMatcher
{
[System.Serializable]
public class CannotInvokeStartRule : Exception
{
public CannotInvokeStartRule(Exception e)
: base(e.Message, e)
{
}
}
[System.Serializable]
public class StartRuleDoesNotConsumeFullPattern : Exception
{
// Fixes https://github.com/antlr/antlr4/issues/413
// "Tree pattern compilation doesn't check for a complete parse"
}
/// <summary>
/// This is the backing field for
/// <see cref="Lexer()"/>
/// .
/// </summary>
private readonly Lexer lexer;
/// <summary>
/// This is the backing field for
/// <see cref="Parser()"/>
/// .
/// </summary>
private readonly Parser parser;
protected internal string start = "<";
protected internal string stop = ">";
protected internal string escape = "\\";
/// <summary>
/// Constructs a
/// <see cref="ParseTreePatternMatcher"/>
/// or from a
/// <see cref="Antlr4.Runtime.Lexer"/>
/// and
/// <see cref="Antlr4.Runtime.Parser"/>
/// object. The lexer input stream is altered for tokenizing
/// the tree patterns. The parser is used as a convenient mechanism to get
/// the grammar name, plus token, rule names.
/// </summary>
public ParseTreePatternMatcher(Lexer lexer, Parser parser)
{
// e.g., \< and \> must escape BOTH!
this.lexer = lexer;
this.parser = parser;
}
/// <summary>
/// Set the delimiters used for marking rule and token tags within concrete
/// syntax used by the tree pattern parser.
/// </summary>
/// <remarks>
/// Set the delimiters used for marking rule and token tags within concrete
/// syntax used by the tree pattern parser.
/// </remarks>
/// <param name="start">The start delimiter.</param>
/// <param name="stop">The stop delimiter.</param>
/// <param name="escapeLeft">The escape sequence to use for escaping a start or stop delimiter.</param>
/// <exception>
/// IllegalArgumentException
/// if
/// <paramref name="start"/>
/// is
/// <see langword="null"/>
/// or empty.
/// </exception>
/// <exception>
/// IllegalArgumentException
/// if
/// <paramref name="stop"/>
/// is
/// <see langword="null"/>
/// or empty.
/// </exception>
public virtual void SetDelimiters(string start, string stop, string escapeLeft)
{
if (string.IsNullOrEmpty(start))
{
throw new ArgumentException("start cannot be null or empty");
}
if (string.IsNullOrEmpty(stop))
{
throw new ArgumentException("stop cannot be null or empty");
}
this.start = start;
this.stop = stop;
this.escape = escapeLeft;
}
/// <summary>
/// Does
/// <paramref name="pattern"/>
/// matched as rule
/// <paramref name="patternRuleIndex"/>
/// match
/// <paramref name="tree"/>
/// ?
/// </summary>
public virtual bool Matches(IParseTree tree, string pattern, int patternRuleIndex)
{
ParseTreePattern p = Compile(pattern, patternRuleIndex);
return Matches(tree, p);
}
/// <summary>
/// Does
/// <paramref name="pattern"/>
/// matched as rule patternRuleIndex match tree? Pass in a
/// compiled pattern instead of a string representation of a tree pattern.
/// </summary>
public virtual bool Matches(IParseTree tree, ParseTreePattern pattern)
{
MultiMap<string, IParseTree> labels = new MultiMap<string, IParseTree>();
IParseTree mismatchedNode = MatchImpl(tree, pattern.PatternTree, labels);
return mismatchedNode == null;
}
/// <summary>
/// Compare
/// <paramref name="pattern"/>
/// matched as rule
/// <paramref name="patternRuleIndex"/>
/// against
/// <paramref name="tree"/>
/// and return a
/// <see cref="ParseTreeMatch"/>
/// object that contains the
/// matched elements, or the node at which the match failed.
/// </summary>
public virtual ParseTreeMatch Match(IParseTree tree, string pattern, int patternRuleIndex)
{
ParseTreePattern p = Compile(pattern, patternRuleIndex);
return Match(tree, p);
}
/// <summary>
/// Compare
/// <paramref name="pattern"/>
/// matched against
/// <paramref name="tree"/>
/// and return a
/// <see cref="ParseTreeMatch"/>
/// object that contains the matched elements, or the
/// node at which the match failed. Pass in a compiled pattern instead of a
/// string representation of a tree pattern.
/// </summary>
[return: NotNull]
public virtual ParseTreeMatch Match(IParseTree tree, ParseTreePattern pattern)
{
MultiMap<string, IParseTree> labels = new MultiMap<string, IParseTree>();
IParseTree mismatchedNode = MatchImpl(tree, pattern.PatternTree, labels);
return new ParseTreeMatch(tree, pattern, labels, mismatchedNode);
}
/// <summary>
/// For repeated use of a tree pattern, compile it to a
/// <see cref="ParseTreePattern"/>
/// using this method.
/// </summary>
public virtual ParseTreePattern Compile(string pattern, int patternRuleIndex)
{
IList<IToken> tokenList = Tokenize(pattern);
ListTokenSource tokenSrc = new ListTokenSource(tokenList);
CommonTokenStream tokens = new CommonTokenStream(tokenSrc);
ParserInterpreter parserInterp = new ParserInterpreter(parser.GrammarFileName,
parser.Vocabulary,
Arrays.AsList(parser.RuleNames),
parser.GetATNWithBypassAlts(),
tokens);
IParseTree tree = null;
try
{
parserInterp.ErrorHandler = new BailErrorStrategy();
tree = parserInterp.Parse(patternRuleIndex);
}
catch (ParseCanceledException e)
{
// System.out.println("pattern tree = "+tree.toStringTree(parserInterp));
throw (RecognitionException)e.InnerException;
}
catch (RecognitionException)
{
throw;
}
catch (Exception e)
{
throw new ParseTreePatternMatcher.CannotInvokeStartRule(e);
}
// Make sure tree pattern compilation checks for a complete parse
if (tokens.LA(1) != TokenConstants.EOF)
{
throw new ParseTreePatternMatcher.StartRuleDoesNotConsumeFullPattern();
}
return new ParseTreePattern(this, pattern, patternRuleIndex, tree);
}
/// <summary>Used to convert the tree pattern string into a series of tokens.</summary>
/// <remarks>
/// Used to convert the tree pattern string into a series of tokens. The
/// input stream is reset.
/// </remarks>
[NotNull]
public virtual Lexer Lexer
{
get
{
return lexer;
}
}
/// <summary>
/// Used to collect to the grammar file name, token names, rule names for
/// used to parse the pattern into a parse tree.
/// </summary>
/// <remarks>
/// Used to collect to the grammar file name, token names, rule names for
/// used to parse the pattern into a parse tree.
/// </remarks>
[NotNull]
public virtual Parser Parser
{
get
{
return parser;
}
}
// ---- SUPPORT CODE ----
/// <summary>
/// Recursively walk
/// <paramref name="tree"/>
/// against
/// <paramref name="patternTree"/>
/// , filling
/// <c>match.</c>
/// <see cref="ParseTreeMatch.Labels"/>
/// .
/// </summary>
/// <returns>
/// the first node encountered in
/// <paramref name="tree"/>
/// which does not match
/// a corresponding node in
/// <paramref name="patternTree"/>
/// , or
/// <see langword="null"/>
/// if the match
/// was successful. The specific node returned depends on the matching
/// algorithm used by the implementation, and may be overridden.
/// </returns>
[return: Nullable]
protected internal virtual IParseTree MatchImpl(IParseTree tree, IParseTree patternTree, MultiMap<string, IParseTree> labels)
{
if (tree == null)
{
throw new ArgumentException("tree cannot be null");
}
if (patternTree == null)
{
throw new ArgumentException("patternTree cannot be null");
}
// x and <ID>, x and y, or x and x; or could be mismatched types
if (tree is ITerminalNode && patternTree is ITerminalNode)
{
ITerminalNode t1 = (ITerminalNode)tree;
ITerminalNode t2 = (ITerminalNode)patternTree;
IParseTree mismatchedNode = null;
// both are tokens and they have same type
if (t1.Symbol.Type == t2.Symbol.Type)
{
if (t2.Symbol is TokenTagToken)
{
// x and <ID>
TokenTagToken tokenTagToken = (TokenTagToken)t2.Symbol;
// track label->list-of-nodes for both token name and label (if any)
labels.Map(tokenTagToken.TokenName, tree);
if (tokenTagToken.Label != null)
{
labels.Map(tokenTagToken.Label, tree);
}
}
else
{
if (t1.GetText().Equals(t2.GetText(), StringComparison.Ordinal))
{
}
else
{
// x and x
// x and y
if (mismatchedNode == null)
{
mismatchedNode = t1;
}
}
}
}
else
{
if (mismatchedNode == null)
{
mismatchedNode = t1;
}
}
return mismatchedNode;
}
if (tree is ParserRuleContext && patternTree is ParserRuleContext)
{
ParserRuleContext r1 = (ParserRuleContext)tree;
ParserRuleContext r2 = (ParserRuleContext)patternTree;
IParseTree mismatchedNode = null;
// (expr ...) and <expr>
RuleTagToken ruleTagToken = GetRuleTagToken(r2);
if (ruleTagToken != null)
{
if (r1.RuleIndex == r2.RuleIndex)
{
// track label->list-of-nodes for both rule name and label (if any)
labels.Map(ruleTagToken.RuleName, tree);
if (ruleTagToken.Label != null)
{
labels.Map(ruleTagToken.Label, tree);
}
}
else
{
if (mismatchedNode == null)
{
mismatchedNode = r1;
}
}
return mismatchedNode;
}
// (expr ...) and (expr ...)
if (r1.ChildCount != r2.ChildCount)
{
if (mismatchedNode == null)
{
mismatchedNode = r1;
}
return mismatchedNode;
}
int n = r1.ChildCount;
for (int i = 0; i < n; i++)
{
IParseTree childMatch = MatchImpl(r1.GetChild(i), patternTree.GetChild(i), labels);
if (childMatch != null)
{
return childMatch;
}
}
return mismatchedNode;
}
// if nodes aren't both tokens or both rule nodes, can't match
return tree;
}
/// <summary>
/// Is
/// <paramref name="t"/>
///
/// <c>(expr <expr>)</c>
/// subtree?
/// </summary>
protected internal virtual RuleTagToken GetRuleTagToken(IParseTree t)
{
if (t is IRuleNode)
{
IRuleNode r = (IRuleNode)t;
if (r.ChildCount == 1 && r.GetChild(0) is ITerminalNode)
{
ITerminalNode c = (ITerminalNode)r.GetChild(0);
if (c.Symbol is RuleTagToken)
{
// System.out.println("rule tag subtree "+t.toStringTree(parser));
return (RuleTagToken)c.Symbol;
}
}
}
return null;
}
public virtual IList<IToken> Tokenize(string pattern)
{
// split pattern into chunks: sea (raw input) and islands (<ID>, <expr>)
IList<Chunk> chunks = Split(pattern);
// create token stream from text and tags
IList<IToken> tokens = new List<IToken>();
foreach (Chunk chunk in chunks)
{
if (chunk is TagChunk)
{
TagChunk tagChunk = (TagChunk)chunk;
// add special rule token or conjure up new token from name
if (System.Char.IsUpper(tagChunk.Tag[0]))
{
int ttype = parser.GetTokenType(tagChunk.Tag);
if (ttype == TokenConstants.InvalidType)
{
throw new ArgumentException("Unknown token " + tagChunk.Tag + " in pattern: " + pattern);
}
TokenTagToken t = new TokenTagToken(tagChunk.Tag, ttype, tagChunk.Label);
tokens.Add(t);
}
else
{
if (System.Char.IsLower(tagChunk.Tag[0]))
{
int ruleIndex = parser.GetRuleIndex(tagChunk.Tag);
if (ruleIndex == -1)
{
throw new ArgumentException("Unknown rule " + tagChunk.Tag + " in pattern: " + pattern);
}
int ruleImaginaryTokenType = parser.GetATNWithBypassAlts().ruleToTokenType[ruleIndex];
tokens.Add(new RuleTagToken(tagChunk.Tag, ruleImaginaryTokenType, tagChunk.Label));
}
else
{
throw new ArgumentException("invalid tag: " + tagChunk.Tag + " in pattern: " + pattern);
}
}
}
else
{
TextChunk textChunk = (TextChunk)chunk;
AntlrInputStream @in = new AntlrInputStream(textChunk.Text);
lexer.SetInputStream(@in);
IToken t = lexer.NextToken();
while (t.Type != TokenConstants.EOF)
{
tokens.Add(t);
t = lexer.NextToken();
}
}
}
// System.out.println("tokens="+tokens);
return tokens;
}
/// <summary>
/// Split
/// <c><ID> = <e:expr> ;</c>
/// into 4 chunks for tokenizing by
/// <see cref="Tokenize(string)"/>
/// .
/// </summary>
internal virtual IList<Chunk> Split(string pattern)
{
int p = 0;
int n = pattern.Length;
IList<Chunk> chunks = new List<Chunk>();
// find all start and stop indexes first, then collect
IList<int> starts = new List<int>();
IList<int> stops = new List<int>();
while (p < n)
{
if (p == pattern.IndexOf(escape + start, p))
{
p += escape.Length + start.Length;
}
else
{
if (p == pattern.IndexOf(escape + stop, p))
{
p += escape.Length + stop.Length;
}
else
{
if (p == pattern.IndexOf(start, p))
{
starts.Add(p);
p += start.Length;
}
else
{
if (p == pattern.IndexOf(stop, p))
{
stops.Add(p);
p += stop.Length;
}
else
{
p++;
}
}
}
}
}
// System.out.println("");
// System.out.println(starts);
// System.out.println(stops);
if (starts.Count > stops.Count)
{
throw new ArgumentException("unterminated tag in pattern: " + pattern);
}
if (starts.Count < stops.Count)
{
throw new ArgumentException("missing start tag in pattern: " + pattern);
}
int ntags = starts.Count;
for (int i = 0; i < ntags; i++)
{
if (starts[i] >= stops[i])
{
throw new ArgumentException("tag delimiters out of order in pattern: " + pattern);
}
}
// collect into chunks now
if (ntags == 0)
{
string text = Sharpen.Runtime.Substring(pattern, 0, n);
chunks.Add(new TextChunk(text));
}
if (ntags > 0 && starts[0] > 0)
{
// copy text up to first tag into chunks
string text = Sharpen.Runtime.Substring(pattern, 0, starts[0]);
chunks.Add(new TextChunk(text));
}
for (int i_1 = 0; i_1 < ntags; i_1++)
{
// copy inside of <tag>
string tag = Sharpen.Runtime.Substring(pattern, starts[i_1] + start.Length, stops[i_1]);
string ruleOrToken = tag;
string label = null;
int colon = tag.IndexOf(':');
if (colon >= 0)
{
label = Sharpen.Runtime.Substring(tag, 0, colon);
ruleOrToken = Sharpen.Runtime.Substring(tag, colon + 1, tag.Length);
}
chunks.Add(new TagChunk(label, ruleOrToken));
if (i_1 + 1 < ntags)
{
// copy from end of <tag> to start of next
string text = Sharpen.Runtime.Substring(pattern, stops[i_1] + stop.Length, starts[i_1 + 1]);
chunks.Add(new TextChunk(text));
}
}
if (ntags > 0)
{
int afterLastTag = stops[ntags - 1] + stop.Length;
if (afterLastTag < n)
{
// copy text from end of last tag to end
string text = Sharpen.Runtime.Substring(pattern, afterLastTag, n);
chunks.Add(new TextChunk(text));
}
}
// strip out the escape sequences from text chunks but not tags
for (int i_2 = 0; i_2 < chunks.Count; i_2++)
{
Chunk c = chunks[i_2];
if (c is TextChunk)
{
TextChunk tc = (TextChunk)c;
string unescaped = tc.Text.Replace(escape, string.Empty);
if (unescaped.Length < tc.Text.Length)
{
chunks.Set(i_2, new TextChunk(unescaped));
}
}
}
return chunks;
}
}
}