至此,您已经解析了代码并为其构建了一个清晰的AST。 现在该检查用户所表达的内容是否真正有意义了。 我们应该执行验证,识别出语义错误,并将它们与词法和语法错误(由解析器提供)一起报告。
建立自己的语言的系列
以前的帖子:
代码在GitHub上的标签为07_validation
实施语义检查
在上一篇文章中,我们看到了如何实现process函数,以便在AST的所有节点上执行操作。 典型的情况是我们只想在某些类型的节点上执行某些操作,但仍然希望使用process来遍历整棵树。 我们可以通过创建一个名为specificProcess的函数来做到这一点。
/**
 * Runs [operation] on the nodes visited by `process` that are instances of [klass].
 *
 * Nodes of any other type are visited by `process` but ignored here.
 *
 * @param klass runtime class used to select the nodes of interest
 * @param operation action invoked with each selected node, already typed as [T]
 */
fun <T: Node> Node.specificProcess(klass: Class<T>, operation: (T) -> Unit) {
    process { node ->
        if (klass.isInstance(node)) {
            // The isInstance check above guarantees this cast succeeds.
            operation(klass.cast(node))
        }
    }
}
让我们看看如何使用specificProcess来:
- 查找所有VarDeclaration,并检查它们没有重新声明已经声明过的变量
- 查找所有VarReference,并确认它们没有引用尚未声明的变量,或在该VarReference之后才声明的变量
- 对Assignment执行与VarReference相同的检查
/**
 * A single problem detected in the source code.
 *
 * @property message human-readable description of the problem
 * @property position point in the source the problem is reported at
 */
data class Error(
    val message: String,
    val position: Point
)
/**
 * Performs the semantic checks on this file and returns every problem found.
 *
 * Three passes are run, and their errors are reported in this order:
 * 1. variable declarations that reuse the name of an earlier declaration;
 * 2. variable references to a name that is never declared, or that appear before its declaration;
 * 3. assignments, checked with the same two rules as references.
 *
 * @return the list of semantic errors, empty when none was found
 */
fun SandyFile.validate() : List<Error> {
    val problems = LinkedList<Error>()

    // Pass 1: the first declaration seen for each name is recorded; later ones are duplicates.
    val declarations = HashMap<String, VarDeclaration>()
    this.specificProcess(VarDeclaration::class.java) { declaration ->
        val earlier = declarations[declaration.varName]
        if (earlier == null) {
            declarations[declaration.varName] = declaration
        } else {
            problems.add(Error("A variable named '${declaration.varName}' has been already declared at ${earlier.position!!.start}",
                    declaration.position!!.start))
        }
    }

    // Pass 2: every reference must target a known variable declared before the reference.
    this.specificProcess(VarReference::class.java) { reference ->
        val declaration = declarations[reference.varName]
        when {
            declaration == null ->
                problems.add(Error("There is no variable named '${reference.varName}'", reference.position!!.start))
            reference.isBefore(declaration) ->
                problems.add(Error("You cannot refer to variable '${reference.varName}' before its declaration", reference.position!!.start))
        }
    }

    // Pass 3: assignments follow the same rules as references.
    this.specificProcess(Assignment::class.java) { assignment ->
        val declaration = declarations[assignment.varName]
        when {
            declaration == null ->
                problems.add(Error("There is no variable named '${assignment.varName}'", assignment.position!!.start))
            assignment.isBefore(declaration) ->
                problems.add(Error("You cannot refer to variable '${assignment.varName}' before its declaration", assignment.position!!.start))
        }
    }

    return problems
}
好的,这样一来,在AST的根节点上调用validate就会返回所有可能的语义错误。
获取所有错误:词法,语法和语义
我们首先需要调用ANTLR解析器并获取:
- 解析树
- 词法和语法错误列表
/**
 * Outcome of running the ANTLR lexer and parser.
 *
 * @property root root of the parse tree, or null when none is available
 * @property errors lexical and syntactic errors collected while parsing
 */
data class AntlrParsingResult(val root : SandyFileContext?, val errors: List<Error>) {

    /** Returns true only when a parse tree is present and no error was collected. */
    fun isCorrect(): Boolean {
        return root != null && errors.isEmpty()
    }
}
/**
 * Exposes this string as an in-memory input stream.
 *
 * @param charset encoding used to turn the characters into bytes; UTF-8 by default
 * @return a stream over the encoded bytes of this string
 */
fun String.toStream(charset: Charset = Charsets.UTF_8): ByteArrayInputStream {
    val encoded = this.toByteArray(charset)
    return ByteArrayInputStream(encoded)
}
/**
 * Facade over the ANTLR lexer and parser for Sandy.
 *
 * It returns the parse tree together with every lexical and syntactic error
 * reported while building it, instead of letting ANTLR report them itself.
 */
object SandyAntlrParserFacade {

    /** Parses source code held in a string. */
    fun parse(code: String) : AntlrParsingResult = parse(code.toStream())

    /**
     * Parses the content of [file].
     *
     * The stream opened here is always closed before returning, including when parsing throws.
     */
    fun parse(file: File) : AntlrParsingResult = FileInputStream(file).use { parse(it) }

    /**
     * Parses the code read from [inputStream].
     *
     * This function does not close [inputStream] itself.
     * NOTE(review): the stream is decoded by ANTLRInputStream; presumably that uses the
     * platform default charset rather than UTF-8 - confirm against the ANTLR version in use.
     *
     * @return the parse tree and the lexical and syntactic errors collected while producing it
     */
    fun parse(inputStream: InputStream) : AntlrParsingResult {
        val lexicalAndSyntacticErrors = LinkedList<Error>()

        // A single listener is shared by the lexer and the parser so that both kinds
        // of error end up in the same list.
        val errorListener = object : ANTLRErrorListener {
            override fun reportAmbiguity(p0: Parser?, p1: DFA?, p2: Int, p3: Int, p4: Boolean, p5: BitSet?, p6: ATNConfigSet?) {
                // Ignored for now
            }

            override fun reportAttemptingFullContext(p0: Parser?, p1: DFA?, p2: Int, p3: Int, p4: BitSet?, p5: ATNConfigSet?) {
                // Ignored for now
            }

            override fun syntaxError(recognizer: Recognizer<*, *>?, offendingSymbol: Any?, line: Int, charPositionInline: Int, msg: String, ex: RecognitionException?) {
                lexicalAndSyntacticErrors.add(Error(msg, Point(line, charPositionInline)))
            }

            override fun reportContextSensitivity(p0: Parser?, p1: DFA?, p2: Int, p3: Int, p4: Int, p5: ATNConfigSet?) {
                // Ignored for now
            }
        }

        // The listeners installed by default are removed so that only ours receives the errors.
        val lexer = SandyLexer(ANTLRInputStream(inputStream))
        lexer.removeErrorListeners()
        lexer.addErrorListener(errorListener)

        val parser = SandyParser(CommonTokenStream(lexer))
        parser.removeErrorListeners()
        parser.addErrorListener(errorListener)

        val antlrRoot = parser.sandyFile()
        return AntlrParsingResult(antlrRoot, lexicalAndSyntacticErrors)
    }
}
然后,我们将解析树映射到AST并执行语义验证。 最后,我们返回AST和所有错误的组合。
/**
 * Outcome of the complete parsing pipeline.
 *
 * @property root root of the AST, or null when none could be built
 * @property errors every error collected for the parsed code
 */
data class ParsingResult(val root : SandyFile?, val errors: List<Error>) {

    /** Returns true only when an AST is present and no error was collected. */
    fun isCorrect(): Boolean = root != null && errors.isEmpty()
}
/**
 * Entry point for parsing Sandy code.
 *
 * It runs the ANTLR parser, maps the parse tree to the AST, validates the AST
 * and returns the AST together with all the errors found along the way.
 */
object SandyParserFacade {

    /** Parses source code held in a string. */
    fun parse(code: String) : ParsingResult = parse(code.toStream())

    /**
     * Parses the content of [file].
     *
     * The stream opened here is always closed before returning, including when parsing throws.
     */
    fun parse(file: File) : ParsingResult = FileInputStream(file).use { parse(it) }

    /**
     * Parses the code read from [inputStream].
     *
     * This function does not close [inputStream] itself.
     *
     * @return the AST (null when ANTLR produced no parse tree) and the lexical and
     * syntactic errors followed by the semantic ones
     */
    fun parse(inputStream: InputStream) : ParsingResult {
        val antlrParsingResult = SandyAntlrParserFacade.parse(inputStream)

        // The AST is built whenever a parse tree exists, even if errors were reported for it.
        val astRoot = antlrParsingResult.root?.toAst(considerPosition = true)

        // Without an AST there is nothing to validate, hence no semantic error.
        val semanticErrors = astRoot?.validate() ?: emptyList()

        return ParsingResult(astRoot, antlrParsingResult.errors + semanticErrors)
    }
}
在系统的其余部分,我们将简单地调用SandyParserFacade,而无需直接调用ANTLR解析器。
测试验证
它能正常工作吗? 让我们验证一下。
/**
 * Checks the semantic errors returned by SandyParserFacade.
 *
 * Each test parses a small snippet and compares the complete list of errors
 * (message and position) with the expected one.
 */
class ValidationTest {

    /** Declaring the same variable twice is reported on the second declaration. */
    @test fun duplicateVar() {
        val errors = SandyParserFacade.parse("""var a = 1
                                               |var a =2""".trimMargin("|")).errors
        assertEquals(listOf(Error("A variable named 'a' has been already declared at Line 1, Column 0", Point(2,0))), errors)
    }

    /** Referring to a variable that is never declared is reported at the reference. */
    @test fun unexistingVarReference() {
        val errors = SandyParserFacade.parse("var a = b + 2").errors
        assertEquals(listOf(Error("There is no variable named 'b'", Point(1,8))), errors)
    }

    /** Referring to a variable declared later in the file is reported at the reference. */
    @test fun varReferenceBeforeDeclaration() {
        val errors = SandyParserFacade.parse("""var a = b + 2
                                               |var b = 2""".trimMargin("|")).errors
        assertEquals(listOf(Error("You cannot refer to variable 'b' before its declaration", Point(1,8))), errors)
    }

    /** Assigning to a variable that is never declared is reported at the assignment. */
    @test fun unexistingVarAssignment() {
        val errors = SandyParserFacade.parse("a = 3").errors
        assertEquals(listOf(Error("There is no variable named 'a'", Point(1,0))), errors)
    }

    /** Assigning to a variable declared later in the file is reported at the assignment. */
    @test fun varAssignmentBeforeDeclaration() {
        val errors = SandyParserFacade.parse("""a = 1
                                               |var a =2""".trimMargin("|")).errors
        assertEquals(listOf(Error("You cannot refer to variable 'a' before its declaration", Point(1,0))), errors)
    }
}
结论
一切都很好:通过一个简单的调用,我们就可以获得所有错误的列表。 对于其中的每一个错误,我们都有相应的描述和位置。 这对于我们的编译器已经足够了,但是我们还需要在编辑器中显示这些错误。 我们将在以后的帖子中这样做。
翻译自: https://www.javacodegeeks.com/2016/09/building-compiler-language-validation.html