设计算法消除左递归
在查找“怎样判断计算器输入合法”问题时,遇到了感兴趣的编译原理的词法分析和语法分析问题。在语法分析部分呢,使用的是LL1方法,因此对于文法需要消除左递归。于是就到了本博客论述的正题:设计算法消除左递归
0.知识回顾
这里直接copy 维基百科 的内容啦!
这里有中文的维基:传送门
直接左递归: A -> Aa | b
消除方法: A -> bA'
A'-> aA' | ~
间接左递归: A -> Bb | b
B -> Aa | a
消除方法:
先转换为直接左递归,
A -> Aab | ab | b
再消除,
A -> abA' | bA'
A'-> abA' | ~
上面举了一个简单示例,那其他我就不介绍了,hah
1.算法Pseudo
从 i = 1 到 n {
从 j = 1 到 i - 1 {
设Aj -> d1 | d2 | ... | dk
将所有规则 Ai -> Aj y换成
Ai -> d1 y | d2 y | ... | dk y
}
移除Ai规则中的直接左递归
}
2.代码Implementation
废话不多说,直接上代码
Rule.java部分
import java.util.Iterator;
import java.util.TreeSet;
/**
 * A single grammar rule: one left-hand symbol together with the sorted,
 * duplicate-free set of its right-hand alternatives.
 */
public class Rule {
    /** Left-hand side of the rule; stays {@code null} when the no-arg constructor is used. */
    String left;

    /** Right-hand alternatives, kept in natural (lexicographic) String order. */
    TreeSet<String> right = new TreeSet<String>();

    /** Creates a rule without a left-hand side. */
    Rule() {}

    /**
     * Creates a rule with no alternatives yet.
     *
     * @param left the left-hand symbol of the rule
     */
    Rule(String left) {
        this.left = left;
    }

    /**
     * Adds one right-hand alternative; a duplicate is silently ignored by the set.
     *
     * @param s the alternative to add
     */
    public void add(String s) {
        right.add(s);
    }

    /**
     * Returns the live set of alternatives (not a copy), so callers can
     * modify the rule through it.
     *
     * @return the backing set of right-hand alternatives
     */
    public TreeSet<String> getRight() {
        return right;
    }

    /**
     * @return the left-hand symbol of this rule
     */
    public String getLeft() {
        return left;
    }

    /**
     * Prints the rule to standard output as {@code left::=alt1|alt2|...}
     * followed by a line break. A rule without alternatives is printed as
     * {@code left::=} instead of failing with NoSuchElementException.
     */
    public void print() {
        StringBuilder line = new StringBuilder();
        line.append(left).append("::=");
        boolean first = true;
        for (String alternative : right) {
            if (!first) {
                line.append('|');
            }
            line.append(alternative);
            first = false;
        }
        System.out.println(line);
    }
}
LRecursionRemover.java部分
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.TreeSet;
/**
 * Reads a grammar from a text file and rewrites it in three steps:
 * substitution of earlier rules into later ones (indirect left recursion),
 * elimination of direct left recursion, and removal of rules that cannot be
 * reached from the start symbol.
 *
 * <p>Symbols are single characters: an upper-case letter is a non-terminal,
 * optionally followed by {@code '} for the helper non-terminals introduced by
 * {@link #removeStepTwo()}. The string {@code "~"} stands for the empty string.
 */
public class LRecursionRemover {
    /** Marker used in rule alternatives for the empty string. */
    private static final String EPSILON = "~";

    /** Start symbol; only rules reachable from it survive {@link #removeStepThree()}. */
    String start = "S";

    /** The rules, in the order their left-hand sides were first seen or created. */
    ArrayList<Rule> grammar = new ArrayList<Rule>();

    /** Maps a left-hand side to the 1-based position of its rule in {@link #grammar}. */
    Map<String, Integer> map = new TreeMap<String, Integer>();

    /** Reachability marks used by {@link #dfs(int)}; re-sized by {@link #removeStepThree()}. */
    boolean[] appeared = new boolean[0];

    /**
     * Concatenates two symbol strings, treating a whole-string {@code "~"} as
     * the empty string so that no literal "~" ends up inside an alternative.
     * Returns {@code "~"} when both parts are empty.
     */
    private static String concat(String prefix, String suffix) {
        String head = EPSILON.equals(prefix) ? "" : prefix;
        String tail = EPSILON.equals(suffix) ? "" : suffix;
        String joined = head + tail;
        return joined.isEmpty() ? EPSILON : joined;
    }

    /**
     * Step one: for every rule i and every earlier rule j, replaces each
     * alternative of rule i that starts with rule j's left-hand symbol by the
     * alternatives of rule j followed by the remainder.
     *
     * <p>Only the first character of each alternative is compared with the
     * first character of rule j's left-hand side. Direct left recursion of
     * rule i is not removed inside this loop; that is left to
     * {@link #removeStepTwo()}.
     */
    private void removeStepOne() {
        for (int i = 0, len = grammar.size(); i < len; i++) {
            TreeSet<String> target = grammar.get(i).getRight();
            for (int j = 0; j < i; j++) {
                Rule earlier = grammar.get(j);
                char head = earlier.getLeft().charAt(0);
                ArrayList<String> generated = new ArrayList<String>();
                Iterator<String> iter = target.iterator();
                while (iter.hasNext()) {
                    String alternative = iter.next();
                    if (alternative.charAt(0) != head) {
                        continue;
                    }
                    // Remove through the iterator: polling the smallest element and
                    // re-adding it would keep returning the same element and could
                    // leave a substituted alternative behind.
                    iter.remove();
                    String tail = alternative.substring(1);
                    for (String replacement : earlier.getRight()) {
                        generated.add(concat(replacement, tail));
                    }
                }
                target.addAll(generated);
            }
        }
    }

    /**
     * Step two: removes direct left recursion. For a rule
     * {@code A ::= Aα | β} a helper rule {@code A'} is appended and the rules
     * become {@code A ::= βA'} and {@code A' ::= αA' | ~}.
     *
     * <p>Only rules present when the method starts are examined; the helper
     * rules appended here are not revisited.
     */
    private void removeStepTwo() {
        for (int i = 0, len = grammar.size(); i < len; i++) {
            Rule rule = grammar.get(i);
            char head = rule.getLeft().charAt(0);
            TreeSet<String> alternatives = rule.getRight();

            boolean leftRecursive = false;
            for (String alternative : alternatives) {
                if (alternative.charAt(0) == head) {
                    leftRecursive = true;
                    break;
                }
            }
            if (!leftRecursive) {
                continue;
            }

            String newVN = head + "'";
            Rule tailRule = new Rule(newVN);
            tailRule.add(EPSILON);
            grammar.add(tailRule);
            map.put(newVN, grammar.size());

            TreeSet<String> rewritten = new TreeSet<String>();
            for (String alternative : alternatives) {
                if (alternative.charAt(0) == head) {
                    String rest = alternative.substring(1);
                    // A ::= A derives nothing new, so it is dropped.
                    if (!rest.isEmpty()) {
                        // The recursive tail belongs to A' only; adding it to A as
                        // well would let A derive strings it could not before.
                        tailRule.add(rest + newVN);
                    }
                } else {
                    rewritten.add(concat(alternative, newVN));
                }
            }
            alternatives.clear();
            alternatives.addAll(rewritten);
        }
    }

    /**
     * Marks rule {@code x} and, recursively, every rule whose left-hand side
     * occurs in an alternative of an already marked rule.
     *
     * @param x 0-based position of the rule in {@link #grammar}
     */
    private void dfs(int x) {
        if (appeared[x]) {
            return;
        }
        appeared[x] = true;
        for (String alternative : grammar.get(x).getRight()) {
            for (int i = 0, len = alternative.length(); i < len; i++) {
                if (!Character.isUpperCase(alternative.charAt(i))) {
                    continue;
                }
                boolean primed = i + 1 < len && alternative.charAt(i + 1) == '\'';
                String symbol = alternative.substring(i, primed ? i + 2 : i + 1);
                Integer position = map.get(symbol);
                // A non-terminal without a rule has nothing to visit.
                if (position != null) {
                    dfs(position - 1);
                }
            }
        }
    }

    /**
     * Step three: keeps only the rules reachable from {@link #start},
     * preserving their order, and rebuilds {@link #map} for the new positions.
     *
     * @throws IllegalStateException if no rule is defined for the start symbol
     */
    private void removeStepThree() {
        Integer startPosition = map.get(start);
        if (startPosition == null) {
            throw new IllegalStateException("no rule defined for start symbol " + start);
        }
        // Size the marks to the grammar instead of relying on a fixed capacity.
        appeared = new boolean[grammar.size()];
        dfs(startPosition - 1);

        ArrayList<Rule> reachable = new ArrayList<Rule>();
        for (int i = 0; i < appeared.length; i++) {
            if (appeared[i]) {
                reachable.add(grammar.get(i));
            }
        }
        grammar.clear();
        grammar.addAll(reachable);

        map.clear();
        for (int i = 0; i < grammar.size(); i++) {
            map.put(grammar.get(i).getLeft(), i + 1);
        }
    }

    /**
     * Reads whitespace-separated tokens of the form {@code left::=right} from
     * the file; each token contributes one alternative to the rule of its
     * left-hand side. The very first token of the file is discarded.
     *
     * @param fileName path of the grammar file
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IllegalArgumentException if a token lacks ':' or '=', or has an
     *         empty left-hand or right-hand side
     */
    private void parseInputFromFile(String fileName) throws FileNotFoundException {
        Scanner scanner = new Scanner(new File(fileName));
        try {
            // NOTE(review): the first token is skipped, presumably a header or a
            // rule count - confirm against the input format.
            if (scanner.hasNext()) {
                scanner.next();
            }
            while (scanner.hasNext()) {
                String curLine = scanner.next();
                int colon = curLine.indexOf(":");
                int equals = curLine.indexOf("=");
                if (colon <= 0 || equals < 0 || equals + 1 >= curLine.length()) {
                    throw new IllegalArgumentException("malformed rule: " + curLine);
                }
                String left = curLine.substring(0, colon);
                String right = curLine.substring(equals + 1);

                Integer position = map.get(left);
                if (position == null) {
                    grammar.add(new Rule(left));
                    position = grammar.size();
                    map.put(left, position);
                }
                // Add to the rule that owns this left-hand side, which is not
                // necessarily the most recently created rule.
                grammar.get(position - 1).add(right);
            }
        } finally {
            scanner.close();
        }
    }

    /** Prints every rule of the grammar, one per line, in grammar order. */
    private void print() {
        for (Rule rule : grammar) {
            if (rule.getRight().isEmpty()) {
                // A rule may be left without alternatives after step two.
                System.out.println(rule.getLeft() + "::=");
            } else {
                rule.print();
            }
        }
    }

    /**
     * Runs the three steps on {@code src/resource/test.txt} and prints the
     * grammar after each one.
     *
     * @param args unused
     * @throws FileNotFoundException if the grammar file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        System.out.println("I am writing a Left Recursion Remover");
        LRecursionRemover lRecursionRemover = new LRecursionRemover();

        // parse input from file
        lRecursionRemover.parseInputFromFile("src/resource/test.txt");
        System.out.println("The input rule set:");
        lRecursionRemover.print();

        // remove step one: indirect recursion remove
        lRecursionRemover.removeStepOne();
        System.out.println("\nThe rule set after indirect recursion remove:");
        lRecursionRemover.print();

        // remove step two: direct recursion remove
        lRecursionRemover.removeStepTwo();
        System.out.println("\nThe rule set after direct recursion remove:");
        lRecursionRemover.print();

        // remove rules not needed, that is, those rules are useless in the rule set
        lRecursionRemover.removeStepThree();
        System.out.println("\nSimplify the rule set:");
        lRecursionRemover.print();
    }
}
3.测试Result
I am writing a Left Recursion Remover
The input rule set:
Q::=Rb|b
R::=Sa|a
S::=Qc|c
The rule set after indirect recursion remove:
Q::=Rb|b
R::=Sa|a
S::=Sabc|abc|bc|c
The rule set after direct recursion remove:
Q::=Rb|b
R::=Sa|a
S::=abcS'|bcS'|cS'
S'::=abcS'|~
Simplify the rule set:
S::=abcS'|bcS'|cS'
S'::=abcS'|~
4.小Conclusion
总的来说,还是蛮高兴的,hah
中间遇到了一些问题,但磕磕绊绊还是搞定了