mod lexer;
use std::io;
use std::io::Write;
pub use self::Stage::{
Tokens
};
/// The compilation stage to stop at and print results for.
/// Only lexing is implemented so far; later stages (parsing, codegen)
/// would be added as further variants.
#[derive(PartialEq, Clone, Debug)]
pub enum Stage {
    /// Stop after lexing and print the token stream.
    Tokens
}
// #[derive(Debug, RustcDecodable)]
// struct Args {
// flag_l: bool,
// flag_p: bool,
// flag_i: bool
// }
/// Interactive REPL: prompts with `> `, lexes each line of input, and
/// prints the resulting token stream. Type `.quit` to exit.
fn main() {
    // Only the lexing stage exists for now; a CLI flag parser would
    // choose the stage here once later stages are implemented.
    let stage = Tokens;
    let stdin = io::stdin();
    let mut stdout = io::stdout();
    let mut input = String::new();
    loop {
        print!("> ");
        // `print!` does not flush, so push the prompt out before blocking on input.
        stdout.flush().unwrap();
        input.clear();
        stdin.read_line(&mut input).expect("Failed to read line");
        if input.as_str() == ".quit\n" {
            break;
        }
        let tokens = self::lexer::tokenize(input.as_str());
        if stage == Tokens {
            println!("{:?}", tokens);
            continue;
        }
        // Later stages would consume `tokens` here. (The original wrapped
        // this dispatch in an inner `loop` that would spin forever for any
        // stage without a `continue 'main`; a plain conditional avoids that.)
    }
}
// lexer.rs
use regex::Regex;
pub use self::Token::{
Def,
Extern,
Delimiter,
OpeningParenthesis,
ClosingParenthesis,
Comma,
Ident,
Number,
Operator
};
/// A single lexical token produced by `tokenize`.
#[derive(PartialEq, Clone, Debug)]
pub enum Token {
    /// The `def` keyword.
    Def,
    /// The `extern` keyword.
    Extern,
    /// Statement delimiter: the `;` character.
    Delimiter,
    /// `(`
    OpeningParenthesis,
    /// `)`
    ClosingParenthesis,
    /// `,`
    Comma,
    /// An identifier that is not a keyword.
    Ident(String),
    /// A numeric literal, stored as `f64`.
    Number(f64),
    /// Any other single non-whitespace character (e.g. `+`, `<`).
    Operator(String)
}
/// Splits `input` into a stream of `Token`s.
///
/// `#` comments (running to the end of their line) are stripped before
/// tokenization. Operators are any leftover single non-whitespace
/// character; keywords are separated from identifiers after matching.
///
/// # Panics
/// Panics if a matched number literal fails to parse as `f64`, which
/// would indicate a bug in the number regex.
pub fn tokenize(input: &str) -> Vec<Token> {
    // Strip comments: `#` up to (but not including) the line end.
    // `.` does not match `\n`, so newlines are preserved — and unlike the
    // previous `#.*\n` pattern, a comment on the final line with no
    // trailing newline is still removed.
    let comment_re = Regex::new(r"#.*").unwrap();
    // `replace_all` returns a `Cow<str>`; borrow it directly instead of
    // allocating a fresh `String`.
    let preprocessed = comment_re.replace_all(input, "");

    let mut result = Vec::new();

    // One regex as a union of per-token alternatives. Exactly one named
    // group participates in each match.
    let token_re = Regex::new(concat!(
        r"(?P<ident>\p{Alphabetic}\w*)|",
        r"(?P<number>\d+\.?\d*)|",
        r"(?P<delimiter>;)|",
        r"(?P<oppar>\()|",
        r"(?P<clpar>\))|",
        r"(?P<comma>,)|",
        r"(?P<operator>\S)")).unwrap();

    for cap in token_re.captures_iter(&preprocessed) {
        // Dispatch on whichever named group matched, binding the match
        // once instead of the `is_some()` + `unwrap()` double lookup.
        let token = if let Some(ident) = cap.name("ident") {
            match ident.as_str() {
                "def" => Def,
                "extern" => Extern,
                ident => Ident(ident.to_string())
            }
        } else if let Some(number) = cap.name("number") {
            match number.as_str().parse() {
                Ok(number) => Number(number),
                Err(_) => panic!("Lexer failed trying to parse number")
            }
        } else if cap.name("delimiter").is_some() {
            Delimiter
        } else if cap.name("oppar").is_some() {
            OpeningParenthesis
        } else if cap.name("clpar").is_some() {
            ClosingParenthesis
        } else if cap.name("comma").is_some() {
            Comma
        } else {
            Operator(cap.name("operator").unwrap().as_str().to_string())
        };
        result.push(token);
    }
    result
}