AuD
Lecture 'Algorithmen und Datenstrukturen' (code examples)
LexicalScanner.java
Go to the documentation of this file.
1package aud.util;
2
3import java.util.regex.*;
4
/**
 * Base class for a simple lexical scanner.
 *
 * <p>The scanner holds the not yet consumed part of the input as a string.
 * Each call to {@link #next()} skips leading white space and then tries the
 * rules in table order; the first rule whose pattern matches a prefix of the
 * remaining input wins, and the matched prefix is removed from the input.
 */
public class LexicalScanner {

  /** Token id reported when there is no more input. */
  public static final int END_OF_INPUT = -1;
  /** Token id reported when no rule matches (usually implies a syntax error). */
  public static final int NO_MATCH = -2;

  /** A scanner rule: a token id paired with the pattern that recognizes the token. */
  public static class Rule {
    /**
     * Creates a rule from a compiled pattern.
     *
     * @param id token id reported when {@code pattern} matches
     * @param pattern pattern that is matched against the start of the remaining input
     */
    public Rule(int id, Pattern pattern) {
      id_ = id;
      pattern_ = pattern;
    }

    /**
     * Creates a rule from a regular expression given as a string.
     *
     * @param id token id reported when {@code pattern} matches
     * @param pattern regular expression, compiled with {@link Pattern#compile(String)}
     */
    public Rule(int id, String pattern) {
      this(id, Pattern.compile(pattern));
    }

    int id_;
    Pattern pattern_;
  }

  /** Remaining (not yet consumed) input. */
  String input_ = null;
  /** Text of the last successful match, or {@code null} after a failed match. */
  String text_ = null;
  /** Result of the last call to {@link #next()}. */
  int id_ = NO_MATCH;
  /** Rule table passed to the constructor; may be {@code null}. */
  Rule[] rules_ = null;

  /** White space. */
  public static final Pattern P_WHITESPACE = Pattern.compile("\\s+");
  /**
   * Identifiers.
   *
   * <p>NOTE(review): the leading letter/underscore is optional in this pattern,
   * so words that start with a digit are accepted as well. Kept unchanged
   * because callers may rely on it.
   */
  public static final Pattern P_IDENTIFIER =
      Pattern.compile("[_a-zA-Z]?(\\w|_)+");
  /** Floating point number: optional sign, digits with optional point, optional exponent. */
  public static final Pattern P_FLOAT =
      Pattern.compile("[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?");

  /**
   * Creates a new scanner processing {@code input}.
   *
   * @param rules rule table used by {@link #next()}; tried in array order
   * @param input text to be scanned
   */
  public LexicalScanner(Rule[] rules, String input) {
    rules_ = rules;
    input_ = input;
  }

  /** Removes leading white space from the remaining input (called by match and next). */
  protected void eatWhiteSpace() {
    if (!endOfInput()) {
      Matcher m = P_WHITESPACE.matcher(input_);
      if (m.lookingAt()) {
        input_ = input_.substring(m.end());
      }
    }
  }

  /**
   * Sets new input and resets the scanner state.
   *
   * @param input text to be scanned
   */
  public void setInput(String input) {
    input_ = input;
    // Forget the result of any previous scan so that stale values are not reported.
    text_ = null;
    id_ = NO_MATCH;
  }

  /**
   * Gets the text of the last successful match.
   *
   * @return matched text, or {@code null} if the last match attempt failed
   */
  public String matchedText() {
    return text_;
  }

  /**
   * Gets the result of the last call to {@link #next()}.
   *
   * @return token id, {@link #END_OF_INPUT} or {@link #NO_MATCH}
   */
  public int matchedTokenId() {
    return id_;
  }

  /**
   * Gets the remaining text.
   *
   * @return the part of the input that was not consumed yet
   */
  public String remainder() {
    return input_;
  }

  /**
   * Reached end of input?
   *
   * @return {@code true} if the remaining input is {@code null} or empty
   */
  public boolean endOfInput() {
    return input_ == null || input_.length() == 0;
  }

  /**
   * Matches the start of the remainder against pattern {@code p}.
   *
   * <p>Leading white space is skipped first. On success the matched prefix is
   * stored (see {@link #matchedText()}) and removed from the remaining input.
   *
   * @param p pattern to match
   * @return {@code true} if {@code p} matched a prefix of the remaining input
   */
  protected boolean match(Pattern p) {
    text_ = null;
    eatWhiteSpace();

    if (endOfInput()) {
      return false;
    }

    Matcher m = p.matcher(input_);
    if (!m.lookingAt()) {
      return false;
    }

    int n = m.end();
    text_ = input_.substring(0, n);
    input_ = input_.substring(n);

    return true;
  }

  /**
   * Matches the remainder against a table of rules.
   *
   * @param rules rules to try in array order; {@code null} yields {@link #NO_MATCH}
   * @return id of the first matching rule, {@link #END_OF_INPUT} if nothing but
   *     white space is left, or {@link #NO_MATCH} if no rule matches
   */
  protected int next(Rule[] rules) {
    // Skip white space before the end-of-input test so that trailing blanks
    // are reported as END_OF_INPUT rather than NO_MATCH.
    eatWhiteSpace();

    if (endOfInput()) {
      return id_ = END_OF_INPUT;
    }
    // Check the table that is actually iterated below, not the field.
    if (rules == null) {
      return id_ = NO_MATCH;
    }

    for (Rule rule : rules) {
      if (match(rule.pattern_)) {
        return id_ = rule.id_;
      }
    }
    return id_ = NO_MATCH;
  }

  /**
   * Matches the remainder against the rules provided to the constructor.
   *
   * @return see {@link #next(Rule[])}
   */
  public int next() {
    return next(rules_);
  }

  /**
   * Testing and example for usage.
   *
   * @param args optional: {@code args[0]} is scanned instead of the built-in sample
   */
  public static void main(String[] args) {

    Rule[] rules = {
      new Rule(1, "[0-9]*\\.?[0-9]+"),
      new Rule(2, "[a-z]+")
    };

    LexicalScanner s = new LexicalScanner(
        rules, args.length == 0 ? " 12.3a 12 bcd 34 " : args[0]);

    System.out.println("input = '" + s.remainder() + "'");

    while (s.next() != END_OF_INPUT) {
      if (s.matchedTokenId() == NO_MATCH) {
        System.out.println("syntax error near '" + s.remainder() + "'");
        break;
      }
      System.out.println("next token id = " + s.matchedTokenId());
      System.out.println("matched text = '" + s.matchedText() + "'");
      System.out.println("remaining input = '" + s.remainder() + "'");
    }
  }
}
Base class for a simple lexical scanner.
static final int END_OF_INPUT
no more input
void eatWhiteSpace()
ignore white space (called by match)
int next(Rule[] rules)
match remainder to table of rules
int matchedTokenId()
get result of last call to next()
void setInput(String input)
set input (resets scanner state)
boolean endOfInput()
reached end of input?
static final Pattern P_WHITESPACE
white space
String matchedText()
get text of last match or call to next
int next()
match remainder to rules provided to constructor
LexicalScanner(Rule[] rules, String input)
create new scanner processing input
String remainder()
get remaining text
static final int NO_MATCH
no match (usually implies a syntax error)
static final Pattern P_IDENTIFIER
identifiers
static void main(String[] args)
testing and example for usage
boolean match(Pattern p)
Match remainder against pattern p.
static final Pattern P_FLOAT
floating point number