1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package ch.qos.cal10n.util;
23
24 import java.io.BufferedReader;
25 import java.io.IOException;
26 import java.io.Reader;
27 import java.util.ArrayList;
28 import java.util.List;
29
30 import ch.qos.cal10n.MessageConveyorException;
31 import ch.qos.cal10n.util.Token.TokenType;
32
33
34
35
36
37
38 public class TokenStream {
39
40 enum State {
41 START, COMMENT, KEY, SEPARATOR, VAL, TRAILING_BACKSLASH;
42 }
43
44 BufferedReader lineReader;
45 State state = State.START;
46
47 TokenStream(Reader reader) {
48 this.lineReader = new BufferedReader(reader);
49 }
50
51 List<Token> tokenize() {
52 List<Token> tokenList = new ArrayList<Token>();
53
54 while (true) {
55 String currentLine;
56 try {
57 currentLine = lineReader.readLine();
58 } catch (IOException e) {
59 throw new MessageConveyorException("Failed to read input stream", e);
60 }
61 if (currentLine == null) {
62 break;
63 }
64 if(state != State.TRAILING_BACKSLASH) {
65 state = State.START;
66 }
67 tokenizeLine(tokenList, currentLine);
68 tokenList.add(Token.EOL);
69 }
70
71 return tokenList;
72 }
73
74 private void tokenizeLine(List<Token> tokenList, String line) {
75 int len = line.length();
76 StringBuilder buf = new StringBuilder();
77
78 for (int pointer = 0; pointer < len; pointer++) {
79 char c = line.charAt(pointer);
80 switch (state) {
81 case START:
82 if (isWhiteSpace(c)) {
83
84 } else if (c == '#') {
85 state = State.COMMENT;
86 return;
87 } else if (isNonWhiteSpaceSeparator(c)) {
88 state = State.SEPARATOR;
89 buf.append(c);
90 } else {
91 state = State.KEY;
92 buf.append(c);
93 }
94 break;
95
96 case KEY:
97 if (isWhiteSpace(c) || isNonWhiteSpaceSeparator(c)) {
98 String lexicalValue = LexicalUtil.convertSpecialCharacters(buf).toString();
99 tokenList.add(new Token(TokenType.KEY, lexicalValue));
100 buf.setLength(0);
101 buf.append(c);
102 state = State.SEPARATOR;
103 } else {
104 buf.append(c);
105 }
106 break;
107
108 case SEPARATOR:
109
110 if (isWhiteSpace(c) || isNonWhiteSpaceSeparator(c)) {
111 buf.append(c);
112 } else {
113 tokenList.add(new Token(TokenType.SEPARATOR, buf.toString()));
114 buf.setLength(0);
115 buf.append(c);
116 state = State.VAL;
117 }
118 break;
119
120 case VAL:
121 if(c == '\\') {
122 if(isTrailingBackSlash(line, pointer+1)) {
123 String lexicalValue = LexicalUtil.convertSpecialCharacters(buf).toString();
124 tokenList.add(new Token(TokenType.VALUE, lexicalValue));
125 buf.setLength(0);
126 state = State.TRAILING_BACKSLASH;
127 tokenList.add(Token.TRAILING_BACKSLASH);
128 return;
129 } else {
130 buf.append(c);
131 }
132 } else {
133 buf.append(c);
134 }
135 break;
136
137 case TRAILING_BACKSLASH:
138 if (!isWhiteSpace(c)) {
139 buf.append(c);
140 state = State.VAL;
141 }
142 }
143 }
144
145 if(state == State.VAL) {
146 String lexicalValue = LexicalUtil.convertSpecialCharacters(buf).toString();
147 tokenList.add(new Token(TokenType.VALUE, lexicalValue));
148 buf.setLength(0);
149 }
150 }
151
152 boolean isTrailingBackSlash(String line, int next) {
153 int len = line.length();
154 for(int i = next; i < len; i++) {
155 char c = line.charAt(i);
156 if(!isWhiteSpace(c))
157 return false;
158 }
159 return true;
160 }
161
162 boolean isWhiteSpace(char c) {
163 switch (c) {
164 case ' ':
165 case '\t':
166 return true;
167 default:
168 return false;
169 }
170 }
171
172 boolean isNonWhiteSpaceSeparator(char c) {
173 switch (c) {
174 case ':':
175 case '=':
176 return true;
177 default:
178 return false;
179 }
180 }
181 }