HTML Parser: state-machine-style
- inputs a string of html
- prints the state path taken by the machine
Not all of this was implemented, but this is the basic state-transition table:
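| Input \ State | Body-text | Open-tag | Tag-text  |
|---------------|-----------|----------|-----------|
| <             | Open-tag  | (error)  | (error)   |
| >             | (error)   | (error)  | Body-text |
| [^><]         | Body-text | Tag-text | Tag-text  |
| EOF           | (end)     | (error)  | (error)   |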
A lower-level parser than HtmlOrganizer, and not very useful. But educational, probably.
code
/**
 * A minimal state-machine HTML tokenizer.
 *
 * <p>The machine starts in {@link BodyState} and consumes the input one
 * character at a time. Body and tag characters are echoed to
 * {@code System.out}; each time the machine moves to a different state
 * object, that state's name is printed on its own line. Malformed input
 * prints {@code "Parse error."} and stops the machine.
 */
public class Parser {

    /** Sentinel character handed to the current state to signal end of input. */
    private static final char EOF = 0;

    /**
     * Runs the state machine over {@code s}, printing the echoed characters
     * and the state path to {@code System.out}.
     *
     * <p>Parsing stops at the first transition that yields no next state
     * (a parse error, or a literal NUL character in the input). If the whole
     * string is consumed, the end-of-input sentinel is fed to the final state
     * so that input ending inside a tag is reported as a parse error.
     *
     * @param s the HTML text to parse; must not be {@code null}
     */
    public void parse(String s) {
        State state = new BodyState();
        for (int i = 0; i < s.length() && state != null; i++) {
            State previous = state;
            state = state.nextState(s.charAt(i));
            // A null state means the machine halted; there is nothing to
            // print, and dereferencing it would throw NullPointerException.
            if (state != null && state != previous) {
                System.out.println("\n" + state.toString());
            }
        }
        if (state != null) {
            // Input exhausted while the machine is still running: let the
            // final state decide whether ending here is legal.
            state.nextState(EOF);
        }
    }

    /** One node of the state machine. */
    public abstract class State {

        /**
         * Consumes one input character.
         *
         * @param input the character to consume, or the NUL character to
         *              signal end of input
         * @return the state to continue in, or {@code null} when the machine
         *         must stop (end of input or parse error)
         */
        public abstract State nextState(char input);

        /** Returns the runtime class name of this state. */
        @Override
        public String toString() {
            return this.getClass().getName();
        }
    }

    /** Text outside of any tag. */
    public class BodyState extends State {

        /**
         * Echoes ordinary characters and stays in this state; {@code '<'}
         * opens a tag; a stray {@code '>'} is a parse error; end of input
         * stops the machine silently.
         */
        @Override
        public State nextState(char input) {
            switch (input) {
                case '<':
                    return new OpenState();
                case '>':
                    System.out.println("Parse error.");
                    return null;
                case EOF:
                    return null;
                default:
                    System.out.print(input);
                    return this;
            }
        }
    }

    /** Immediately after {@code '<'}, before any tag character was read. */
    public class OpenState extends State {

        /**
         * Rejects end of input, {@code '<'} and {@code '>'} (an empty tag)
         * as parse errors; any other character starts the tag text.
         */
        @Override
        public State nextState(char input) {
            switch (input) {
                case EOF:
                case '<':
                case '>':
                    System.out.println("Parse error.");
                    return null;
                default:
                    // Delegate so the first tag character is echoed by the
                    // same code path as every later tag character.
                    return (new TagState()).nextState(input);
            }
        }
    }

    /** Inside a tag, after at least one tag character. */
    public class TagState extends State {

        /**
         * Echoes tag characters and stays in this state; {@code '>'} closes
         * the tag and returns to body text; end of input or a nested
         * {@code '<'} is a parse error.
         */
        @Override
        public State nextState(char input) {
            switch (input) {
                case EOF:
                case '<':
                    System.out.println("Parse error.");
                    return null;
                case '>':
                    return new BodyState();
                default:
                    System.out.print(input);
                    return this;
            }
        }
    }
}