HTML Parser: state-machine-style
- inputs a string of html
- prints the state path taken by the machine
Not all of this was implemented, but this is the basic state-transition table:
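| Input / State | Body-text | Open-tag | Tag-text |
|---------------|-----------|----------|-----------|
| `<`           | Open-tag  | (error)  | (error)   |
| `>`           | (error)   | (error)  | Body-text |
| `[^><]`       | Body-text | Tag-text | Tag-text  |
| EOF           | (end)     | (error)  | (error)   |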
A lower-level parser than HtmlOrganizer, and not very useful. But educational, probably.
code
/**
 * A minimal state-machine HTML scanner.
 *
 * <p>{@link #parse(String)} walks the input one character at a time, echoing
 * body and tag characters to {@code System.out} and printing the class name of
 * each state the machine enters. A state signals "stop" (either a parse error
 * or a clean end of input) by returning {@code null} from
 * {@link State#nextState(char)}.
 */
public class Parser {

    /** Sentinel character fed to the machine to signal end of input. */
    private static final char EOF = '\0';

    /**
     * Runs the state machine over {@code s}, printing the path taken.
     *
     * <p>Parsing stops at the first character for which the current state
     * returns {@code null}. If every character is consumed, the {@link #EOF}
     * sentinel is fed once so that input ending inside a tag is reported as a
     * parse error by the state it ended in.
     *
     * @param s the HTML text to scan; must not be {@code null}
     */
    public void parse(String s) {
        State state = new BodyState();
        for (char input : s.toCharArray()) {
            State previous = state;
            state = state.nextState(input);
            if (state == null) {
                // The state has already reported any error; a null state
                // cannot be printed or advanced, so stop here.
                return;
            }
            if (state != previous) {
                System.out.println("\n" + state.toString());
            }
        }
        // Let the final state decide whether ending here is legal.
        state.nextState(EOF);
    }

    /** One node of the state machine. */
    public abstract class State {

        /**
         * Consumes one input character.
         *
         * @param input the character to consume, or {@code '\0'} for end of input
         * @return the state to continue in, or {@code null} to stop parsing
         */
        public abstract State nextState(char input);

        /** Returns the runtime class name of this state. */
        @Override
        public String toString() {
            return this.getClass().getName();
        }
    }

    /** Text outside of any tag; characters are echoed as they are read. */
    public class BodyState extends State {
        @Override
        public State nextState(char input) {
            switch (input) {
                case '<':
                    return new OpenState();
                case '>':
                    System.out.println("Parse error.");
                    // fall through: an error stops the machine just like EOF
                case EOF:
                    return null;
                default:
                    System.out.print(input);
                    return this;
            }
        }
    }

    /** Immediately after {@code '<'}; the next character must start the tag text. */
    public class OpenState extends State {
        @Override
        public State nextState(char input) {
            switch (input) {
                case EOF:
                case '<':
                case '>':
                    System.out.println("Parse error.");
                    return null;
                default:
                    // Hand the first tag character to a fresh TagState so it
                    // is echoed like every other tag character.
                    return (new TagState()).nextState(input);
            }
        }
    }

    /** Inside a tag; characters are echoed until the closing {@code '>'}. */
    public class TagState extends State {
        @Override
        public State nextState(char input) {
            switch (input) {
                case EOF:
                case '<':
                    System.out.println("Parse error.");
                    return null;
                case '>':
                    return new BodyState();
                default:
                    System.out.print(input);
                    return this;
            }
        }
    }
}