[FrontPage] [TitleIndex] [WordIndex]

HTML Parser: state-machine-style

  1. inputs a string of html
  2. prints the state path taken by the machine

Not all of this was implemented, but this is the basic design:

A lower-level parser than HtmlOrganizer, and not very useful. But educational, probably.

code

/**
 * A tiny state-machine-style HTML scanner.
 *
 * <p>The input is consumed one character at a time. Text outside of tags and
 * the characters inside a tag are echoed to {@code System.out}; every time the
 * machine moves to a different state object, the new state's class name is
 * printed on its own line (preceded by a blank separator).
 *
 * <p>A state signals "stop" by returning {@code null} from
 * {@link State#nextState(char)}; this happens on a stray {@code '<'} or
 * {@code '>'} (after printing {@code "Parse error."}) and on a NUL character.
 */
public class Parser {
    /**
     * Runs the state machine over {@code s}, printing the text it passes
     * through and the name of each state it enters.
     *
     * <p>Parsing stops at the first character for which the current state
     * returns {@code null}; the remaining input is ignored. Input that ends in
     * the middle of a tag is not reported as an error.
     *
     * @param s the HTML text to scan; must not be {@code null}
     */
    public void parse(String s) {
        char[] c = s.toCharArray();
        State state = new BodyState();
        for (int i = 0; i < c.length; i++) {
            State laststate = state;
            state = state.nextState(c[i]);
            if (state == null) {
                // Terminal state: a parse error was already reported by the
                // state itself, or a NUL character ended the input. Without
                // this check the code below would dereference null.
                return;
            }
            if (state != laststate) {
                System.out.println("\n" + state.toString());
            }
        }
    }


    /** One state of the machine. */
    public abstract class State {
        /**
         * Consumes one input character.
         *
         * @param input the character to process
         * @return the state to continue in (possibly {@code this}), or
         *         {@code null} if parsing must stop
         */
        public abstract State nextState(char input);

        /** Returns the runtime class name of this state. */
        @Override
        public String toString() {
            return this.getClass().getName();
        }
    }

    /** Outside of any tag: plain text is echoed as it is read. */
    public class BodyState extends State {
        @Override
        public State nextState(char input) {
            switch (input) {
            case '<':
                return new OpenState();
            case '>':
                // A closing bracket with no tag open.
                System.out.println("Parse error.");
                // fallthrough
            case 0:
                return null;
            default:
                System.out.print(input);
                return this;
            }
        }
    }

    /** Just after {@code '<'}: the next character must start a tag. */
    public class OpenState extends State {
        @Override
        public State nextState(char input) {
            switch (input) {
            case 0:
            case '<':
            case '>':
                System.out.println("Parse error.");
                return null;
            default:
                // The first tag character is handled by a fresh TagState, so
                // it is echoed before the TagState transition is announced.
                return new TagState().nextState(input);
            }
        }
    }

    /** Inside a tag: characters are echoed until the closing {@code '>'}. */
    public class TagState extends State {
        @Override
        public State nextState(char input) {
            switch (input) {
            case 0:
            case '<':
                System.out.println("Parse error.");
                return null;
            case '>':
                return new BodyState();
            default:
                System.out.print(input);
                return this;
            }
        }
    }
}

2013-07-17 10:43