001// Copyright (c) FIRST and other WPILib contributors.
002// Open Source Software; you can modify and/or share it under the terms of
003// the WPILib BSD license file in the root directory of this project.
004
005package edu.wpi.first.util.struct.parser;
006
007/** Raw struct schema lexer. */
008public class Lexer {
009  /**
010   * Construct a raw struct schema lexer.
011   *
012   * @param in schema
013   */
014  public Lexer(String in) {
015    m_in = in;
016  }
017
018  /**
019   * Gets the next token.
020   *
021   * @return Token kind; the token text can be retrieved using getTokenText()
022   */
023  public TokenKind scan() {
024    // skip whitespace
025    do {
026      get();
027    } while (m_current == ' ' || m_current == '\t' || m_current == '\n' || m_current == '\r');
028    m_tokenStart = m_pos - 1;
029
030    switch (m_current) {
031      case '[':
032        return TokenKind.kLeftBracket;
033      case ']':
034        return TokenKind.kRightBracket;
035      case '{':
036        return TokenKind.kLeftBrace;
037      case '}':
038        return TokenKind.kRightBrace;
039      case ':':
040        return TokenKind.kColon;
041      case ';':
042        return TokenKind.kSemicolon;
043      case ',':
044        return TokenKind.kComma;
045      case '=':
046        return TokenKind.kEquals;
047      case '-':
048      case '0':
049      case '1':
050      case '2':
051      case '3':
052      case '4':
053      case '5':
054      case '6':
055      case '7':
056      case '8':
057      case '9':
058        return scanInteger();
059      case '\0':
060        return TokenKind.kEndOfInput;
061      default:
062        if (Character.isLetter(m_current) || m_current == '_') {
063          return scanIdentifier();
064        }
065        return TokenKind.kUnknown;
066    }
067  }
068
069  /**
070   * Gets the text of the last lexed token.
071   *
072   * @return token text
073   */
074  public String getTokenText() {
075    if (m_tokenStart >= m_in.length()) {
076      return "";
077    }
078    return m_in.substring(m_tokenStart, m_pos);
079  }
080
081  /**
082   * Gets the starting position of the last lexed token.
083   *
084   * @return position (0 = first character)
085   */
086  public int getPosition() {
087    return m_tokenStart;
088  }
089
090  private TokenKind scanInteger() {
091    do {
092      get();
093    } while (Character.isDigit(m_current));
094    unget();
095    return TokenKind.kInteger;
096  }
097
098  private TokenKind scanIdentifier() {
099    do {
100      get();
101    } while (Character.isLetterOrDigit(m_current) || m_current == '_');
102    unget();
103    return TokenKind.kIdentifier;
104  }
105
106  private void get() {
107    if (m_pos < m_in.length()) {
108      m_current = m_in.charAt(m_pos);
109    } else {
110      m_current = '\0';
111    }
112    ++m_pos;
113  }
114
115  private void unget() {
116    if (m_pos > 0) {
117      m_pos--;
118      if (m_pos < m_in.length()) {
119        m_current = m_in.charAt(m_pos);
120      } else {
121        m_current = '\0';
122      }
123    } else {
124      m_current = '\0';
125    }
126  }
127
128  final String m_in;
129  char m_current;
130  int m_tokenStart;
131  int m_pos;
132}