package org.json; /* Public Domain. */ import java.io.Reader; /** * The XMLTokener extends the JSONTokener to provide additional methods * for the parsing of XML texts. * @author JSON.org * @version 2015-12-09 */ public class XMLTokener extends JSONTokener { /** The table of entity values. It initially contains Character values for * amp, apos, gt, lt, quot. */ public static final java.util.HashMap entity; private XMLParserConfiguration configuration = XMLParserConfiguration.ORIGINAL; static { entity = new java.util.HashMap(8); entity.put("amp", XML.AMP); entity.put("apos", XML.APOS); entity.put("gt", XML.GT); entity.put("lt", XML.LT); entity.put("quot", XML.QUOT); } /** * Construct an XMLTokener from a Reader. * @param r A source reader. */ public XMLTokener(Reader r) { super(r); } /** * Construct an XMLTokener from a string. * @param s A source string. */ public XMLTokener(String s) { super(s); } /** * Construct an XMLTokener from a Reader and an XMLParserConfiguration. * @param r A source reader. * @param configuration the configuration that can be used to set certain flags */ public XMLTokener(Reader r, XMLParserConfiguration configuration) { super(r); this.configuration = configuration; } /** * Get the text in the CDATA block. * @return The string up to the ]]>. * @throws JSONException If the ]]> is not found. */ public String nextCDATA() throws JSONException { char c; int i; StringBuilder sb = new StringBuilder(); while (more()) { c = next(); sb.append(c); i = sb.length() - 3; if (i >= 0 && sb.charAt(i) == ']' && sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') { sb.setLength(i); return sb.toString(); } } throw syntaxError("Unclosed CDATA"); } /** * Get the next XML outer token, trimming whitespace. There are two kinds * of tokens: the
{@code '<' }
character which begins a markup * tag, and the content * text between markup tags. * * @return A string, or a
{@code '<' }
Character, or null if * there is no more source text. * @throws JSONException if a called function has an error */ public Object nextContent() throws JSONException { char c; StringBuilder sb; do { c = next(); } while (Character.isWhitespace(c) && configuration.shouldTrimWhiteSpace()); if (c == 0) { return null; } if (c == '<') { return XML.LT; } sb = new StringBuilder(); for (;;) { if (c == 0) { return sb.toString().trim(); } if (c == '<') { back(); if (configuration.shouldTrimWhiteSpace()) { return sb.toString().trim(); } else return sb.toString(); } if (c == '&') { sb.append(nextEntity(c)); } else { sb.append(c); } c = next(); } } /** *
{@code
     * Return the next entity. These entities are translated to Characters:
     *     &  '  >  <  ".
     * }
* @param ampersand An ampersand character. * @return A Character or an entity String if the entity is not recognized. * @throws JSONException If missing ';' in XML entity. */ public Object nextEntity(@SuppressWarnings("unused") char ampersand) throws JSONException { StringBuilder sb = new StringBuilder(); for (;;) { char c = next(); if (Character.isLetterOrDigit(c) || c == '#') { sb.append(Character.toLowerCase(c)); } else if (c == ';') { break; } else { throw syntaxError("Missing ';' in XML entity: &" + sb); } } String string = sb.toString(); return unescapeEntity(string); } /** * Unescape an XML entity encoding; * @param e entity (only the actual entity value, not the preceding & or ending ; * @return the unescaped entity string * @throws JSONException if the entity is malformed */ static String unescapeEntity(String e) throws JSONException { // validate if (e == null || e.isEmpty()) { return ""; } // if our entity is an encoded unicode point, parse it. if (e.charAt(0) == '#') { if (e.length() < 2) { throw new JSONException("Invalid numeric character reference: &#;"); } int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X') ? parseHexEntity(e) : parseDecimalEntity(e); if (XML.mustEscape(cp)) { throw new JSONException("Invalid numeric character reference: &#" + e.substring(1) + ";"); } return new String(new int[] {cp}, 0, 1); } Character knownEntity = entity.get(e); if (knownEntity == null) { // we don't know the entity so keep it encoded return '&' + e + ';'; } return knownEntity.toString(); } /** * Parse a hexadecimal numeric character reference (e.g., "઼"). * @param e entity string starting with '#' (e.g., "#x1F4A9") * @return the Unicode code point * @throws JSONException if the format is invalid */ private static int parseHexEntity(String e) throws JSONException { // hex encoded unicode - need at least one hex digit after #x if (e.length() < 3) { throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";"); } String hex = e.substring(2); if (!isValidHex(hex)) { throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";"); } try { return Integer.parseInt(hex, 16); } catch (NumberFormatException nfe) { throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe); } } /** * Parse a decimal numeric character reference (e.g., "{"). * @param e entity string starting with '#' (e.g., "#123") * @return the Unicode code point * @throws JSONException if the format is invalid */ private static int parseDecimalEntity(String e) throws JSONException { String decimal = e.substring(1); if (!isValidDecimal(decimal)) { throw new JSONException("Invalid decimal character reference: &#" + decimal + ";"); } try { return Integer.parseInt(decimal); } catch (NumberFormatException nfe) { throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe); } } /** * Check if a string contains only valid hexadecimal digits. * @param s the string to check * @return true if s is non-empty and contains only hex digits (0-9, a-f, A-F) */ private static boolean isValidHex(String s) { if (s == null || s.isEmpty()) { return false; } for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) { return false; } } return true; } /** * Check if a string contains only valid decimal digits. * @param s the string to check * @return true if s is non-empty and contains only digits (0-9) */ private static boolean isValidDecimal(String s) { if (s == null || s.isEmpty()) { return false; } for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c < '0' || c > '9') { return false; } } return true; } /** *
{@code 
     * Returns the next XML meta token. This is used for skipping over 
     * and  structures.
     *  }
* @return
{@code Syntax characters (< > / = ! ?) are returned as
     *  Character, and strings and names are returned as Boolean. We don't care
     *  what the values actually are.
     *  }
* @throws JSONException If a string is not properly closed or if the XML * is badly structured. */ public Object nextMeta() throws JSONException { char c; char q; do { c = next(); } while (Character.isWhitespace(c)); switch (c) { case 0: throw syntaxError("Misshaped meta tag"); case '<': return XML.LT; case '>': return XML.GT; case '/': return XML.SLASH; case '=': return XML.EQ; case '!': return XML.BANG; case '?': return XML.QUEST; case '"': case '\'': q = c; for (;;) { c = next(); if (c == 0) { throw syntaxError("Unterminated string"); } if (c == q) { return Boolean.TRUE; } } default: for (;;) { c = next(); if (Character.isWhitespace(c)) { return Boolean.TRUE; } switch (c) { case 0: throw syntaxError("Unterminated string"); case '<': case '>': case '/': case '=': case '!': case '?': case '"': case '\'': back(); return Boolean.TRUE; } } } } /** *
{@code
     * Get the next XML Token. These tokens are found inside of angle
     * brackets. It may be one of these characters: / > = ! ? or it
     * may be a string wrapped in single quotes or double quotes, or it may be a
     * name.
     * }
* @return a String or a Character. * @throws JSONException If the XML is not well formed. */ public Object nextToken() throws JSONException { char c; char q; StringBuilder sb; do { c = next(); } while (Character.isWhitespace(c)); switch (c) { case 0: throw syntaxError("Misshaped element"); case '<': throw syntaxError("Misplaced '<'"); case '>': return XML.GT; case '/': return XML.SLASH; case '=': return XML.EQ; case '!': return XML.BANG; case '?': return XML.QUEST; // Quoted string case '"': case '\'': q = c; sb = new StringBuilder(); for (;;) { c = next(); if (c == 0) { throw syntaxError("Unterminated string"); } if (c == q) { return sb.toString(); } if (c == '&') { sb.append(nextEntity(c)); } else { sb.append(c); } } default: // Name sb = new StringBuilder(); for (;;) { sb.append(c); c = next(); if (Character.isWhitespace(c)) { return sb.toString(); } switch (c) { case 0: return sb.toString(); case '>': case '/': case '=': case '!': case '?': case '[': case ']': back(); return sb.toString(); case '<': case '"': case '\'': throw syntaxError("Bad character in a name"); } } } } /** * Skip characters until past the requested string. * If it is not found, we are left at the end of the source with a result of false. * @param to A string to skip past. */ // The Android implementation of JSONTokener has a public method of public void skipPast(String to) // even though ours does not have that method, to have API compatibility, our method in the subclass // should match. public void skipPast(String to) { boolean b; char c; int i; int j; int offset = 0; int length = to.length(); char[] circle = new char[length]; /* * First fill the circle buffer with as many characters as are in the * to string. If we reach an early end, bail. */ for (i = 0; i < length; i += 1) { c = next(); if (c == 0) { return; } circle[i] = c; } /* We will loop, possibly for all of the remaining characters. */ for (;;) { j = offset; b = true; /* Compare the circle buffer with the to string. */ for (i = 0; i < length; i += 1) { if (circle[j] != to.charAt(i)) { b = false; break; } j += 1; if (j >= length) { j -= length; } } /* If we exit the loop with b intact, then victory is ours. */ if (b) { return; } /* Get the next character. If there isn't one, then defeat is ours. */ c = next(); if (c == 0) { return; } /* * Shove the character in the circle buffer and advance the * circle offset. The offset is mod n. */ circle[offset] = c; offset += 1; if (offset >= length) { offset -= length; } } } }