mirror of
https://github.com/stleary/JSON-java.git
synced 2026-05-18 00:00:50 -04:00
489 lines
14 KiB
Java
489 lines
14 KiB
Java
package org.json;
|
|
|
|
/*
|
|
Public Domain.
|
|
*/
|
|
|
|
import java.io.Reader;
|
|
|
|
/**
|
|
* The XMLTokener extends the JSONTokener to provide additional methods
|
|
* for the parsing of XML texts.
|
|
* @author JSON.org
|
|
* @version 2015-12-09
|
|
*/
|
|
public class XMLTokener extends JSONTokener {
|
|
|
|
|
|
/**
 * Lookup table from XML entity name to the character it represents.
 * It starts out holding the five predefined entities:
 * amp, apos, gt, lt and quot.
 */
public static final java.util.HashMap<String, Character> entity;

/** Parser options used by this tokener; initially {@code XMLParserConfiguration.ORIGINAL}. */
private XMLParserConfiguration configuration = XMLParserConfiguration.ORIGINAL;

static {
    // Fill a local map first, then publish it through the final field.
    java.util.HashMap<String, Character> predefined = new java.util.HashMap<String, Character>(8);
    predefined.put("amp", XML.AMP);
    predefined.put("apos", XML.APOS);
    predefined.put("gt", XML.GT);
    predefined.put("lt", XML.LT);
    predefined.put("quot", XML.QUOT);
    entity = predefined;
}
|
|
|
|
/**
 * Create an XMLTokener that takes its source text from a character stream.
 *
 * @param r the reader supplying the XML source text
 */
public XMLTokener(Reader r) {
    super(r);
}
|
|
|
|
/**
 * Create an XMLTokener over XML source text held in a string.
 *
 * @param s the XML source text
 */
public XMLTokener(String s) {
    super(s);
}
|
|
|
|
/**
 * Construct an XMLTokener from a Reader and an XMLParserConfiguration.
 *
 * @param r A source reader.
 * @param configuration the configuration that can be used to set certain flags.
 *        When {@code null}, the tokener keeps its default configuration
 *        ({@code XMLParserConfiguration.ORIGINAL}).
 */
public XMLTokener(Reader r, XMLParserConfiguration configuration) {
    super(r);
    // Storing null here would only fail later, as an input-dependent
    // NullPointerException inside nextContent(). Keep the field's default
    // value instead so the tokener is always usable.
    if (configuration != null) {
        this.configuration = configuration;
    }
}
|
|
|
|
/**
|
|
* Get the text in the CDATA block.
|
|
* @return The string up to the <code>]]></code>.
|
|
* @throws JSONException If the <code>]]></code> is not found.
|
|
*/
|
|
public String nextCDATA() throws JSONException {
|
|
char c;
|
|
int i;
|
|
StringBuilder sb = new StringBuilder();
|
|
while (more()) {
|
|
c = next();
|
|
sb.append(c);
|
|
i = sb.length() - 3;
|
|
if (i >= 0 && sb.charAt(i) == ']' &&
|
|
sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') {
|
|
sb.setLength(i);
|
|
return sb.toString();
|
|
}
|
|
}
|
|
throw syntaxError("Unclosed CDATA");
|
|
}
|
|
|
|
|
|
/**
|
|
* Get the next XML outer token, trimming whitespace. There are two kinds
|
|
* of tokens: the <pre>{@code '<' }</pre> character which begins a markup
|
|
* tag, and the content
|
|
* text between markup tags.
|
|
*
|
|
* @return A string, or a <pre>{@code '<' }</pre> Character, or null if
|
|
* there is no more source text.
|
|
* @throws JSONException if a called function has an error
|
|
*/
|
|
public Object nextContent() throws JSONException {
|
|
char c;
|
|
StringBuilder sb;
|
|
do {
|
|
c = next();
|
|
} while (Character.isWhitespace(c) && configuration.shouldTrimWhiteSpace());
|
|
if (c == 0) {
|
|
return null;
|
|
}
|
|
if (c == '<') {
|
|
return XML.LT;
|
|
}
|
|
sb = new StringBuilder();
|
|
for (;;) {
|
|
if (c == 0) {
|
|
return sb.toString().trim();
|
|
}
|
|
if (c == '<') {
|
|
back();
|
|
if (configuration.shouldTrimWhiteSpace()) {
|
|
return sb.toString().trim();
|
|
} else return sb.toString();
|
|
}
|
|
if (c == '&') {
|
|
sb.append(nextEntity(c));
|
|
} else {
|
|
sb.append(c);
|
|
}
|
|
c = next();
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* <pre>{@code
|
|
* Return the next entity. These entities are translated to Characters:
|
|
* & ' > < ".
|
|
* }</pre>
|
|
* @param ampersand An ampersand character.
|
|
* @return A Character or an entity String if the entity is not recognized.
|
|
* @throws JSONException If missing ';' in XML entity.
|
|
*/
|
|
public Object nextEntity(@SuppressWarnings("unused") char ampersand) throws JSONException {
|
|
StringBuilder sb = new StringBuilder();
|
|
for (;;) {
|
|
char c = next();
|
|
if (Character.isLetterOrDigit(c) || c == '#') {
|
|
sb.append(Character.toLowerCase(c));
|
|
} else if (c == ';') {
|
|
break;
|
|
} else {
|
|
throw syntaxError("Missing ';' in XML entity: &" + sb);
|
|
}
|
|
}
|
|
String string = sb.toString();
|
|
return unescapeEntity(string);
|
|
}
|
|
|
|
/**
|
|
* Unescape an XML entity encoding;
|
|
* @param e entity (only the actual entity value, not the preceding & or ending ;
|
|
* @return the unescaped entity string
|
|
* @throws JSONException if the entity is malformed
|
|
*/
|
|
static String unescapeEntity(String e) throws JSONException {
|
|
// validate
|
|
if (e == null || e.isEmpty()) {
|
|
return "";
|
|
}
|
|
// if our entity is an encoded unicode point, parse it.
|
|
if (e.charAt(0) == '#') {
|
|
if (e.length() < 2) {
|
|
throw new JSONException("Invalid numeric character reference: &#;");
|
|
}
|
|
int cp = (e.charAt(1) == 'x' || e.charAt(1) == 'X')
|
|
? parseHexEntity(e)
|
|
: parseDecimalEntity(e);
|
|
if (XML.mustEscape(cp)) {
|
|
throw new JSONException("Invalid numeric character reference: &#" + e.substring(1) + ";");
|
|
}
|
|
return new String(new int[] {cp}, 0, 1);
|
|
}
|
|
Character knownEntity = entity.get(e);
|
|
if (knownEntity == null) {
|
|
// we don't know the entity so keep it encoded
|
|
return '&' + e + ';';
|
|
}
|
|
return knownEntity.toString();
|
|
}
|
|
|
|
/**
|
|
* Parse a hexadecimal numeric character reference (e.g., "઼").
|
|
* @param e entity string starting with '#' (e.g., "#x1F4A9")
|
|
* @return the Unicode code point
|
|
* @throws JSONException if the format is invalid
|
|
*/
|
|
private static int parseHexEntity(String e) throws JSONException {
|
|
// hex encoded unicode - need at least one hex digit after #x
|
|
if (e.length() < 3) {
|
|
throw new JSONException("Invalid hex character reference: missing hex digits in &#" + e.substring(1) + ";");
|
|
}
|
|
String hex = e.substring(2);
|
|
if (!isValidHex(hex)) {
|
|
throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";");
|
|
}
|
|
try {
|
|
return Integer.parseInt(hex, 16);
|
|
} catch (NumberFormatException nfe) {
|
|
throw new JSONException("Invalid hex character reference: &#" + e.substring(1) + ";", nfe);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse a decimal numeric character reference (e.g., "{").
|
|
* @param e entity string starting with '#' (e.g., "#123")
|
|
* @return the Unicode code point
|
|
* @throws JSONException if the format is invalid
|
|
*/
|
|
private static int parseDecimalEntity(String e) throws JSONException {
|
|
String decimal = e.substring(1);
|
|
if (!isValidDecimal(decimal)) {
|
|
throw new JSONException("Invalid decimal character reference: &#" + decimal + ";");
|
|
}
|
|
try {
|
|
return Integer.parseInt(decimal);
|
|
} catch (NumberFormatException nfe) {
|
|
throw new JSONException("Invalid decimal character reference: &#" + decimal + ";", nfe);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if a string contains only valid hexadecimal digits.
|
|
* @param s the string to check
|
|
* @return true if s is non-empty and contains only hex digits (0-9, a-f, A-F)
|
|
*/
|
|
private static boolean isValidHex(String s) {
|
|
if (s == null || s.isEmpty()) {
|
|
return false;
|
|
}
|
|
for (int i = 0; i < s.length(); i++) {
|
|
char c = s.charAt(i);
|
|
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Check if a string contains only valid decimal digits.
|
|
* @param s the string to check
|
|
* @return true if s is non-empty and contains only digits (0-9)
|
|
*/
|
|
private static boolean isValidDecimal(String s) {
|
|
if (s == null || s.isEmpty()) {
|
|
return false;
|
|
}
|
|
for (int i = 0; i < s.length(); i++) {
|
|
char c = s.charAt(i);
|
|
if (c < '0' || c > '9') {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* <pre>{@code
|
|
* Returns the next XML meta token. This is used for skipping over <!...>
|
|
* and <?...?> structures.
|
|
* }</pre>
|
|
* @return <pre>{@code Syntax characters (< > / = ! ?) are returned as
|
|
* Character, and strings and names are returned as Boolean. We don't care
|
|
* what the values actually are.
|
|
* }</pre>
|
|
* @throws JSONException If a string is not properly closed or if the XML
|
|
* is badly structured.
|
|
*/
|
|
public Object nextMeta() throws JSONException {
|
|
char c;
|
|
char q;
|
|
do {
|
|
c = next();
|
|
} while (Character.isWhitespace(c));
|
|
switch (c) {
|
|
case 0:
|
|
throw syntaxError("Misshaped meta tag");
|
|
case '<':
|
|
return XML.LT;
|
|
case '>':
|
|
return XML.GT;
|
|
case '/':
|
|
return XML.SLASH;
|
|
case '=':
|
|
return XML.EQ;
|
|
case '!':
|
|
return XML.BANG;
|
|
case '?':
|
|
return XML.QUEST;
|
|
case '"':
|
|
case '\'':
|
|
q = c;
|
|
for (;;) {
|
|
c = next();
|
|
if (c == 0) {
|
|
throw syntaxError("Unterminated string");
|
|
}
|
|
if (c == q) {
|
|
return Boolean.TRUE;
|
|
}
|
|
}
|
|
default:
|
|
for (;;) {
|
|
c = next();
|
|
if (Character.isWhitespace(c)) {
|
|
return Boolean.TRUE;
|
|
}
|
|
switch (c) {
|
|
case 0:
|
|
throw syntaxError("Unterminated string");
|
|
case '<':
|
|
case '>':
|
|
case '/':
|
|
case '=':
|
|
case '!':
|
|
case '?':
|
|
case '"':
|
|
case '\'':
|
|
back();
|
|
return Boolean.TRUE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* <pre>{@code
|
|
* Get the next XML Token. These tokens are found inside of angle
|
|
* brackets. It may be one of these characters: / > = ! ? or it
|
|
* may be a string wrapped in single quotes or double quotes, or it may be a
|
|
* name.
|
|
* }</pre>
|
|
* @return a String or a Character.
|
|
* @throws JSONException If the XML is not well formed.
|
|
*/
|
|
public Object nextToken() throws JSONException {
|
|
char c;
|
|
char q;
|
|
StringBuilder sb;
|
|
do {
|
|
c = next();
|
|
} while (Character.isWhitespace(c));
|
|
switch (c) {
|
|
case 0:
|
|
throw syntaxError("Misshaped element");
|
|
case '<':
|
|
throw syntaxError("Misplaced '<'");
|
|
case '>':
|
|
return XML.GT;
|
|
case '/':
|
|
return XML.SLASH;
|
|
case '=':
|
|
return XML.EQ;
|
|
case '!':
|
|
return XML.BANG;
|
|
case '?':
|
|
return XML.QUEST;
|
|
|
|
// Quoted string
|
|
|
|
case '"':
|
|
case '\'':
|
|
q = c;
|
|
sb = new StringBuilder();
|
|
for (;;) {
|
|
c = next();
|
|
if (c == 0) {
|
|
throw syntaxError("Unterminated string");
|
|
}
|
|
if (c == q) {
|
|
return sb.toString();
|
|
}
|
|
if (c == '&') {
|
|
sb.append(nextEntity(c));
|
|
} else {
|
|
sb.append(c);
|
|
}
|
|
}
|
|
default:
|
|
|
|
// Name
|
|
|
|
sb = new StringBuilder();
|
|
for (;;) {
|
|
sb.append(c);
|
|
c = next();
|
|
if (Character.isWhitespace(c)) {
|
|
return sb.toString();
|
|
}
|
|
switch (c) {
|
|
case 0:
|
|
return sb.toString();
|
|
case '>':
|
|
case '/':
|
|
case '=':
|
|
case '!':
|
|
case '?':
|
|
case '[':
|
|
case ']':
|
|
back();
|
|
return sb.toString();
|
|
case '<':
|
|
case '"':
|
|
case '\'':
|
|
throw syntaxError("Bad character in a name");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Skip characters until past the requested string.
|
|
* If it is not found, we are left at the end of the source with a result of false.
|
|
* @param to A string to skip past.
|
|
*/
|
|
// The Android implementation of JSONTokener has a public method of public void skipPast(String to)
|
|
// even though ours does not have that method, to have API compatibility, our method in the subclass
|
|
// should match.
|
|
public void skipPast(String to) {
|
|
boolean b;
|
|
char c;
|
|
int i;
|
|
int j;
|
|
int offset = 0;
|
|
int length = to.length();
|
|
char[] circle = new char[length];
|
|
|
|
/*
|
|
* First fill the circle buffer with as many characters as are in the
|
|
* to string. If we reach an early end, bail.
|
|
*/
|
|
|
|
for (i = 0; i < length; i += 1) {
|
|
c = next();
|
|
if (c == 0) {
|
|
return;
|
|
}
|
|
circle[i] = c;
|
|
}
|
|
|
|
/* We will loop, possibly for all of the remaining characters. */
|
|
|
|
for (;;) {
|
|
j = offset;
|
|
b = true;
|
|
|
|
/* Compare the circle buffer with the to string. */
|
|
|
|
for (i = 0; i < length; i += 1) {
|
|
if (circle[j] != to.charAt(i)) {
|
|
b = false;
|
|
break;
|
|
}
|
|
j += 1;
|
|
if (j >= length) {
|
|
j -= length;
|
|
}
|
|
}
|
|
|
|
/* If we exit the loop with b intact, then victory is ours. */
|
|
|
|
if (b) {
|
|
return;
|
|
}
|
|
|
|
/* Get the next character. If there isn't one, then defeat is ours. */
|
|
|
|
c = next();
|
|
if (c == 0) {
|
|
return;
|
|
}
|
|
/*
|
|
* Shove the character in the circle buffer and advance the
|
|
* circle offset. The offset is mod n.
|
|
*/
|
|
circle[offset] = c;
|
|
offset += 1;
|
|
if (offset >= length) {
|
|
offset -= length;
|
|
}
|
|
}
|
|
}
|
|
}
|