251 lines
5.9 KiB
HTML
251 lines
5.9 KiB
HTML
<script>
|
|
/*
|
|
* html2json for omi
|
|
* https://github.com/AlloyTeam/omi
|
|
*
|
|
* Original code by John Resig (ejohn.org)
|
|
* http://ejohn.org/blog/pure-javascript-html-parser/
|
|
* Original code by Erik Arvidsson, Mozilla Public License
|
|
* http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
|
|
* Original code by Jxck
|
|
* https://github.com/Jxck/html2json
|
|
*/
|
|
|
|
|
|
(function(global) {
|
|
|
|
// Regular Expressions for parsing tags and attributes
|
|
var startTag = /^<([-A-Za-z0-9_]+)((?:\s+[a-zA-Z_:][-a-zA-Z0-9_:.]*(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/,
|
|
endTag = /^<\/([-A-Za-z0-9_]+)[^>]*>/,
|
|
attr = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g;
|
|
|
|
|
|
var HTMLParser = function (html, handler) {
|
|
var index, chars, match, stack = [], last = html;
|
|
stack.last = function () {
|
|
return this[this.length - 1];
|
|
};
|
|
|
|
while (html) {
|
|
chars = true;
|
|
|
|
// Make sure we're not in a script or style element
|
|
if (!stack.last() ) {
|
|
|
|
if (html.indexOf("</") == 0) {
|
|
match = html.match(endTag);
|
|
|
|
if (match) {
|
|
html = html.substring(match[0].length);
|
|
match[0].replace(endTag, parseEndTag);
|
|
chars = false;
|
|
}
|
|
|
|
// start tag
|
|
} else if (html.indexOf("<") == 0) {
|
|
match = html.match(startTag);
|
|
|
|
if (match) {
|
|
html = html.substring(match[0].length);
|
|
match[0].replace(startTag, parseStartTag);
|
|
chars = false;
|
|
}
|
|
}
|
|
|
|
if (chars) {
|
|
index = html.indexOf("<");
|
|
|
|
var text = index < 0 ? html : html.substring(0, index);
|
|
html = index < 0 ? "" : html.substring(index);
|
|
|
|
if (handler.chars)
|
|
handler.chars(text);
|
|
}
|
|
|
|
} else {
|
|
html = html.replace(new RegExp("([\\s\\S]*?)<\/" + stack.last() + "[^>]*>"), function (all, text) {
|
|
|
|
if (handler.chars)
|
|
handler.chars(text);
|
|
|
|
return "";
|
|
});
|
|
|
|
parseEndTag("", stack.last());
|
|
}
|
|
|
|
if (html == last)
|
|
throw "Parse Error: " + html;
|
|
last = html;
|
|
}
|
|
|
|
// Clean up any remaining tags
|
|
parseEndTag();
|
|
|
|
function parseStartTag(tag, tagName, rest, unary) {
|
|
tagName = tagName.toLowerCase();
|
|
|
|
unary = !!unary;
|
|
|
|
if (!unary)
|
|
stack.push(tagName);
|
|
|
|
if (handler.start) {
|
|
var attrs = [];
|
|
|
|
rest.replace(attr, function (match, name) {
|
|
var value = arguments[2] ? arguments[2] :
|
|
arguments[3] ? arguments[3] :
|
|
arguments[4] ? arguments[4] :"";
|
|
|
|
attrs.push({
|
|
name: name,
|
|
value: value,
|
|
escaped: value.replace(/(^|[^\\])"/g, '$1\\\"') //"
|
|
});
|
|
});
|
|
|
|
if (handler.start)
|
|
handler.start(tagName, attrs, unary);
|
|
}
|
|
}
|
|
|
|
function parseEndTag(tag, tagName) {
|
|
// If no tag name is provided, clean shop
|
|
if (!tagName)
|
|
var pos = 0;
|
|
|
|
// Find the closest opened tag of the same type
|
|
else
|
|
for (var pos = stack.length - 1; pos >= 0; pos--)
|
|
if (stack[pos] == tagName)
|
|
break;
|
|
|
|
if (pos >= 0) {
|
|
// Close all the open elements, up the stack
|
|
for (var i = stack.length - 1; i >= pos; i--)
|
|
if (handler.end)
|
|
handler.end(stack[i]);
|
|
|
|
// Remove the open elements from the stack
|
|
stack.length = pos;
|
|
}
|
|
}
|
|
};
|
|
|
|
var DEBUG = false;
|
|
var debug = DEBUG ? console.log.bind(console) : function(){};
|
|
|
|
// Production steps of ECMA-262, Edition 5, 15.4.4.21
|
|
// Reference: http://es5.github.io/#x15.4.4.21
|
|
if (!Array.prototype.reduce) {
|
|
Array.prototype.reduce = function(callback /*, initialValue*/) {
|
|
'use strict';
|
|
if (this == null) {
|
|
throw new TypeError('Array.prototype.reduce called on null or undefined');
|
|
}
|
|
if (typeof callback !== 'function') {
|
|
throw new TypeError(callback + ' is not a function');
|
|
}
|
|
var t = Object(this), len = t.length >>> 0, k = 0, value;
|
|
if (arguments.length == 2) {
|
|
value = arguments[1];
|
|
} else {
|
|
while (k < len && !(k in t)) {
|
|
k++;
|
|
}
|
|
if (k >= len) {
|
|
throw new TypeError('Reduce of empty array with no initial value');
|
|
}
|
|
value = t[k++];
|
|
}
|
|
for (; k < len; k++) {
|
|
if (k in t) {
|
|
value = callback(value, t[k], k, t);
|
|
}
|
|
}
|
|
return value;
|
|
};
|
|
}
|
|
|
|
global.html2json = function html2json(html) {
|
|
|
|
var bufArray = [];
|
|
var results = {
|
|
node: 'root',
|
|
child: [],
|
|
};
|
|
HTMLParser(html, {
|
|
start: function(tag, attrs, unary) {
|
|
debug(tag, attrs, unary);
|
|
// node for this element
|
|
var node = {
|
|
node: 'element',
|
|
tag: tag,
|
|
};
|
|
if (attrs.length !== 0) {
|
|
node.attr = attrs.reduce(function(pre, attr) {
|
|
var name = attr.name;
|
|
var value = attr.value;
|
|
|
|
pre[name] = value;
|
|
return pre;
|
|
}, {});
|
|
}
|
|
if (unary) {
|
|
// if this tag dosen't have end tag
|
|
// like <img src="hoge.png"/>
|
|
// add to parents
|
|
var parent = bufArray[0] || results;
|
|
if (parent.child === undefined) {
|
|
parent.child = [];
|
|
}
|
|
parent.child.push(node);
|
|
} else {
|
|
bufArray.unshift(node);
|
|
}
|
|
},
|
|
end: function(tag) {
|
|
debug(tag);
|
|
// merge into parent tag
|
|
var node = bufArray.shift();
|
|
if (node.tag !== tag) console.error('invalid state: mismatch end tag');
|
|
|
|
if (bufArray.length === 0) {
|
|
results.child.push(node);
|
|
} else {
|
|
var parent = bufArray[0];
|
|
if (parent.child === undefined) {
|
|
parent.child = [];
|
|
}
|
|
parent.child.push(node);
|
|
}
|
|
},
|
|
chars: function(text) {
|
|
debug(text);
|
|
var node = {
|
|
node: 'text',
|
|
text: text,
|
|
};
|
|
if (bufArray.length === 0) {
|
|
results.child.push(node);
|
|
} else {
|
|
var parent = bufArray[0];
|
|
if (parent.child === undefined) {
|
|
parent.child = [];
|
|
}
|
|
parent.child.push(node);
|
|
}
|
|
}
|
|
});
|
|
return results;
|
|
};
|
|
|
|
})(this);
|
|
|
|
</script>
|
|
|
|
<script>
|
|
var json = html2json('<child tag="Hello2" data-ame="imSDFgsrc sfd" sfdsf data-amesd="22" >sd</child><c2 />')
|
|
alert(JSON.stringify( json)==='{"node":"root","child":[{"node":"element","tag":"child","attr":{"tag":"Hello2","data-ame":"imSDFgsrc sfd","sfdsf":"","data-amesd":"22"},"child":[{"node":"text","text":"sd"}]},{"node":"element","tag":"c2"}]}')
|
|
</script> |