251 lines
5.9 KiB
251 lines
5.9 KiB
/*
 * html2json for omi
 * https://github.com/AlloyTeam/omi
 *
 * Original code by John Resig (ejohn.org)
 * http://ejohn.org/blog/pure-javascript-html-parser/
 * Original code by Erik Arvidsson, Mozilla Public License
 * http://erik.eae.net/simplehtmlparser/simplehtmlparser.js
 * Original code by Jxck
 * https://github.com/Jxck/html2json
 */
(function (global) {
  // Regular expressions for parsing tags and attributes.
  // startTag captures: 1 = tag name, 2 = raw attribute text, 3 = "/" when self-closing.
  var startTag = /^<([-A-Za-z0-9_]+)((?:\s+[a-zA-Z_:][-a-zA-Z0-9_:.]*(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/,
    // endTag captures: 1 = tag name.
    endTag = /^<\/([-A-Za-z0-9_]+)[^>]*>/,
    // attr captures: 1 = name, 2 = double-quoted value, 3 = single-quoted value, 4 = unquoted value.
    attr = /([a-zA-Z_:][-a-zA-Z0-9_:.]*)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/g;

  /**
   * Minimal SAX-style HTML tokenizer.
   *
   * Walks `html` from left to right and reports what it finds through the
   * optional callbacks on `handler`:
   *   - handler.start(tagName, attrs, unary): an opening tag. `tagName` is
   *     lower-cased, `attrs` is an array of {name, value, escaped} objects and
   *     `unary` is true for self-closing tags such as <c2 />.
   *   - handler.end(tagName): an element is being closed.
   *   - handler.chars(text): a run of text.
   *
   * While an element is open, everything up to its matching closing tag is
   * reported as a single `chars` call; markup nested inside an element is
   * therefore NOT tokenized, it is passed through as raw text.
   *
   * @param {string} html - markup to tokenize.
   * @param {Object} handler - object carrying any of start/end/chars.
   * @throws {string} "Parse Error: <remaining html>" when an iteration makes
   *   no progress (e.g. an element is never closed, or a "<" does not begin
   *   a recognizable tag). A string is thrown, not an Error, to stay
   *   compatible with existing callers.
   */
  var HTMLParser = function (html, handler) {
    var index, chars, match, stack = [], last = html;

    // Name of the innermost open element, or undefined when none is open.
    stack.last = function () {
      return this[this.length - 1];
    };

    while (html) {
      chars = true;

      if (!stack.last()) {
        // No element is open: look for an end tag, a start tag, or plain text.
        if (html.indexOf("</") === 0) {
          match = html.match(endTag);

          if (match) {
            html = html.substring(match[0].length);
            parseEndTag(match[0], match[1]);
            chars = false;
          }

          // start tag
        } else if (html.indexOf("<") === 0) {
          match = html.match(startTag);

          if (match) {
            html = html.substring(match[0].length);
            parseStartTag(match[0], match[1], match[2], match[3]);
            chars = false;
          }
        }

        if (chars) {
          // Everything up to the next "<" (or the end of input) is text.
          index = html.indexOf("<");

          var text = index < 0 ? html : html.substring(0, index);
          html = index < 0 ? "" : html.substring(index);

          if (handler.chars) {
            handler.chars(text);
          }
        }
      } else {
        // An element is open: consume raw content up to its closing tag.
        // The "i" flag is required because parseStartTag lower-cases the tag
        // name kept on the stack, while the source markup may be upper-case.
        html = html.replace(
          new RegExp("([\\s\\S]*?)<\/" + stack.last() + "[^>]*>", "i"),
          function (all, text) {
            if (handler.chars) {
              handler.chars(text);
            }
            return "";
          }
        );

        parseEndTag("", stack.last());
      }

      // Guard against infinite loops: every iteration must consume input.
      if (html == last) {
        throw "Parse Error: " + html;
      }
      last = html;
    }

    // Clean up any remaining tags
    parseEndTag();

    // Handles one opening tag: records it as open (unless unary), collects
    // its attributes and notifies handler.start.
    function parseStartTag(tag, tagName, rest, unary) {
      tagName = tagName.toLowerCase();
      unary = !!unary;

      if (!unary) {
        stack.push(tagName);
      }

      if (handler.start) {
        var attrs = [];

        // String#replace is used purely as an ES3-compatible way to iterate
        // over every match of the global `attr` regex.
        rest.replace(attr, function (match, name, doubleQuoted, singleQuoted, unquoted) {
          var value = doubleQuoted || singleQuoted || unquoted || "";

          attrs.push({
            name: name,
            value: value,
            // Same value with bare double quotes backslash-escaped.
            escaped: value.replace(/(^|[^\\])"/g, '$1\\\"')
          });
        });

        handler.start(tagName, attrs, unary);
      }
    }

    // Closes `tagName` and every element opened after it. With no tag name,
    // closes everything that is still open.
    function parseEndTag(tag, tagName) {
      var pos = 0;

      if (tagName) {
        // Find the closest opened tag of the same type
        for (pos = stack.length - 1; pos >= 0; pos--) {
          if (stack[pos] == tagName) {
            break;
          }
        }
      }

      if (pos >= 0) {
        // Close all the open elements, up the stack
        for (var i = stack.length - 1; i >= pos; i--) {
          if (handler.end) {
            handler.end(stack[i]);
          }
        }

        // Remove the open elements from the stack
        stack.length = pos;
      }
    }
  };

  var DEBUG = false;
  var debug = DEBUG ? console.log.bind(console) : function () {};

  // Production steps of ECMA-262, Edition 5,
  // Reference: http://es5.github.io/#x15.4.4.21
  // Installed only on engines that lack a native Array.prototype.reduce.
  if (!Array.prototype.reduce) {
    Array.prototype.reduce = function (callback /*, initialValue*/) {
      'use strict';
      if (this == null) {
        throw new TypeError('Array.prototype.reduce called on null or undefined');
      }
      if (typeof callback !== 'function') {
        throw new TypeError(callback + ' is not a function');
      }
      var t = Object(this), len = t.length >>> 0, k = 0, value;
      if (arguments.length >= 2) {
        value = arguments[1];
      } else {
        // No initial value: seed with the first index that is actually present.
        while (k < len && !(k in t)) {
          k++;
        }
        if (k >= len) {
          throw new TypeError('Reduce of empty array with no initial value');
        }
        value = t[k++];
      }
      for (; k < len; k++) {
        if (k in t) {
          value = callback(value, t[k], k, t);
        }
      }
      return value;
    };
  }

  /**
   * Converts an HTML string into a plain JSON-friendly tree.
   *
   * The returned root is { node: 'root', child: [...] }. Elements are
   * { node: 'element', tag, attr?, child? } where `attr` maps attribute names
   * to string values (present only when the tag has attributes), and text is
   * { node: 'text', text }.
   *
   * @param {string} html - markup to convert.
   * @returns {Object} the root node of the tree.
   * @throws {string} propagates the "Parse Error: ..." string thrown by
   *   HTMLParser for markup it cannot consume.
   */
  global.html2json = function html2json(html) {
    // Open elements, innermost first (index 0 is the current parent).
    var bufArray = [];
    var results = {
      node: 'root',
      child: []
    };

    // Attaches `node` to the innermost open element, or to the root when no
    // element is open.
    function appendToCurrentParent(node) {
      var parent = bufArray[0] || results;
      if (parent.child === undefined) {
        parent.child = [];
      }
      parent.child.push(node);
    }

    HTMLParser(html, {
      start: function (tag, attrs, unary) {
        debug(tag, attrs, unary);
        // node for this element
        var node = {
          node: 'element',
          tag: tag
        };
        if (attrs.length !== 0) {
          node.attr = attrs.reduce(function (pre, attribute) {
            pre[attribute.name] = attribute.value;
            return pre;
          }, {});
        }
        if (unary) {
          // This tag has no end tag, like <img src="hoge.png"/>,
          // so attach it to its parent right away.
          appendToCurrentParent(node);
        } else {
          // Keep it open until the matching `end` callback fires.
          bufArray.unshift(node);
        }
      },
      end: function (tag) {
        // merge into parent tag
        var node = bufArray.shift();
        if (node.tag !== tag) console.error('invalid state: mismatch end tag');

        appendToCurrentParent(node);
      },
      chars: function (text) {
        appendToCurrentParent({
          node: 'text',
          text: text
        });
      }
    });

    return results;
  };
})(typeof globalThis !== 'undefined' ? globalThis : this);
// Smoke test: convert a small snippet (attributes with and without values,
// a text child, and a self-closing tag) and show whether the serialized
// tree equals the expected JSON string.
var json = html2json(
  '<child tag="Hello2" data-ame="imSDFgsrc sfd" sfdsf data-amesd="22" >sd</child><c2 />'
);
alert(
  '{"node":"root","child":[{"node":"element","tag":"child","attr":{"tag":"Hello2","data-ame":"imSDFgsrc sfd","sfdsf":"","data-amesd":"22"},"child":[{"node":"text","text":"sd"}]},{"node":"element","tag":"c2"}]}' ===
    JSON.stringify(json)
);
</script> |