mirror of https://gitee.com/openkylin/doxygen.git
498 lines
14 KiB
Plaintext
498 lines
14 KiB
Plaintext
/******************************************************************************
|
|
*
|
|
* Copyright (C) 1997-2020 by Dimitri van Heesch.
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software and its
|
|
* documentation under the terms of the GNU General Public License is hereby
|
|
* granted. No representations are made about the suitability of this software
|
|
* for any purpose. It is provided "as is" without express or implied warranty.
|
|
* See the GNU General Public License for more details.
|
|
*
|
|
* Documents produced by Doxygen are derivative works derived from the
|
|
* input used in their production; they are not affected by this license.
|
|
*
|
|
*/
|
|
/******************************************************************************
|
|
* Minimal flex based parser for XML
|
|
******************************************************************************/
|
|
|
|
%option never-interactive
|
|
%option prefix="xmlYY"
|
|
%option reentrant
|
|
%option extra-type="struct xmlYY_state *"
|
|
%option 8bit noyywrap
|
|
%top{
|
|
#include <stdint.h>
|
|
}
|
|
|
|
%{
|
|
|
|
#include <ctype.h>
|
|
#include <vector>
|
|
#include <stdio.h>
|
|
#include "xml.h"
|
|
//#include "message.h"
|
|
|
|
#define YY_NEVER_INTERACTIVE 1
|
|
#define YY_NO_INPUT 1
|
|
#define YY_NO_UNISTD_H 1
|
|
|
|
struct xmlYY_state
|
|
{
|
|
std::string fileName;
|
|
int lineNr = 1;
|
|
const char * inputString = 0; //!< the code fragment as text
|
|
yy_size_t inputPosition = 0; //!< read offset during parsing
|
|
std::string name;
|
|
bool isEnd = false;
|
|
bool selfClose = false;
|
|
std::string data;
|
|
std::string attrValue;
|
|
std::string attrName;
|
|
XMLHandlers::Attributes attrs;
|
|
XMLHandlers handlers;
|
|
int cdataContext;
|
|
int commentContext;
|
|
char stringChar;
|
|
std::vector<std::string> xpath;
|
|
};
|
|
|
|
#if USE_STATE2STRING
|
|
static const char *stateToString(int state);
|
|
#endif
|
|
|
|
static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size);
|
|
static void initElement(yyscan_t yyscanner);
|
|
static void addCharacters(yyscan_t yyscanner);
|
|
static void addElement(yyscan_t yyscanner);
|
|
static void addAttribute(yyscan_t yyscanner);
|
|
static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len);
|
|
static void reportError(yyscan_t yyscanner, const std::string &msg);
|
|
static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len);
|
|
|
|
#undef YY_INPUT
|
|
#define YY_INPUT(buf,result,max_size) result=yyread(yyscanner,buf,max_size);
|
|
|
|
%}
|
|
|
|
NL (\r\n|\r|\n)
|
|
SP [ \t\r\n]+
|
|
OPEN {SP}?"<"
|
|
OPENSPECIAL {SP}?"<?"
|
|
CLOSE ">"{NL}?
|
|
CLOSESPECIAL "?>"{NL}?
|
|
NAMESTART [:A-Za-z\200-\377_]
|
|
NAMECHAR [:A-Za-z\200-\377_0-9.-]
|
|
NAME {NAMESTART}{NAMECHAR}*
|
|
ESC "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
|
|
COLON ":"
|
|
PCDATA [^<]+
|
|
COMMENT {OPEN}"!--"
|
|
COMMENTEND "--"{CLOSE}
|
|
STRING \"([^"&]|{ESC})*\"|\'([^'&]|{ESC})*\'
|
|
DOCTYPE {SP}?"<!DOCTYPE"{SP}
|
|
CDATA {SP}?"<![CDATA["
|
|
ENDCDATA "]]>"
|
|
|
|
%option noyywrap
|
|
|
|
%s Initial
|
|
%s Content
|
|
%s CDataSection
|
|
%s Element
|
|
%s Attributes
|
|
%s AttributeValue
|
|
%s AttrValueStr
|
|
%s Prolog
|
|
%s Comment
|
|
|
|
%%
|
|
|
|
<Initial>{
  {SP}                 { /* Initial state: everything before the root element; skip whitespace */
                         countLines(yyscanner,yytext,yyleng); }
  {DOCTYPE}            { countLines(yyscanner,yytext,yyleng); }
  {OPENSPECIAL}        { countLines(yyscanner,yytext,yyleng); BEGIN(Prolog); }
  {OPEN}               { countLines(yyscanner,yytext,yyleng);
                         initElement(yyscanner);
                         BEGIN(Element); }
  {COMMENT}            { /* the pattern may start with whitespace, so newlines
                            have to be counted here as well */
                         countLines(yyscanner,yytext,yyleng);
                         yyextra->commentContext = YY_START;
                         BEGIN(Comment);
                       }
}
|
|
<Content>{
  {CDATA}              { /* start of a CDATA section; remember the state to return to */
                         countLines(yyscanner,yytext,yyleng);
                         yyextra->cdataContext = YY_START;
                         BEGIN(CDataSection);
                       }
  {PCDATA}             { /* character data: decode entities first, so errors are reported
                            relative to the start of the text, then account for any
                            newlines the text contains */
                         yyextra->data += processData(yyscanner,yytext,yyleng);
                         countLines(yyscanner,yytext,yyleng);
                       }
  {OPEN}               { /* a tag starts: hand over the collected text and reset the tag state */
                         countLines(yyscanner,yytext,yyleng);
                         addCharacters(yyscanner);
                         initElement(yyscanner);
                         BEGIN(Element);
                       }
  {COMMENT}            { yyextra->commentContext = YY_START;
                         countLines(yyscanner,yytext,yyleng);
                         BEGIN(Comment);
                       }
}
|
|
<Element>{
  "/"                  { /* slash directly after the opening bracket: this is a closing tag */
                         yyextra->isEnd = true; }
  {NAME}               { /* tag name; attributes may follow */
                         yyextra->name.assign(yytext);
                         BEGIN(Attributes); }
  {CLOSE}              { /* end of the tag: report it and start collecting new text */
                         addElement(yyscanner);
                         countLines(yyscanner,yytext,yyleng);
                         yyextra->data.clear();
                         BEGIN(Content);
                       }
  {SP}                 { countLines(yyscanner,yytext,yyleng); }
}
|
|
<Attributes>{
  "/"                  { /* slash before the closing bracket: the tag closes itself */
                         yyextra->selfClose = true; }
  {NAME}               { yyextra->attrName.assign(yytext); }
  "="                  { BEGIN(AttributeValue); }
  {CLOSE}              { /* end of the tag: report it and start collecting new text */
                         addElement(yyscanner);
                         countLines(yyscanner,yytext,yyleng);
                         yyextra->data.clear();
                         BEGIN(Content);
                       }
  {SP}                 { countLines(yyscanner,yytext,yyleng); }
}
|
|
<AttributeValue>{
  {SP}                 { countLines(yyscanner,yytext,yyleng); }
  ['"]                 { /* opening quote: remember which character has to close the value */
                         yyextra->stringChar = yytext[0];
                         yyextra->attrValue.clear();
                         BEGIN(AttrValueStr);
                       }
  .                    { /* anything else: report it and rescan the character as part of the tag */
                         std::string msg("Missing attribute value. Unexpected character `");
                         msg += yytext;
                         msg += "` found";
                         reportError(yyscanner,msg);
                         unput(*yytext);
                         BEGIN(Attributes);
                       }
}
|
|
<AttrValueStr>{
  [^'"\n]+             { yyextra->attrValue.append(processData(yyscanner,yytext,yyleng)); }
  ['"]                 { if (*yytext!=yyextra->stringChar)
                         { /* the other kind of quote is ordinary text inside the value */
                           yyextra->attrValue.append(processData(yyscanner,yytext,yyleng));
                         }
                         else
                         { /* matching quote: the attribute is complete */
                           addAttribute(yyscanner);
                           BEGIN(Attributes);
                         }
                       }
  \n                   { /* a line break inside a value is stored as a single space */
                         ++yyextra->lineNr;
                         yyextra->attrValue.push_back(' ');
                       }
}
|
|
<CDataSection>{
  {ENDCDATA}           { /* end of the section: go back to the state that was active before it */
                         BEGIN(yyextra->cdataContext); }
  [^]\n]+              { /* CDATA text is collected verbatim, without entity decoding */
                         yyextra->data.append(yytext); }
  \n                   { yyextra->data.append(yytext);
                         ++yyextra->lineNr;
                       }
  .                    { yyextra->data.append(yytext); }
}
|
|
<Prolog>{
  {CLOSESPECIAL}       { /* end of the prolog: continue in the Initial state */
                         countLines(yyscanner,yytext,yyleng);
                         BEGIN(Initial);
                       }
  [^?\n]+              { /* the contents of the prolog are ignored */ }
  \n                   { ++yyextra->lineNr; }
  .                    { /* a question mark that does not end the prolog */ }
}
|
|
<Comment>{
  {COMMENTEND}         { /* end of the comment: go back to the state that was active before it */
                         countLines(yyscanner,yytext,yyleng);
                         BEGIN(yyextra->commentContext);
                       }
  [^\n-]+              { /* comment text is ignored */ }
  \n                   { ++yyextra->lineNr; }
  .                    { /* a dash that does not end the comment */ }
}
|
|
\n                     { /* fallback for newlines no state specific rule has handled */
                         ++yyextra->lineNr; }
.                      { /* fallback for any other character: report it */
                         std::string msg("Unexpected character `");
                         msg.append(yytext);
                         msg.append("` found");
                         reportError(yyscanner,msg);
                       }
|
|
|
|
%%
|
|
|
|
//----------------------------------------------------------------------------------------
|
|
|
|
/** Input callback of the scanner (installed through YY_INPUT).
 *
 *  Copies the next part of the zero terminated input string into \a buf and
 *  advances the read offset accordingly.
 *
 *  The type of \a max_size matches the forward declaration (yy_size_t); with a
 *  different type this would define a second overload and leave the declared
 *  function without a definition.
 *
 *  @param yyscanner  the scanner whose state holds the input string
 *  @param buf        destination buffer provided by the scanner
 *  @param max_size   capacity of \a buf in bytes
 *  @return number of bytes copied; 0 once the end of the input is reached
 */
static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
  const char *src = yyextra->inputString + yyextra->inputPosition;
  yy_size_t copied = 0;
  while (copied<max_size && src[copied]!='\0')
  {
    buf[copied] = src[copied];
    copied++;
  }
  yyextra->inputPosition += copied;
  return copied;
}
|
|
|
|
/** Increments the line counter once for every newline character found in the
 *  first \a len bytes of \a txt.
 */
static void countLines(yyscan_t yyscanner, const char *txt,yy_size_t len)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
  const char *end = txt+len;
  for (const char *cur=txt; cur!=end; ++cur)
  {
    if (*cur=='\n')
    {
      ++yyextra->lineNr;
    }
  }
}
|
|
|
|
/** Clears everything that describes a single tag (name, attributes and the
 *  closing/self-closing flags) so the next tag starts from a clean state.
 */
static void initElement(yyscan_t yyscanner)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
  yyextra->name.clear();
  yyextra->attrs.clear();
  yyextra->selfClose = false; // becomes true for <tag/>
  yyextra->isEnd     = false; // becomes true for </tag>
}
|
|
|
|
/** Verifies that the tag being closed matches the innermost open element.
 *
 *  On a match the element is removed from the stack of open elements;
 *  otherwise an error is reported and the stack is left untouched.
 */
static void checkAndUpdatePath(yyscan_t yyscanner)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;

  // nothing is open at all
  if (yyextra->xpath.empty())
  {
    std::string msg = "found closing tag '"+yyextra->name+"' without matching opening tag";
    reportError(yyscanner,msg);
    return;
  }

  const std::string &openTag = yyextra->xpath.back();
  if (openTag==yyextra->name) // matching end tag
  {
    yyextra->xpath.pop_back();
    return;
  }

  std::string msg = "Found closing tag '"+yyextra->name+"' that does not match the opening tag '"+openTag+"' at the same level";
  reportError(yyscanner,msg);
}
|
|
|
|
/** Reports the tag that has just been scanned completely.
 *
 *  - An opening or self-closing tag is pushed onto the stack of open elements
 *    and passed to the startElement handler.
 *  - A closing or self-closing tag is checked against the stack of open
 *    elements and passed to the endElement handler.
 *
 *  With scanner debugging enabled a trace of both events is written to stderr.
 */
static void addElement(yyscan_t yyscanner)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
  if (!yyextra->isEnd)
  {
    yyextra->xpath.push_back(yyextra->name);
    if (yyextra->handlers.startElement)
    {
      yyextra->handlers.startElement(yyextra->name,yyextra->attrs);
    }
    if (yy_flex_debug)
    {
      fprintf(stderr,"%d: startElement(%s,attr=[",yyextra->lineNr,yyextra->name.data());
      // iterate by reference: there is no need to copy every name/value pair
      for (const auto &attr : yyextra->attrs)
      {
        fprintf(stderr,"%s='%s' ",attr.first.c_str(),attr.second.c_str());
      }
      fprintf(stderr,"])\n");
    }
  }
  if (yyextra->isEnd || yyextra->selfClose)
  {
    if (yy_flex_debug)
    {
      fprintf(stderr,"%d: endElement(%s)\n",yyextra->lineNr,yyextra->name.data());
    }
    checkAndUpdatePath(yyscanner);
    if (yyextra->handlers.endElement)
    {
      yyextra->handlers.endElement(yyextra->name);
    }
  }
}
|
|
|
|
/** Returns a copy of \a str without leading and trailing whitespace.
 *
 *  Characters are converted to unsigned char before being classified, since
 *  passing a negative char value (e.g. a byte of a UTF-8 sequence) to isspace()
 *  is undefined. Positions are kept as size_t so long strings are not truncated.
 *
 *  @param str  the text to trim
 *  @return the trimmed text; empty if \a str is empty or consists of whitespace only
 */
static std::string trimSpaces(const std::string &str)
{
  auto isBlank = [](char c) { return isspace(static_cast<unsigned char>(c))!=0; };
  size_t first = 0;
  size_t last  = str.length(); // one past the last character to keep
  while (first<last && isBlank(str[first]))  first++;
  while (last>first && isBlank(str[last-1])) last--;
  return str.substr(first,last-first);
}
|
|
|
|
/** Hands the text collected since the last tag, with surrounding whitespace
 *  removed, to the characters handler. The handler is called even when the
 *  trimmed text is empty; the debug trace is only written for non-empty text.
 */
static void addCharacters(yyscan_t yyscanner)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
  std::string text = trimSpaces(yyextra->data);
  if (yyextra->handlers.characters)
  {
    yyextra->handlers.characters(text);
  }
  if (yy_flex_debug && !text.empty())
  {
    fprintf(stderr,"characters(%s)\n",text.c_str());
  }
}
|
|
|
|
/** Adds the attribute that was just scanned (name and value) to the attribute
 *  collection of the current tag.
 */
static void addAttribute(yyscan_t yyscanner)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
  const std::string &attrKey   = yyextra->attrName;
  const std::string &attrText  = yyextra->attrValue;
  yyextra->attrs.insert(std::make_pair(attrKey,attrText));
}
|
|
|
|
/** Reports a parse error at the current file name and line number.
 *
 *  The message is written to stderr when scanner debugging is enabled and is
 *  passed to the error handler when one has been installed.
 */
static void reportError(yyscan_t yyscanner,const std::string &msg)
{
  struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
  const bool debugOn    = yy_flex_debug!=0;
  const bool hasHandler = static_cast<bool>(yyextra->handlers.error);
  if (debugOn)
  {
    fprintf(stderr,"%s:%d: Error '%s'\n",yyextra->fileName.c_str(),yyextra->lineNr,msg.c_str());
  }
  if (hasHandler)
  {
    yyextra->handlers.error(yyextra->fileName,yyextra->lineNr,msg);
  }
}
|
|
|
|
static const char *entities_enc[] = { "amp", "quot", "gt", "lt", "apos" };
|
|
static const char entities_dec[] = { '&', '"', '>', '<', '\'' };
|
|
static const int num_entities = 5;
|
|
|
|
// replace character entities such as & in txt and return the string where entities
|
|
// are replaced
|
|
static std::string processData(yyscan_t yyscanner,const char *txt,yy_size_t len)
|
|
{
|
|
std::string result;
|
|
result.reserve(len);
|
|
for (yy_size_t i=0; i<len; i++)
|
|
{
|
|
char c = txt[i];
|
|
if (c=='&')
|
|
{
|
|
const int maxEntityLen = 10;
|
|
char entity[maxEntityLen+1];
|
|
entity[maxEntityLen]='\0';
|
|
for (yy_size_t j=0; j<maxEntityLen && i+j+1<len; j++)
|
|
{
|
|
if (txt[i+j+1]!=';')
|
|
{
|
|
entity[j]=txt[i+j+1];
|
|
}
|
|
else
|
|
{
|
|
entity[j]=0;
|
|
break;
|
|
}
|
|
}
|
|
bool found=false;
|
|
for (int e=0; !found && e<num_entities; e++)
|
|
{
|
|
if (strcmp(entity,entities_enc[e])==0)
|
|
{
|
|
result+=entities_dec[e];
|
|
i+=strlen(entities_enc[e])+1;
|
|
found=true;
|
|
}
|
|
}
|
|
if (!found)
|
|
{
|
|
std::string msg = std::string("Invalid character entity '&") + entity + ";' found\n";
|
|
reportError(yyscanner,msg);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
result+=c;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
//--------------------------------------------------------------
|
|
|
|
struct XMLParser::Private
|
|
{
|
|
yyscan_t yyscanner;
|
|
struct xmlYY_state xmlYY_extra;
|
|
};
|
|
|
|
/** Creates a parser that reports what it finds through \a handlers. */
XMLParser::XMLParser(const XMLHandlers &handlers) : p(new Private)
{
  Private &d = *p;
  // create the scanner with the parser state attached as its extra data
  xmlYYlex_init_extra(&d.xmlYY_extra,&d.yyscanner);
  d.xmlYY_extra.handlers = handlers;
}
|
|
|
|
/** Releases the scanner that was created by the constructor. */
XMLParser::~XMLParser()
{
  yyscan_t scanner = p->yyscanner;
  xmlYYlex_destroy(scanner);
}
|
|
|
|
void XMLParser::parse(const char *fileName,const char *inputStr, bool debugEnabled)
|
|
{
|
|
yyscan_t yyscanner = p->yyscanner;
|
|
struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
|
|
|
|
#ifdef FLEX_DEBUG
|
|
xmlYYset_debug(1,p->yyscanner);
|
|
#endif
|
|
|
|
if (inputStr==nullptr || inputStr[0]=='\0') return; // empty input
|
|
|
|
FILE *output = 0;
|
|
const char *enter_txt = 0;
|
|
const char *finished_txt = 0;
|
|
const char *pre_txt = 0;
|
|
if (yy_flex_debug) { output=stderr; pre_txt="--"; enter_txt="entering"; finished_txt="finished"; }
|
|
else if (debugEnabled) { output=stdout; pre_txt=""; enter_txt="Entering"; finished_txt="Finished"; }
|
|
|
|
if (output)
|
|
{
|
|
fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,enter_txt, __FILE__, fileName);
|
|
}
|
|
|
|
BEGIN(Initial);
|
|
yyextra->fileName = fileName;
|
|
yyextra->lineNr = 1;
|
|
yyextra->inputString = inputStr;
|
|
yyextra->inputPosition = 0;
|
|
|
|
xmlYYrestart( 0, yyscanner );
|
|
|
|
if (yyextra->handlers.startDocument)
|
|
{
|
|
yyextra->handlers.startDocument();
|
|
}
|
|
xmlYYlex(yyscanner);
|
|
if (yyextra->handlers.endDocument)
|
|
{
|
|
yyextra->handlers.endDocument();
|
|
}
|
|
|
|
if (!yyextra->xpath.empty())
|
|
{
|
|
std::string tagName = yyextra->xpath.back();
|
|
std::string msg = "End of file reached while expecting closing tag '"+tagName+"'";
|
|
reportError(yyscanner,msg);
|
|
}
|
|
|
|
if (output)
|
|
{
|
|
fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,finished_txt, __FILE__, fileName);
|
|
}
|
|
}
|
|
|
|
int XMLParser::lineNr() const
|
|
{
|
|
struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
|
|
return yyextra->lineNr;
|
|
}
|
|
|
|
std::string XMLParser::fileName() const
|
|
{
|
|
struct yyguts_t *yyg = (struct yyguts_t*)p->yyscanner;
|
|
return yyextra->fileName;
|
|
}
|
|
|
|
#if USE_STATE2STRING
|
|
#include "xml.l.h"
|
|
#endif
|