imagemagick/magick/token.c

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%                    TTTTT   OOO   K   K  EEEEE  N   N                        %
%                      T    O   O  K  K   E      NN  N                        %
%                      T    O   O  KKK    EEE    N N N                        %
%                      T    O   O  K  K   E      N  NN                        %
%                      T     OOO   K   K  EEEEE  N   N                        %
%                                                                             %
%                                                                             %
%                         MagickCore Token Methods                            %
%                                                                             %
%                             Software Design                                 %
%                                  Cristy                                     %
%                              January 1993                                   %
%                                                                             %
%                                                                             %
%  Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization      %
%  dedicated to making software imaging solutions freely available.           %
%                                                                             %
%  You may not use this file except in compliance with the License.  You may  %
%  obtain a copy of the License at                                            %
%                                                                             %
%    https://imagemagick.org/script/license.php                               %
%                                                                             %
%  Unless required by applicable law or agreed to in writing, software        %
%  distributed under the License is distributed on an "AS IS" BASIS,          %
%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
%  See the License for the specific language governing permissions and        %
%  limitations under the License.                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%
%
*/

/*
  Include declarations.
*/
#include "magick/studio.h"
#include "magick/exception.h"
#include "magick/exception-private.h"
#include "magick/image.h"
#include "magick/image-private.h"
#include "magick/memory_.h"
#include "magick/string_.h"
#include "magick/string-private.h"
#include "magick/token.h"
#include "magick/token-private.h"
#include "magick/utility.h"

/*
  Typedef declaractions.
*/
struct _TokenInfo
{
  int
    state;

  MagickStatusType
    flag;

  ssize_t
    offset;

  char
    quote;

  size_t
    signature;
};

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   A c q u i r e T o k e n I n f o                                           %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  AcquireTokenInfo() allocates the TokenInfo structure.
%
%  The format of the AcquireTokenInfo method is:
%
%      TokenInfo *AcquireTokenInfo()
%
*/
MagickExport TokenInfo *AcquireTokenInfo(void)
{
  TokenInfo
    *token_info;

  token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
  if (token_info == (TokenInfo *) NULL)
    ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
  token_info->signature=MagickCoreSignature;
  return(token_info);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   D e s t r o y T o k e n I n f o                                           %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  DestroyTokenInfo() deallocates memory associated with an TokenInfo
%  structure.
%
%  The format of the DestroyTokenInfo method is:
%
%      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
%
%  A description of each parameter follows:
%
%    o token_info: Specifies a pointer to an TokenInfo structure.
%
*/
MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
{
  (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
  assert(token_info != (TokenInfo *) NULL);
  assert(token_info->signature == MagickCoreSignature);
  token_info->signature=(~MagickCoreSignature);
  token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
  return(token_info);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
+   G e t N e x t T o k e n                                                   %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  GetNextToken() gets a token from the token stream.  A token is defined as
%  a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (.e.g "Quote me"), or a sequence enclosed in
%  parenthesis (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
%  separator characters: ':', '=', ',', and ';'.  GetNextToken() returns the
%  length of the consumed token.
%
%  The format of the GetNextToken method is:
%
%      size_t GetNextToken(const char *magick_restrict start,
%        const char **magick_restrict end,const size_t extent,
%        char *magick_restrict token)
%
%  A description of each parameter follows:
%
%    o start: the start of the token sequence.
%
%    o end: point to the end of the token sequence.
%
%    o extent: maximum extent of the token.
%
%    o token: copy the token to this buffer.
%
*/
MagickExport magick_hot_spot size_t GetNextToken(
  const char *magick_restrict start,const char **magick_restrict end,
  const size_t extent,char *magick_restrict token)
{
  double
    value;

  char
    *magick_restrict q;

  const char
    *magick_restrict p;

  ssize_t
    i;

  assert(start != (const char *) NULL);
  assert(token != (char *) NULL);
  i=0;
  p=start;
  while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
    p++;
  switch (*p)
  {
    case '\0':
      break;
    case '"':
    case '\'':
    case '`':
    case '{':
    {
      char
        escape;

      switch (*p)
      {
        case '"': escape='"'; break;
        case '\'': escape='\''; break;
        case '`': escape='\''; break;
        case '{': escape='}'; break;
        default: escape=(*p); break;
      }
      for (p++; *p != '\0'; p++)
      {
        if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
          p++;
        else
          if (*p == escape)
            {
              p++;
              break;
            }
        if (i < (ssize_t) (extent-1))
          token[i++]=(*p);
        if ((size_t) (p-start) >= (extent-1))
          break;
      }
      break;
    }
    case '/':
    {
      if (i < (ssize_t) (extent-1))
        token[i++]=(*p);
      p++;
      if ((*p == '>') || (*p == '/'))
        {
          if (i < (ssize_t) (extent-1))
            token[i++]=(*p);
          p++;
        }
      break;
    }
    default:
    {
      char
        *q;

      value=StringToDouble(p,&q);
      (void) value;
      if ((p != q) && (*p != ','))
        {
          for ( ; (p < q) && (*p != ','); p++)
          {
            if (i < (ssize_t) (extent-1))
              token[i++]=(*p);
            if ((size_t) (p-start) >= (extent-1))
              break;
          }
          if (*p == '%')
            {
              if (i < (ssize_t) (extent-1))
                token[i++]=(*p);
              p++;
            }
          break;
        }
      if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
          (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
        {
          if (i < (ssize_t) (extent-1))
            token[i++]=(*p);
          p++;
          break;
        }
      for ( ; *p != '\0'; p++)
      {
        if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
            (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
          break;
        if ((i > 0) && (*p == '<'))
          break;
        if (i < (ssize_t) (extent-1))
          token[i++]=(*p);
        if (*p == '>')
          break;
        if (*p == '(')
          {
            for (p++; *p != '\0'; p++)
            {
              if (i < (ssize_t) (extent-1))
                token[i++]=(*p);
              if ((*p == ')') && (*(p-1) != '\\'))
                break;
              if ((size_t) (p-start) >= (extent-1))
                break;
            }
            if (*p == '\0')
              break;
          }
        if ((size_t) (p-start) >= (extent-1))
          break;
      }
      break;
    }
  }
  token[i]='\0';
  if (LocaleNCompare(token,"url(#",5) == 0)
    {
      q=strrchr(token,')');
      if (q != (char *) NULL)
        {
          *q='\0';
          (void) memmove(token,token+5,(size_t) (q-token-4));
        }
    }
  while (isspace((int) ((unsigned char) *p)) != 0)
    p++;
  if (end != (const char **) NULL)
    *end=(const char *) p;
  return(p-start+1);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   G l o b E x p r e s s i o n                                               %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  GlobExpression() returns MagickTrue if the expression matches the pattern.
%
%  The format of the GlobExpression function is:
%
%      MagickBooleanType GlobExpression(const char *magick_restrict expression,
%        const char *magick_restrict pattern,
%        const MagickBooleanType case_insensitive)
%
%  A description of each parameter follows:
%
%    o expression: Specifies a pointer to a text string containing a file name.
%
%    o pattern: Specifies a pointer to a text string containing a pattern.
%
%    o case_insensitive: set to MagickTrue to ignore the case when matching
%      an expression.
%
*/
MagickExport MagickBooleanType GlobExpression(
  const char *magick_restrict expression,const char *magick_restrict pattern,
  const MagickBooleanType case_insensitive)
{
  MagickBooleanType
    done,
    match;

  const char
    *magick_restrict p;

  /*
    Return on empty pattern or '*'.
  */
  if (pattern == (char *) NULL)
    return(MagickTrue);
  if (GetUTFCode(pattern) == 0)
    return(MagickTrue);
  if (LocaleCompare(pattern,"*") == 0)
    return(MagickTrue);
  p=pattern+strlen(pattern)-1;
  if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
    {
      ExceptionInfo
        *exception;

      ImageInfo
        *image_info;

      /*
        Determine if pattern is a scene, i.e. img0001.pcd[2].
      */
      image_info=AcquireImageInfo();
      (void) CopyMagickString(image_info->filename,pattern,MaxTextExtent);
      exception=AcquireExceptionInfo();
      (void) SetImageInfo(image_info,0,exception);
      exception=DestroyExceptionInfo(exception);
      if (LocaleCompare(image_info->filename,pattern) != 0)
        {
          image_info=DestroyImageInfo(image_info);
          return(MagickFalse);
        }
      image_info=DestroyImageInfo(image_info);
    }
  /*
    Evaluate glob expression.
  */
  done=MagickFalse;
  while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
  {
    if (GetUTFCode(expression) == 0)
      if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
        break;
    switch (GetUTFCode(pattern))
    {
      case '*':
      {
        MagickBooleanType
          status;

        status=MagickFalse;
        while (GetUTFCode(pattern) == '*')
          pattern+=GetUTFOctets(pattern);
        while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
        {
          status=GlobExpression(expression,pattern,case_insensitive);
          expression+=GetUTFOctets(expression);
        }
        if (status != MagickFalse)
          {
            while (GetUTFCode(expression) != 0)
              expression+=GetUTFOctets(expression);
            while (GetUTFCode(pattern) != 0)
              pattern+=GetUTFOctets(pattern);
          }
        break;
      }
      case '[':
      {
        int
          c;

        pattern+=GetUTFOctets(pattern);
        for ( ; ; )
        {
          if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
            {
              done=MagickTrue;
              break;
            }
          if (GetUTFCode(pattern) == '\\')
            {
              pattern+=GetUTFOctets(pattern);
              if (GetUTFCode(pattern) == 0)
                {
                  done=MagickTrue;
                  break;
                }
             }
          if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
            {
              c=GetUTFCode(pattern);
              pattern+=GetUTFOctets(pattern);
              pattern+=GetUTFOctets(pattern);
              if (GetUTFCode(pattern) == ']')
                {
                  done=MagickTrue;
                  break;
                }
              if (GetUTFCode(pattern) == '\\')
                {
                  pattern+=GetUTFOctets(pattern);
                  if (GetUTFCode(pattern) == 0)
                    {
                      done=MagickTrue;
                      break;
                    }
                }
              if ((GetUTFCode(expression) < c) ||
                  (GetUTFCode(expression) > GetUTFCode(pattern)))
                {
                  pattern+=GetUTFOctets(pattern);
                  continue;
                }
            }
          else
            if (GetUTFCode(pattern) != GetUTFCode(expression))
              {
                pattern+=GetUTFOctets(pattern);
                continue;
              }
          pattern+=GetUTFOctets(pattern);
          while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
          {
            if ((GetUTFCode(pattern) == '\\') &&
                (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
              pattern+=GetUTFOctets(pattern);
            pattern+=GetUTFOctets(pattern);
          }
          if (GetUTFCode(pattern) != 0)
            {
              pattern+=GetUTFOctets(pattern);
              expression+=GetUTFOctets(expression);
            }
          break;
        }
        break;
      }
      case '?':
      {
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
      case '{':
      {
        char
          *target;

        char
          *p;

        target=AcquireString(pattern);
        p=target;
        pattern++;
        while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
        {
          *p++=(*pattern++);
          if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
            {
              *p='\0';
              match=GlobExpression(expression,target,case_insensitive);
              if (match != MagickFalse)
                {
                  expression+=MagickMin(strlen(expression),strlen(target));
                  break;
                }
              p=target;
              pattern+=GetUTFOctets(pattern);
            }
        }
        while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
          pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) != 0)
          pattern+=GetUTFOctets(pattern);
        target=DestroyString(target);
        break;
      }
      case '\\':
      {
        pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) == 0)
          break;
      }
      default:
      {
        if (case_insensitive != MagickFalse)
          {
            if (LocaleLowercase((int) GetUTFCode(expression)) != LocaleLowercase((int) GetUTFCode(pattern)))
              {
                done=MagickTrue;
                break;
              }
          }
        else
          if (GetUTFCode(expression) != GetUTFCode(pattern))
            {
              done=MagickTrue;
              break;
            }
        expression+=GetUTFOctets(expression);
        pattern+=GetUTFOctets(pattern);
      }
    }
  }
  while (GetUTFCode(pattern) == '*')
    pattern+=GetUTFOctets(pattern);
  match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
    MagickTrue : MagickFalse;
  return(match);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
+     I s G l o b                                                             %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  IsGlob() returns MagickTrue if the path specification contains a globbing
%  pattern.
%
%  The format of the IsGlob method is:
%
%      MagickBooleanType IsGlob(const char *geometry)
%
%  A description of each parameter follows:
%
%    o path: the path.
%
*/
MagickExport MagickBooleanType IsGlob(const char *path)
{
  MagickBooleanType
    status = MagickFalse;

  const char
    *p;

  if (IsPathAccessible(path) != MagickFalse)
    return(MagickFalse);
  for (p=path; *p != '\0'; p++)
  {
    switch (*p)
    {
      case '*':
      case '?':
      case '{':
      case '}':
      case '[':
      case ']':
      {
        status=MagickTrue;
        break;
      }
      default:
        break;
    }
  }
  return(status);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   I s M a g i c k T r u e                                                   %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  IsMagickTrue() returns MagickTrue if the value is "true", "on", "yes" or
%  "1".
%
%  The format of the IsMagickTrue method is:
%
%      MagickBooleanType IsMagickTrue(const char *value)
%
%  A description of each parameter follows:
%
%    o option: either MagickTrue or MagickFalse depending on the value
%      parameter.
%
%    o value: Specifies a pointer to a character array.
%
*/
MagickExport MagickBooleanType IsMagickTrue(const char *value)
{
  if (value == (const char *) NULL)
    return(MagickFalse);
  if (LocaleCompare(value,"true") == 0)
    return(MagickTrue);
  if (LocaleCompare(value,"on") == 0)
    return(MagickTrue);
  if (LocaleCompare(value,"yes") == 0)
    return(MagickTrue);
  if (LocaleCompare(value,"1") == 0)
    return(MagickTrue);
  return(MagickFalse);
}

/*
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%                                                                             %
%                                                                             %
%                                                                             %
%   T o k e n i z e r                                                         %
%                                                                             %
%                                                                             %
%                                                                             %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
%  one at a time from a string of characters.  The characters used for white
%  space, for break characters, and for quotes can be specified.  Also,
%  characters in the string can be preceded by a specifiable escape character
%  which removes any special meaning the character may have.
%
%  Here is some terminology:
%
%    o token: A single unit of information in the form of a group of
%      characters.
%
%    o white space: Apace that gets ignored (except within quotes or when
%      escaped), like blanks and tabs. in addition, white space terminates a
%      non-quoted token.
%
%    o break set: One or more characters that separates non-quoted tokens.
%      Commas are a common break character. The usage of break characters to
%      signal the end of a token is the same as that of white space, except
%      multiple break characters with nothing or only white space between
%      generate a null token for each two break characters together.
%
%      For example, if blank is set to be the white space and comma is set to
%      be the break character, the line
%
%        A, B, C ,  , DEF
%
%        ... consists of 5 tokens:
%
%        1)  "A"
%        2)  "B"
%        3)  "C"
%        4)  "" (the null string)
%        5)  "DEF"
%
%    o Quote character: A character that, when surrounding a group of other
%      characters, causes the group of characters to be treated as a single
%      token, no matter how many white spaces or break characters exist in
%      the group. Also, a token always terminates after the closing quote.
%      For example, if ' is the quote character, blank is white space, and
%      comma is the break character, the following string
%
%        A, ' B, CD'EF GHI
%
%        ... consists of 4 tokens:
%
%        1)  "A"
%        2)  " B, CD" (note the blanks & comma)
%        3)  "EF"
%        4)  "GHI"
%
%      The quote characters themselves do not appear in the resultant
%      tokens.  The double quotes are delimiters i use here for
%      documentation purposes only.
%
%    o Escape character: A character which itself is ignored but which
%      causes the next character to be used as is.  ^ and \ are often used
%      as escape characters. An escape in the last position of the string
%      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
%      and non-escape) character. For example, assume white space, break
%      character, and quote are the same as in the above examples, and
%      further, assume that ^ is the escape character. Then, in the string
%
%        ABC, ' DEF ^' GH' I ^ J K^ L ^
%
%        ... there are 7 tokens:
%
%        1)  "ABC"
%        2)  " DEF ' GH"
%        3)  "I"
%        4)  " "     (a lone blank)
%        5)  "J"
%        6)  "K L"
%        7)  "^"     (passed as is at end of line)
%
%  The format of the Tokenizer method is:
%
%      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
%        const size_t max_token_length,const char *line,const char *white,
%        const char *break_set,const char *quote,const char escape,
%        char *breaker,int *next,char *quoted)
%
%  A description of each parameter follows:
%
%    o flag: right now, only the low order 3 bits are used.
%
%        1 => convert non-quoted tokens to upper case
%        2 => convert non-quoted tokens to lower case
%        0 => do not convert non-quoted tokens
%
%    o token: a character string containing the returned next token
%
%    o max_token_length: the maximum size of "token".  Characters beyond
%      "max_token_length" are truncated.
%
%    o string: the string to be parsed.
%
%    o white: a string of the valid white spaces.  example:
%
%        char whitesp[]={" \t"};
%
%      blank and tab will be valid white space.
%
%    o break: a string of the valid break characters. example:
%
%        char breakch[]={";,"};
%
%      semicolon and comma will be valid break characters.
%
%    o quote: a string of the valid quote characters. An example would be
%
%        char whitesp[]={"'\"");
%
%      (this causes single and double quotes to be valid) Note that a
%      token starting with one of these characters needs the same quote
%      character to terminate it.
%
%      for example:
%
%        "ABC '
%
%      is unterminated, but
%
%        "DEF" and 'GHI'
%
%      are properly terminated.  Note that different quote characters
%      can appear on the same line; only for a given token do the quote
%      characters have to be the same.
%
%    o escape: the escape character (NOT a string ... only one
%      allowed). Use zero if none is desired.
%
%    o breaker: the break character used to terminate the current
%      token.  If the token was quoted, this will be the quote used.  If
%      the token is the last one on the line, this will be zero.
%
%    o next: this variable points to the first character of the
%      next token.  it gets reset by "tokenizer" as it steps through the
%      string.  Set it to 0 upon initialization, and leave it alone
%      after that.  You can change it if you want to jump around in the
%      string or re-parse from the beginning, but be careful.
%
%    o quoted: set to True if the token was quoted and MagickFalse
%      if not.  You may need this information (for example:  in C, a
%      string with quotes around it is a character string, while one
%      without is an identifier).
%
%    o result: 0 if we haven't reached EOS (end of string), and 1
%      if we have.
%
*/

#define IN_WHITE 0
#define IN_TOKEN 1
#define IN_QUOTE 2
#define IN_OZONE 3

static ssize_t sindex(int c,const char *string)
{
  const char
    *p;

  for (p=string; *p != '\0'; p++)
    if (c == (int) (*p))
      return((ssize_t) (p-string));
  return(-1);
}

static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  ssize_t
    i;

  if ((token_info->offset < 0) ||
      ((size_t) token_info->offset >= (max_token_length-1)))
    return;
  i=token_info->offset++;
  string[i]=(char) c;
  if (token_info->state == IN_QUOTE)
    return;
  switch (token_info->flag & 0x03)
  {
    case 1:
    {
      string[i]=(char) LocaleUppercase(c);
      break;
    }
    case 2:
    {
      string[i]=(char) LocaleLowercase(c);
      break;
    }
    default:
      break;
  }
}

MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
  char *token,const size_t max_token_length,const char *line,const char *white,
  const char *break_set,const char *quote,const char escape,char *breaker,
  int *next,char *quoted)
{
  int
    c;

  ssize_t
    i;

  *breaker='\0';
  *quoted='\0';
  if (line[*next] == '\0')
    return(1);
  token_info->state=IN_WHITE;
  token_info->quote=(char) MagickFalse;
  token_info->flag=flag;
  for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
  {
    c=(int) line[*next];
    i=sindex(c,break_set);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_TOKEN:
          case IN_OZONE:
          {
            (*next)++;
            *breaker=break_set[i];
            token[token_info->offset]='\0';
            return(0);
          }
          case IN_QUOTE:
          {
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    i=sindex(c,quote);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            token_info->state=IN_QUOTE;
            token_info->quote=quote[i];
            *quoted=(char) MagickTrue;
            break;
          }
          case IN_QUOTE:
          {
            if (quote[i] != token_info->quote)
              StoreToken(token_info,token,max_token_length,c);
            else
              {
                token_info->state=IN_OZONE;
                token_info->quote='\0';
              }
            break;
          }
          case IN_TOKEN:
          case IN_OZONE:
          {
            *breaker=(char) c;
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    i=sindex(c,white);
    if (i >= 0)
      {
        switch (token_info->state)
        {
          case IN_WHITE:
          case IN_OZONE:
            break;
          case IN_TOKEN:
          {
            token_info->state=IN_OZONE;
            break;
          }
          case IN_QUOTE:
          {
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
        }
        continue;
      }
    if (c == (int) escape)
      {
        if (line[(*next)+1] == '\0')
          {
            *breaker='\0';
            StoreToken(token_info,token,max_token_length,c);
            (*next)++;
            token[token_info->offset]='\0';
            return(0);
          }
        switch (token_info->state)
        {
          case IN_WHITE:
          {
            (*next)--;
            token_info->state=IN_TOKEN;
            break;
          }
          case IN_TOKEN:
          case IN_QUOTE:
          {
            (*next)++;
            c=(int) line[*next];
            StoreToken(token_info,token,max_token_length,c);
            break;
          }
          case IN_OZONE:
          {
            token[token_info->offset]='\0';
            return(0);
          }
        }
        continue;
      }
    switch (token_info->state)
    {
      case IN_WHITE:
      {
        token_info->state=IN_TOKEN;
        StoreToken(token_info,token,max_token_length,c);
        break;
      }
      case IN_TOKEN:
      case IN_QUOTE:
      {
        StoreToken(token_info,token,max_token_length,c);
        break;
      }
      case IN_OZONE:
      {
        token[token_info->offset]='\0';
        return(0);
      }
    }
  }
  token[token_info->offset]='\0';
  return(0);
}