412 lines
9.5 KiB
C
412 lines
9.5 KiB
C
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; coding: utf-8 -*-
|
|
* gtksourceregex.c
|
|
* This file is part of GtkSourceView
|
|
*
|
|
* Copyright (C) 2003 - Gustavo Giráldez <gustavo.giraldez@gmx.net>
|
|
* Copyright (C) 2005, 2006 - Marco Barisione, Emanuele Aina
|
|
*
|
|
* GtkSourceView is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* GtkSourceView is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#include <string.h>
|
|
#include <glib.h>
|
|
#include "gtksourceview-i18n.h"
|
|
#include "gtksourceview-utils.h"
|
|
#include "gtksourceregex.h"
|
|
|
|
/*
|
|
* GRegex wrapper which adds a few features needed for syntax highlighting,
|
|
* in particular resolving "\%{...@start}" and forbidding the use of \C.
|
|
*/
|
|
|
|
/* Regex used to match "\%{...@start}". */
|
|
static GRegex *
|
|
get_start_ref_regex (void)
|
|
{
|
|
static GRegex *start_ref_regex = NULL;
|
|
|
|
if (start_ref_regex == NULL)
|
|
{
|
|
start_ref_regex = g_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
|
|
G_REGEX_OPTIMIZE, 0, NULL);
|
|
}
|
|
|
|
return start_ref_regex;
|
|
}
|
|
|
|
struct _GtkSourceRegex
|
|
{
|
|
union {
|
|
struct {
|
|
gchar *pattern;
|
|
GRegexCompileFlags flags;
|
|
} info;
|
|
struct {
|
|
GRegex *regex;
|
|
GMatchInfo *match;
|
|
} regex;
|
|
} u;
|
|
|
|
guint ref_count;
|
|
guint resolved : 1;
|
|
};
|
|
|
|
/* Check whether pattern contains \C escape sequence,
|
|
* which means "single byte" in pcre and naturally leads
|
|
* to crash if used for highlighting.
|
|
*/
|
|
static gboolean
|
|
find_single_byte_escape (const gchar *string)
|
|
{
|
|
const char *p = string;
|
|
|
|
while ((p = strstr (p, "\\C")))
|
|
{
|
|
const char *slash;
|
|
gboolean found;
|
|
|
|
if (p == string)
|
|
return TRUE;
|
|
|
|
found = TRUE;
|
|
slash = p - 1;
|
|
|
|
while (slash >= string && *slash == '\\')
|
|
{
|
|
found = !found;
|
|
slash--;
|
|
}
|
|
|
|
if (found)
|
|
return TRUE;
|
|
|
|
p += 2;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/**
|
|
* gtk_source_regex_new:
|
|
* @pattern: the regular expression.
|
|
* @flags: compile options for @pattern.
|
|
* @error: location to store the error occuring, or %NULL to ignore errors.
|
|
*
|
|
* Creates a new regex.
|
|
*
|
|
* Returns: a newly-allocated #GtkSourceRegex.
|
|
*/
|
|
GtkSourceRegex *
|
|
_gtk_source_regex_new (const gchar *pattern,
|
|
GRegexCompileFlags flags,
|
|
GError **error)
|
|
{
|
|
GtkSourceRegex *regex;
|
|
|
|
g_return_val_if_fail (pattern != NULL, NULL);
|
|
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
|
|
|
|
if (find_single_byte_escape (pattern))
|
|
{
|
|
g_set_error_literal (error, G_REGEX_ERROR,
|
|
G_REGEX_ERROR_COMPILE,
|
|
_("using \\C is not supported in language definitions"));
|
|
return NULL;
|
|
}
|
|
|
|
regex = g_slice_new0 (GtkSourceRegex);
|
|
regex->ref_count = 1;
|
|
|
|
if (g_regex_match (get_start_ref_regex (), pattern, 0, NULL))
|
|
{
|
|
regex->resolved = FALSE;
|
|
regex->u.info.pattern = g_strdup (pattern);
|
|
regex->u.info.flags = flags;
|
|
}
|
|
else
|
|
{
|
|
regex->resolved = TRUE;
|
|
regex->u.regex.regex = g_regex_new (pattern,
|
|
flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
|
|
error);
|
|
|
|
if (regex->u.regex.regex == NULL)
|
|
{
|
|
g_slice_free (GtkSourceRegex, regex);
|
|
regex = NULL;
|
|
}
|
|
}
|
|
|
|
return regex;
|
|
}
|
|
|
|
/* Takes an additional reference on @regex and returns it.
 * A NULL @regex is accepted and returned unchanged.
 */
GtkSourceRegex *
_gtk_source_regex_ref (GtkSourceRegex *regex)
{
	if (regex == NULL)
	{
		return NULL;
	}

	regex->ref_count += 1;

	return regex;
}
|
|
|
|
void
|
|
_gtk_source_regex_unref (GtkSourceRegex *regex)
|
|
{
|
|
if (regex != NULL && --regex->ref_count == 0)
|
|
{
|
|
if (regex->resolved)
|
|
{
|
|
g_regex_unref (regex->u.regex.regex);
|
|
if (regex->u.regex.match)
|
|
g_match_info_free (regex->u.regex.match);
|
|
}
|
|
else
|
|
{
|
|
g_free (regex->u.info.pattern);
|
|
}
|
|
g_slice_free (GtkSourceRegex, regex);
|
|
}
|
|
}
|
|
|
|
struct RegexResolveData {
|
|
GtkSourceRegex *start_regex;
|
|
const gchar *matched_text;
|
|
};
|
|
|
|
static gboolean
|
|
replace_start_regex (const GMatchInfo *match_info,
|
|
GString *expanded_regex,
|
|
gpointer user_data)
|
|
{
|
|
gchar *num_string, *subst, *subst_escaped, *escapes;
|
|
gint num;
|
|
struct RegexResolveData *data = user_data;
|
|
|
|
escapes = g_match_info_fetch (match_info, 1);
|
|
num_string = g_match_info_fetch (match_info, 2);
|
|
num = _gtk_source_string_to_int (num_string);
|
|
|
|
if (num < 0)
|
|
{
|
|
subst = g_match_info_fetch_named (data->start_regex->u.regex.match,
|
|
num_string);
|
|
}
|
|
else
|
|
{
|
|
subst = g_match_info_fetch (data->start_regex->u.regex.match,
|
|
num);
|
|
}
|
|
|
|
if (subst != NULL)
|
|
{
|
|
subst_escaped = g_regex_escape_string (subst, -1);
|
|
}
|
|
else
|
|
{
|
|
g_warning ("Invalid group: %s", num_string);
|
|
subst_escaped = g_strdup ("");
|
|
}
|
|
|
|
g_string_append (expanded_regex, escapes);
|
|
g_string_append (expanded_regex, subst_escaped);
|
|
|
|
g_free (escapes);
|
|
g_free (num_string);
|
|
g_free (subst);
|
|
g_free (subst_escaped);
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/**
|
|
* _gtk_source_regex_resolve:
|
|
* @regex: a #GtkSourceRegex.
|
|
* @start_regex: a #GtkSourceRegex.
|
|
* @matched_text: the text matched against @start_regex.
|
|
*
|
|
* If the regular expression does not contain references to the start
|
|
* regular expression, the functions increases the reference count
|
|
* of @regex and returns it.
|
|
*
|
|
* If the regular expression contains references to the start regular
|
|
* expression in the form "\%{start_sub_pattern@start}", it replaces
|
|
* them (they are extracted from @start_regex and @matched_text) and
|
|
* returns the new regular expression.
|
|
*
|
|
* Returns: a #GtkSourceRegex.
|
|
*/
|
|
GtkSourceRegex *
|
|
_gtk_source_regex_resolve (GtkSourceRegex *regex,
|
|
GtkSourceRegex *start_regex,
|
|
const gchar *matched_text)
|
|
{
|
|
gchar *expanded_regex;
|
|
GtkSourceRegex *new_regex;
|
|
struct RegexResolveData data;
|
|
|
|
if (regex == NULL || regex->resolved)
|
|
return _gtk_source_regex_ref (regex);
|
|
|
|
data.start_regex = start_regex;
|
|
data.matched_text = matched_text;
|
|
expanded_regex = g_regex_replace_eval (get_start_ref_regex (),
|
|
regex->u.info.pattern,
|
|
-1, 0, 0,
|
|
replace_start_regex,
|
|
&data, NULL);
|
|
new_regex = _gtk_source_regex_new (expanded_regex, regex->u.info.flags, NULL);
|
|
if (new_regex == NULL || !new_regex->resolved)
|
|
{
|
|
_gtk_source_regex_unref (new_regex);
|
|
g_warning ("Regular expression %s cannot be expanded.",
|
|
regex->u.info.pattern);
|
|
/* Returns a regex that nevers matches. */
|
|
new_regex = _gtk_source_regex_new ("$never-match^", 0, NULL);
|
|
}
|
|
|
|
g_free (expanded_regex);
|
|
|
|
return new_regex;
|
|
}
|
|
|
|
gboolean
|
|
_gtk_source_regex_is_resolved (GtkSourceRegex *regex)
|
|
{
|
|
return regex->resolved;
|
|
}
|
|
|
|
gboolean
|
|
_gtk_source_regex_match (GtkSourceRegex *regex,
|
|
const gchar *line,
|
|
gint byte_length,
|
|
gint byte_pos)
|
|
{
|
|
gboolean result;
|
|
|
|
g_assert (regex->resolved);
|
|
|
|
if (regex->u.regex.match)
|
|
{
|
|
g_match_info_free (regex->u.regex.match);
|
|
regex->u.regex.match = NULL;
|
|
}
|
|
|
|
result = g_regex_match_full (regex->u.regex.regex, line,
|
|
byte_length, byte_pos,
|
|
0, ®ex->u.regex.match,
|
|
NULL);
|
|
|
|
return result;
|
|
}
|
|
|
|
gchar *
|
|
_gtk_source_regex_fetch (GtkSourceRegex *regex,
|
|
gint num)
|
|
{
|
|
g_assert (regex->resolved);
|
|
|
|
return g_match_info_fetch (regex->u.regex.match, num);
|
|
}
|
|
|
|
void
|
|
_gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
|
|
const gchar *text,
|
|
gint num,
|
|
gint *start_pos, /* character offsets */
|
|
gint *end_pos) /* character offsets */
|
|
{
|
|
gint byte_start_pos, byte_end_pos;
|
|
|
|
g_assert (regex->resolved);
|
|
|
|
/* g_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
|
|
if (!g_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
|
|
{
|
|
if (start_pos != NULL)
|
|
*start_pos = -1;
|
|
if (end_pos != NULL)
|
|
*end_pos = -1;
|
|
}
|
|
else
|
|
{
|
|
if (start_pos != NULL)
|
|
*start_pos = g_utf8_pointer_to_offset (text, text + MAX (0, byte_start_pos));
|
|
if (end_pos != NULL)
|
|
*end_pos = g_utf8_pointer_to_offset (text, text + MAX (0, byte_end_pos));
|
|
}
|
|
}
|
|
|
|
void
|
|
_gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
|
|
gint num,
|
|
gint *start_pos_p, /* byte offsets */
|
|
gint *end_pos_p) /* byte offsets */
|
|
{
|
|
gint start_pos;
|
|
gint end_pos;
|
|
|
|
g_assert (regex->resolved);
|
|
|
|
if (!g_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
|
|
{
|
|
start_pos = -1;
|
|
end_pos = -1;
|
|
}
|
|
|
|
if (start_pos_p != NULL)
|
|
*start_pos_p = start_pos;
|
|
if (end_pos_p != NULL)
|
|
*end_pos_p = end_pos;
|
|
}
|
|
|
|
void
|
|
_gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
|
|
const gchar *text,
|
|
const gchar *name,
|
|
gint *start_pos, /* character offsets */
|
|
gint *end_pos) /* character offsets */
|
|
{
|
|
gint byte_start_pos, byte_end_pos;
|
|
|
|
g_assert (regex->resolved);
|
|
|
|
if (!g_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
|
|
{
|
|
if (start_pos != NULL)
|
|
*start_pos = -1;
|
|
if (end_pos != NULL)
|
|
*end_pos = -1;
|
|
}
|
|
else
|
|
{
|
|
if (start_pos != NULL)
|
|
*start_pos = g_utf8_pointer_to_offset (text, text + byte_start_pos);
|
|
if (end_pos != NULL)
|
|
*end_pos = g_utf8_pointer_to_offset (text, text + byte_end_pos);
|
|
}
|
|
}
|
|
|
|
const gchar *
|
|
_gtk_source_regex_get_pattern (GtkSourceRegex *regex)
|
|
{
|
|
g_assert (regex->resolved);
|
|
|
|
return g_regex_get_pattern (regex->u.regex.regex);
|
|
}
|
|
|