summaryrefslogtreecommitdiff
path: root/lib-src
diff options
context:
space:
mode:
authorYuuki Harano <masm+github@masm11.me>2021-06-13 17:34:06 +0900
committerYuuki Harano <masm+github@masm11.me>2021-06-13 17:34:06 +0900
commit7d5e94bada09e642a8bfc4f66804f7948bad40bc (patch)
tree38629672102b31bb38a855f24d4dd009e212c10d /lib-src
parent7673b6b9eb0af3add73e1614a466f142092b00aa (diff)
parentdc471feee3bcac872cc52cdc73282955cd2d219d (diff)
downloademacs-7d5e94bada09e642a8bfc4f66804f7948bad40bc.tar.gz
Merge branch 'master' of git.sv.gnu.org:/srv/git/emacs into feature/pgtk
Diffstat (limited to 'lib-src')
-rw-r--r--lib-src/etags.c505
1 files changed, 501 insertions, 4 deletions
diff --git a/lib-src/etags.c b/lib-src/etags.c
index d703183cef7..9f20e44caf4 100644
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -142,7 +142,14 @@ University of California, as described above. */
# define CTAGS false
#endif
-/* Copy to DEST from SRC (containing LEN bytes), and append a NUL byte. */
+/* Define MERCURY_HEURISTICS_RATIO as it was necessary to disambiguate
+ Mercury from Objective C, which have same file extensions .m
+ See comments before function test_objc_is_mercury for details. */
+#ifndef MERCURY_HEURISTICS_RATIO
+# define MERCURY_HEURISTICS_RATIO 0.5
+#endif
+
+/* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte. */
static void
memcpyz (void *dest, void const *src, ptrdiff_t len)
{
@@ -359,6 +366,7 @@ static void HTML_labels (FILE *);
static void Lisp_functions (FILE *);
static void Lua_functions (FILE *);
static void Makefile_targets (FILE *);
+static void Mercury_functions (FILE *);
static void Pascal_functions (FILE *);
static void Perl_functions (FILE *);
static void PHP_functions (FILE *);
@@ -379,6 +387,7 @@ static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *);
static bool nocase_tail (const char *);
static void get_tag (char *, char **);
static void get_lispy_tag (char *);
+static void test_objc_is_mercury (char *, language **);
static void analyze_regex (char *);
static void free_regexps (void);
@@ -684,10 +693,22 @@ static const char Makefile_help [] =
"In makefiles, targets are tags; additionally, variables are tags\n\
unless you specify '--no-globals'.";
+/* Mercury and Objective C share the same .m file extensions. */
+static const char *Mercury_suffixes [] =
+ {"m",
+ NULL};
+static const char Mercury_help [] =
+ "In Mercury code, tags are all declarations beginning a line with ':-'\n\
+and optionally Prolog-like definitions (first rule for a predicate or \
+function).\n\
+To enable this behavior, run etags using --declarations.";
+static bool with_mercury_definitions = false;
+float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
+
static const char *Objc_suffixes [] =
- { "lm", /* Objective lex file */
- "m", /* Objective C file */
- NULL };
+ { "lm", /* Objective lex file */
+ "m", /* By default, Objective C file will be assumed. */
+ NULL};
static const char Objc_help [] =
"In Objective C code, tags include Objective C definitions for classes,\n\
class categories, methods and protocols. Tags for variables and\n\
@@ -831,7 +852,9 @@ static language lang_names [] =
{ "lisp", Lisp_help, Lisp_functions, Lisp_suffixes },
{ "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
{ "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames},
+ /* objc listed before mercury as it is a better default for .m extensions. */
{ "objc", Objc_help, plain_C_entries, Objc_suffixes },
+ { "mercury", Mercury_help, Mercury_functions, Mercury_suffixes },
{ "pascal", Pascal_help, Pascal_functions, Pascal_suffixes },
{ "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
{ "php", PHP_help, PHP_functions, PHP_suffixes },
@@ -958,6 +981,9 @@ Relative ones are stored relative to the output file's directory.\n");
puts
("\tand create tags for extern variables unless --no-globals is used.");
+ puts ("In Mercury, tag both declarations starting a line with ':-' and first\n\
+ predicates or functions in clauses.");
+
if (CTAGS)
puts ("-d, --defines\n\
Create tag entries for C #define constants and enum constants, too.");
@@ -1783,6 +1809,11 @@ find_entries (FILE *inf)
if (parser == NULL)
{
lang = get_language_from_filename (curfdp->infname, true);
+
+ /* Disambiguate file names between Objc and Mercury. */
+ if (lang != NULL && strcmp (lang->name, "objc") == 0)
+ test_objc_is_mercury (curfdp->infname, &lang);
+
if (lang != NULL && lang->function != NULL)
{
curfdp->lang = lang;
@@ -6072,6 +6103,472 @@ prolog_atom (char *s, size_t pos)
/*
+ * Support for Mercury
+ *
+ * Assumes that the declarations start at column 0.
+ * Original code by Sunichirou Sugou (1989) for Prolog.
+ * Rewritten by Anders Lindgren (1996) for Prolog.
+ * Adapted by Fabrice Nicol (2021) for Mercury.
+ * Note: Prolog-support behavior is preserved if
+ * --declarations is used, corresponding to
+ * with_mercury_definitions=true.
+ */
+
+static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
+static void mercury_skip_comment (linebuffer *, FILE *);
+static bool is_mercury_type = false;
+static bool is_mercury_quantifier = false;
+static bool is_mercury_declaration = false;
+
+/*
+ * Objective-C and Mercury have identical file extension .m.
+ * To disambiguate between Objective C and Mercury, parse file
+ * with the following heuristics hook:
+ * - if line starts with :-, choose Mercury unconditionally;
+ * - if line starts with #, @, choose Objective-C;
+ * - otherwise compute the following ratio:
+ *
+ * r = (number of lines with :-
+ * or % in non-commented parts or . at trimmed EOL)
+ * / (number of lines - number of lines starting by any amount
+ * of whitespace, optionally followed by comment(s))
+ *
+ * Note: strings are neglected in counts.
+ *
+ * If r > mercury_heuristics_ratio, choose Mercury.
+ * Experimental tests show that a possibly optimal default value for
+ * this floor value is around 0.5. This is the default value for
+ * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
+ * The closer r is to 0.5, the closer the source code to pure Prolog.
+ * Idiomatic Mercury is scored either with r = 1.0 or higher.
+ * Objective-C is scored with r = 0.0. When this fails, the r-score
+ * never rose above 0.1 in Objective-C tests.
+ */
+
+static void
+test_objc_is_mercury (char *this_file, language **lang)
+{
+ if (this_file == NULL) return;
+ FILE* fp = fopen (this_file, "r");
+ if (fp == NULL)
+ pfatal (this_file);
+
+ bool blank_line = false; /* Line starting with any amount of white space
+ followed by optional comment(s). */
+ bool commented_line = false;
+ bool found_dot = false;
+ bool only_space_before = true;
+ bool start_of_line = true;
+ int c;
+ intmax_t lines = 1;
+ intmax_t mercury_dots = 0;
+ intmax_t percentage_signs = 0;
+ intmax_t rule_signs = 0;
+ float ratio = 0;
+
+ while ((c = fgetc (fp)) != EOF)
+ {
+ switch (c)
+ {
+ case '\n':
+ if (! blank_line) ++lines;
+ blank_line = true;
+ commented_line = false;
+ start_of_line = true;
+ if (found_dot) ++mercury_dots;
+ found_dot = false;
+ only_space_before = true;
+ break;
+ case '.':
+ found_dot = ! commented_line;
+ only_space_before = false;
+ break;
+ case '%': /* More frequent in Mercury. May be modulo in Obj.-C. */
+ if (! commented_line)
+ {
+ ++percentage_signs;
+ /* Cannot tell if it is a comment or modulo yet for sure.
+ Yet works for heuristic purposes. */
+ commented_line = true;
+ }
+ found_dot = false;
+ start_of_line = false;
+ only_space_before = false;
+ break;
+ case '/':
+ {
+ int d = fgetc (fp);
+ found_dot = false;
+ only_space_before = false;
+ if (! commented_line)
+ {
+ if (d == '*')
+ commented_line = true;
+ else
+ /* If d == '/', cannot tell if it is an Obj.-C comment:
+ may be Mercury integ. division. */
+ blank_line = false;
+ }
+ }
+ FALLTHROUGH;
+ case ' ':
+ case '\t':
+ start_of_line = false;
+ break;
+ case ':':
+ c = fgetc (fp);
+ if (start_of_line)
+ {
+ if (c == '-')
+ {
+ ratio = 1.0; /* Failsafe, not an operator in Obj.-C. */
+ goto out;
+ }
+ start_of_line = false;
+ }
+ else
+ {
+ /* p :- q. Frequent in Mercury.
+ Rare or in quoted exprs in Obj.-C. */
+ if (c == '-' && ! commented_line)
+ ++rule_signs;
+ }
+ blank_line = false;
+ found_dot = false;
+ only_space_before = false;
+ break;
+ case '@':
+ case '#':
+ if (start_of_line || only_space_before)
+ {
+ ratio = 0.0;
+ goto out;
+ }
+ FALLTHROUGH;
+ default:
+ start_of_line = false;
+ blank_line = false;
+ found_dot = false;
+ only_space_before = false;
+ }
+ }
+
+ /* Fallback heuristic test. Not failsafe but errless in pratice. */
+ ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
+
+ out:
+ if (fclose (fp) == EOF)
+ pfatal (this_file);
+
+ if (ratio > mercury_heuristics_ratio)
+ {
+ /* Change the language from Objective-C to Mercury. */
+ static language lang0 = { "mercury", Mercury_help, Mercury_functions,
+ Mercury_suffixes };
+ *lang = &lang0;
+ }
+}
+
+static void
+Mercury_functions (FILE *inf)
+{
+ char *cp, *last = NULL;
+ ptrdiff_t lastlen = 0, allocated = 0;
+ if (declarations) with_mercury_definitions = true;
+
+ LOOP_ON_INPUT_LINES (inf, lb, cp)
+ {
+ if (cp[0] == '\0') /* Empty line. */
+ continue;
+ else if (c_isspace (cp[0]) || cp[0] == '%')
+ /* A Prolog-type comment or anything other than a declaration. */
+ continue;
+ else if (cp[0] == '/' && cp[1] == '*') /* Mercury C-type comment. */
+ mercury_skip_comment (&lb, inf);
+ else
+ {
+ is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
+
+ if (is_mercury_declaration
+ || with_mercury_definitions)
+ {
+ ptrdiff_t len = mercury_pr (cp, last, lastlen);
+ if (0 < len)
+ {
+ /* Store the declaration to avoid generating duplicate
+ tags later. */
+ if (allocated <= len)
+ {
+ xrnew (last, len + 1, 1);
+ allocated = len + 1;
+ }
+ memcpyz (last, cp, len);
+ lastlen = len;
+ }
+ }
+ }
+ }
+ free (last);
+}
+
+static void
+mercury_skip_comment (linebuffer *plb, FILE *inf)
+{
+ char *cp;
+
+ do
+ {
+ for (cp = plb->buffer; *cp != '\0'; ++cp)
+ if (cp[0] == '*' && cp[1] == '/')
+ return;
+ readline (plb, inf);
+ }
+ while (perhaps_more_input (inf));
+}
+
+/*
+ * A declaration is added if it matches:
+ * <beginning of line>:-<whitespace><Mercury Term><whitespace>(
+ * If with_mercury_definitions == true, we also add:
+ * <beginning of line><Mercury item><whitespace>(
+ * or <beginning of line><Mercury item><whitespace>:-
+ * As for Prolog support, different arities and types are not taken into
+ * consideration.
+ * Item is added to the tags database if it doesn't match the
+ * name of the previous declaration.
+ *
+ * Consume a Mercury declaration.
+ * Return the number of bytes consumed, or 0 if there was an error.
+ *
+ * A Mercury declaration must be one of:
+ * :- type
+ * :- solver type
+ * :- pred
+ * :- func
+ * :- inst
+ * :- mode
+ * :- typeclass
+ * :- instance
+ * :- pragma
+ * :- promise
+ * :- initialise
+ * :- finalise
+ * :- mutable
+ * :- module
+ * :- interface
+ * :- implementation
+ * :- import_module
+ * :- use_module
+ * :- include_module
+ * :- end_module
+ * followed on the same line by an alphanumeric sequence, starting with a lower
+ * case letter or by a single-quoted arbitrary string.
+ * Single quotes can escape themselves. Backslash quotes everything.
+ *
+ * Return the size of the name of the declaration or 0 if no header was found.
+ * As quantifiers may precede functions or predicates, we must list them too.
+ */
+
+static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
+ "func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
+ "initialise", "finalise", "mutable", "module", "interface", "implementation",
+ "import_module", "use_module", "include_module", "end_module", "some", "all"};
+
+static size_t
+mercury_decl (char *s, size_t pos)
+{
+ if (s == NULL) return 0;
+
+ size_t origpos;
+ origpos = pos;
+
+ while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) ++pos;
+
+ unsigned char decl_type_length = pos - origpos;
+ char buf[decl_type_length + 1];
+ memset (buf, 0, decl_type_length + 1);
+
+ /* Mercury declaration tags. Consume them, then check the declaration item
+ following :- is legitimate, then go on as in the prolog case. */
+
+ memcpy (buf, &s[origpos], decl_type_length);
+
+ bool found_decl_tag = false;
+
+ if (is_mercury_quantifier)
+ {
+ if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */
+ return 0;
+ is_mercury_quantifier = false; /* Reset to base value. */
+ found_decl_tag = true;
+ }
+ else
+ {
+ for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
+ {
+ if (strcmp (buf, Mercury_decl_tags[j]) == 0)
+ {
+ found_decl_tag = true;
+ if (strcmp (buf, "type") == 0)
+ is_mercury_type = true;
+
+ if (strcmp (buf, "some") == 0
+ || strcmp (buf, "all") == 0)
+ {
+ is_mercury_quantifier = true;
+ }
+
+ break; /* Found declaration tag of rank j. */
+ }
+ else
+ /* 'solver type' has a blank in the middle,
+ so this is the hard case. */
+ if (strcmp (buf, "solver") == 0)
+ {
+ ++pos;
+ while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_'))
+ ++pos;
+
+ decl_type_length = pos - origpos;
+ char buf2[decl_type_length + 1];
+ memset (buf2, 0, decl_type_length + 1);
+ memcpy (buf2, &s[origpos], decl_type_length);
+
+ if (strcmp (buf2, "solver type") == 0)
+ {
+ found_decl_tag = false;
+ break; /* Found declaration tag of rank j. */
+ }
+ }
+ }
+ }
+
+ /* If with_mercury_definitions == false
+ * this is a Mercury syntax error, ignoring... */
+
+ if (with_mercury_definitions)
+ {
+ if (found_decl_tag)
+ pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
+ else
+ /* Prolog-like behavior
+ * we have parsed the predicate once, yet inappropriately
+ * so restarting again the parsing step. */
+ pos = 0;
+ }
+ else
+ {
+ if (found_decl_tag)
+ pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */
+ else
+ return 0;
+ }
+
+ /* From now on it is the same as for Prolog except for module dots. */
+
+ if (c_islower (s[pos]) || s[pos] == '_' )
+ {
+ /* The name is unquoted.
+ Do not confuse module dots with end-of-declaration dots. */
+
+ while (c_isalnum (s[pos])
+ || s[pos] == '_'
+ || (s[pos] == '.' /* A module dot. */
+ && s + pos + 1 != NULL
+ && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')))
+ ++pos;
+
+ return pos - origpos;
+ }
+ else if (s[pos] == '\'')
+ {
+ ++pos;
+ for (;;)
+ {
+ if (s[pos] == '\'')
+ {
+ ++pos;
+ if (s[pos] != '\'')
+ break;
+ ++pos; /* A double quote. */
+ }
+ else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */
+ return 0;
+ else if (s[pos] == '\\')
+ {
+ if (s[pos+1] == '\0')
+ return 0;
+ pos += 2;
+ }
+ else
+ ++pos;
+ }
+ return pos - origpos;
+ }
+ else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */
+ {
+ for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {}
+ if (s + pos == NULL) return 0;
+ ++pos;
+ pos = skip_spaces (s + pos) - s;
+ return mercury_decl (s, pos) + pos - origpos;
+ }
+ else
+ return 0;
+}
+
+static ptrdiff_t
+mercury_pr (char *s, char *last, ptrdiff_t lastlen)
+{
+ size_t len0 = 0;
+ is_mercury_type = false;
+ is_mercury_quantifier = false;
+
+ if (is_mercury_declaration)
+ {
+ /* Skip len0 blanks only for declarations. */
+ len0 = skip_spaces (s + 2) - s;
+ }
+
+ size_t len = mercury_decl (s, len0);
+ if (len == 0) return 0;
+ len += len0;
+
+ if (( (s[len] == '.' /* This is a statement dot, not a module dot. */
+ || (s[len] == '(' && (len += 1))
+ || (s[len] == ':' /* Stopping in case of a rule. */
+ && s[len + 1] == '-'
+ && (len += 2)))
+ && (lastlen != len || memcmp (s, last, len) != 0)
+ )
+ /* Types are often declared on several lines so keeping just
+ the first line. */
+ || is_mercury_type)
+ {
+ char *name = skip_non_spaces (s + len0);
+ size_t namelen;
+ if (name >= s + len)
+ {
+ name = s;
+ namelen = len;
+ }
+ else
+ {
+ name = skip_spaces (name);
+ namelen = len - (name - s);
+ }
+ /* Remove trailing non-name characters. */
+ while (namelen > 0 && notinname (name[namelen - 1]))
+ namelen--;
+ make_tag (name, namelen, true, s, len, lineno, linecharno);
+ return len;
+ }
+
+ return 0;
+}
+
+
+/*
* Support for Erlang
*
* Generates tags for functions, defines, and records.