Merge branch 'master' of git.sv.gnu.org:/srv/git/emacs into feature/pgtk

author: Yuuki Harano <masm+github@masm11.me> 2021-06-13 17:34:06 +0900
committer: Yuuki Harano <masm+github@masm11.me> 2021-06-13 17:34:06 +0900
commit: 7d5e94bada09e642a8bfc4f66804f7948bad40bc (patch)
tree: 38629672102b31bb38a855f24d4dd009e212c10d /lib-src
parent: 7673b6b9eb0af3add73e1614a466f142092b00aa (diff)
parent: dc471feee3bcac872cc52cdc73282955cd2d219d (diff)
download: emacs-7d5e94bada09e642a8bfc4f66804f7948bad40bc.tar.gz
1 files changed, 501 insertions, 4 deletions
diff --git a/lib-src/etags.c b/lib-src/etags.c
index d703183cef7..9f20e44caf4 100644
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -142,7 +142,14 @@ University of California, as described above. */
 # define CTAGS false
 #endif
 
-/* Copy to DEST from SRC (containing LEN bytes), and append a NUL byte.  */
+/* Define MERCURY_HEURISTICS_RATIO as it was necessary to disambiguate
+   Mercury from Objective C, which have same file extensions .m
+   See comments before function test_objc_is_mercury for details.  */
+#ifndef  MERCURY_HEURISTICS_RATIO
+# define MERCURY_HEURISTICS_RATIO 0.5
+#endif
+
+/* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte.  */
 static void
 memcpyz (void *dest, void const *src, ptrdiff_t len)
 {
@@ -359,6 +366,7 @@ static void HTML_labels (FILE *);
 static void Lisp_functions (FILE *);
 static void Lua_functions (FILE *);
 static void Makefile_targets (FILE *);
+static void Mercury_functions (FILE *);
 static void Pascal_functions (FILE *);
 static void Perl_functions (FILE *);
 static void PHP_functions (FILE *);
@@ -379,6 +387,7 @@ static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *);
 static bool nocase_tail (const char *);
 static void get_tag (char *, char **);
 static void get_lispy_tag (char *);
+static void test_objc_is_mercury (char *, language **);
 
 static void analyze_regex (char *);
 static void free_regexps (void);
@@ -684,10 +693,22 @@ static const char Makefile_help [] =
 "In makefiles, targets are tags; additionally, variables are tags\n\
 unless you specify '--no-globals'.";
 
+/* Mercury and Objective C share the same .m file extensions.  */
+static const char *Mercury_suffixes [] =
+  {"m",
+   NULL};
+static const char Mercury_help [] =
+  "In Mercury code, tags are all declarations beginning a line with ':-'\n\
+and optionally Prolog-like definitions (first rule for a predicate or \
+function).\n\
+To enable this behavior, run etags using --declarations.";
+static bool with_mercury_definitions = false;
+float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO;
+
 static const char *Objc_suffixes [] =
-  { "lm",			/* Objective lex file */
-    "m",			/* Objective C file */
-     NULL };
+  { "lm",			/* Objective lex file  */
+    "m",			/* By default, Objective C file will be assumed.  */
+     NULL};
 static const char Objc_help [] =
 "In Objective C code, tags include Objective C definitions for classes,\n\
 class categories, methods and protocols.  Tags for variables and\n\
@@ -831,7 +852,9 @@ static language lang_names [] =
   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
   { "lua",       Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters},
   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
+  /* objc listed before mercury as it is a better default for .m extensions.  */
   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
+  { "mercury",   Mercury_help,   Mercury_functions, Mercury_suffixes   },
   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
@@ -958,6 +981,9 @@ Relative ones are stored relative to the output file's directory.\n");
     puts
       ("\tand create tags for extern variables unless --no-globals is used.");
 
+  puts ("In Mercury, tag both declarations starting a line with ':-' and first\n\
+        predicates or functions in clauses.");
+
   if (CTAGS)
     puts ("-d, --defines\n\
         Create tag entries for C #define constants and enum constants, too.");
@@ -1783,6 +1809,11 @@ find_entries (FILE *inf)
   if (parser == NULL)
     {
       lang = get_language_from_filename (curfdp->infname, true);
+
+      /* Disambiguate file names between Objc and Mercury. */
+      if (lang != NULL && strcmp (lang->name, "objc") == 0)
+	test_objc_is_mercury (curfdp->infname, &lang);
+
       if (lang != NULL && lang->function != NULL)
 	{
 	  curfdp->lang = lang;
@@ -6072,6 +6103,472 @@ prolog_atom (char *s, size_t pos)
 
 
 /*
+ * Support for Mercury
+ *
+ * Assumes that the declarations start at column 0.
+ * Original code by Sunichirou Sugou (1989) for Prolog.
+ * Rewritten by Anders Lindgren (1996) for Prolog.
+ * Adapted by Fabrice Nicol (2021) for Mercury.
+ * Note: Prolog-support behavior is preserved if
+ * --declarations is used, corresponding to
+ * with_mercury_definitions=true.
+ */
+
+static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t);
+static void mercury_skip_comment (linebuffer *, FILE *);
+static bool is_mercury_type = false;
+static bool is_mercury_quantifier = false;
+static bool is_mercury_declaration = false;
+
+/*
+ * Objective-C and Mercury have identical file extension .m.
+ * To disambiguate between Objective C and Mercury, parse file
+ * with the following heuristics hook:
+ *   - if line starts with :-, choose Mercury unconditionally;
+ *   - if line starts with #, @, choose Objective-C;
+ *   - otherwise compute the following ratio:
+ *
+ *     r = (number of lines with :-
+ *          or % in non-commented parts or . at trimmed EOL)
+ *         / (number of lines - number of lines starting by any amount
+ *                        of whitespace, optionally followed by comment(s))
+ *
+ * Note: strings are neglected in counts.
+ *
+ * If r > mercury_heuristics_ratio, choose Mercury.
+ * Experimental tests show that a possibly optimal default value for
+ * this floor value is around 0.5.  This is the default value for
+ * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file.
+ * The closer r is to 0.5, the closer the source code to pure Prolog.
+ * Idiomatic Mercury is scored either with r = 1.0 or higher.
+ * Objective-C is scored with r = 0.0.  When this fails, the r-score
+ * never rose above 0.1 in Objective-C tests.
+ */
+
+static void
+test_objc_is_mercury (char *this_file, language **lang)
+{
+  if (this_file == NULL) return;
+  FILE* fp = fopen (this_file, "r");
+  if (fp == NULL)
+    pfatal (this_file);
+
+  bool blank_line = false; /* Line starting with any amount of white space
+			      followed by optional comment(s).  */
+  bool commented_line = false;
+  bool found_dot = false;
+  bool only_space_before = true;
+  bool start_of_line = true;
+  int c;
+  intmax_t lines = 1;
+  intmax_t mercury_dots = 0;
+  intmax_t percentage_signs = 0;
+  intmax_t rule_signs = 0;
+  float ratio = 0;
+
+  while ((c = fgetc (fp)) != EOF)
+    {
+      switch (c)
+	{
+	case '\n':
+	  if (! blank_line) ++lines;
+	  blank_line = true;
+	  commented_line = false;
+	  start_of_line = true;
+	  if (found_dot) ++mercury_dots;
+	  found_dot = false;
+	  only_space_before = true;
+	  break;
+	case '.':
+	  found_dot = ! commented_line;
+	  only_space_before = false;
+	  break;
+	case  '%': /* More frequent in Mercury.  May be modulo in Obj.-C.  */
+	  if (! commented_line)
+	    {
+	      ++percentage_signs;
+	      /* Cannot tell if it is a comment or modulo yet for sure.
+                 Yet works for heuristic purposes.  */
+	      commented_line = true;
+	    }
+	  found_dot = false;
+	  start_of_line = false;
+	  only_space_before = false;
+	  break;
+	case  '/':
+	  {
+	    int d = fgetc (fp);
+	    found_dot = false;
+	    only_space_before = false;
+	    if (! commented_line)
+	      {
+		if (d == '*')
+		  commented_line = true;
+		else
+		  /* If d == '/', cannot tell if it is an Obj.-C comment:
+		     may be Mercury integ. division.  */
+		    blank_line = false;
+	      }
+	  }
+	  FALLTHROUGH;
+        case  ' ':
+        case '\t':
+	  start_of_line = false;
+	  break;
+        case ':':
+	  c = fgetc (fp);
+	  if (start_of_line)
+	    {
+	      if (c == '-')
+		{
+		  ratio = 1.0; /* Failsafe, not an operator in Obj.-C.  */
+		  goto out;
+		}
+	      start_of_line = false;
+	    }
+	  else
+	    {
+	      /* p :- q.  Frequent in Mercury.
+		 Rare or in quoted exprs in Obj.-C.  */
+	      if (c == '-' && ! commented_line)
+		++rule_signs;
+	    }
+	  blank_line = false;
+	  found_dot = false;
+	  only_space_before = false;
+          break;
+	case '@':
+        case '#':
+	  if (start_of_line || only_space_before)
+	    {
+	      ratio = 0.0;
+	      goto out;
+	    }
+	  FALLTHROUGH;
+	default:
+	  start_of_line = false;
+	  blank_line = false;
+	  found_dot = false;
+	  only_space_before = false;
+	}
+    }
+
+  /* Fallback heuristic test.  Not failsafe but errless in pratice.  */
+  ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines;
+
+ out:
+  if (fclose (fp) == EOF)
+    pfatal (this_file);
+
+  if (ratio > mercury_heuristics_ratio)
+    {
+      /* Change the language from Objective-C to Mercury.  */
+      static language lang0 = { "mercury", Mercury_help, Mercury_functions,
+	Mercury_suffixes };
+      *lang = &lang0;
+    }
+}
+
+static void
+Mercury_functions (FILE *inf)
+{
+  char *cp, *last = NULL;
+  ptrdiff_t lastlen = 0, allocated = 0;
+  if (declarations) with_mercury_definitions = true;
+
+  LOOP_ON_INPUT_LINES (inf, lb, cp)
+    {
+      if (cp[0] == '\0')   /* Empty line.  */
+	continue;
+      else if (c_isspace (cp[0]) || cp[0] == '%')
+	/*  A Prolog-type comment or anything other than a declaration.  */
+	continue;
+      else if (cp[0] == '/' && cp[1] == '*')  /* Mercury C-type comment.  */
+        mercury_skip_comment (&lb, inf);
+      else
+	{
+	  is_mercury_declaration = (cp[0] == ':' && cp[1] == '-');
+
+          if (is_mercury_declaration
+	      || with_mercury_definitions)
+	    {
+	      ptrdiff_t len = mercury_pr (cp, last, lastlen);
+	      if (0 < len)
+		{
+		  /* Store the declaration to avoid generating duplicate
+		     tags later.  */
+		  if (allocated <= len)
+		    {
+		      xrnew (last, len + 1, 1);
+		      allocated = len + 1;
+		    }
+		  memcpyz (last, cp, len);
+		  lastlen = len;
+		}
+	    }
+	}
+    }
+  free (last);
+}
+
+static void
+mercury_skip_comment (linebuffer *plb, FILE *inf)
+{
+  char *cp;
+
+  do
+    {
+      for (cp = plb->buffer; *cp != '\0'; ++cp)
+	if (cp[0] == '*' && cp[1] == '/')
+	  return;
+      readline (plb, inf);
+    }
+  while (perhaps_more_input (inf));
+}
+
+/*
+ * A declaration is added if it matches:
+ *     <beginning of line>:-<whitespace><Mercury Term><whitespace>(
+ * If with_mercury_definitions == true, we also add:
+ *     <beginning of line><Mercury item><whitespace>(
+ * or  <beginning of line><Mercury item><whitespace>:-
+ * As for Prolog support, different arities and types are not taken into
+ * consideration.
+ * Item is added to the tags database if it doesn't match the
+ * name of the previous declaration.
+ *
+ * Consume a Mercury declaration.
+ * Return the number of bytes consumed, or 0 if there was an error.
+ *
+ * A Mercury declaration must be one of:
+ *  :- type
+ *  :- solver type
+ *  :- pred
+ *  :- func
+ *  :- inst
+ *  :- mode
+ *  :- typeclass
+ *  :- instance
+ *  :- pragma
+ *  :- promise
+ *  :- initialise
+ *  :- finalise
+ *  :- mutable
+ *  :- module
+ *  :- interface
+ *  :- implementation
+ *  :- import_module
+ *  :- use_module
+ *  :- include_module
+ *  :- end_module
+ * followed on the same line by an alphanumeric sequence, starting with a lower
+ * case letter or by a single-quoted arbitrary string.
+ * Single quotes can escape themselves.  Backslash quotes everything.
+ *
+ * Return the size of the name of the declaration or 0 if no header was found.
+ * As quantifiers may precede functions or predicates, we must list them too.
+ */
+
+static const char *Mercury_decl_tags[] = {"type", "solver type", "pred",
+  "func", "inst", "mode", "typeclass", "instance", "pragma", "promise",
+  "initialise", "finalise", "mutable", "module", "interface", "implementation",
+  "import_module", "use_module", "include_module", "end_module", "some", "all"};
+
+static size_t
+mercury_decl (char *s, size_t pos)
+{
+  if (s == NULL) return 0;
+
+  size_t origpos;
+  origpos = pos;
+
+  while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) ++pos;
+
+  unsigned char decl_type_length = pos - origpos;
+  char buf[decl_type_length + 1];
+  memset (buf, 0, decl_type_length + 1);
+
+  /* Mercury declaration tags.  Consume them, then check the declaration item
+     following :- is legitimate, then go on as in the prolog case.  */
+
+  memcpy (buf, &s[origpos], decl_type_length);
+
+  bool found_decl_tag = false;
+
+  if (is_mercury_quantifier)
+    {
+      if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax.  */
+	return 0;
+      is_mercury_quantifier = false; /* Reset to base value.  */
+      found_decl_tag = true;
+    }
+  else
+    {
+      for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j)
+	{
+	  if (strcmp (buf, Mercury_decl_tags[j]) == 0)
+	    {
+	      found_decl_tag = true;
+	      if (strcmp (buf, "type") == 0)
+		is_mercury_type = true;
+
+	      if (strcmp (buf, "some") == 0
+		  || strcmp (buf, "all") == 0)
+		{
+		  is_mercury_quantifier = true;
+		}
+
+	      break;  /* Found declaration tag of rank j. */
+	    }
+	  else
+	    /* 'solver type' has a blank in the middle,
+	       so this is the hard case.  */
+	    if (strcmp (buf, "solver") == 0)
+	      {
+		++pos;
+		while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_'))
+		  ++pos;
+
+		decl_type_length = pos - origpos;
+		char buf2[decl_type_length + 1];
+		memset (buf2, 0, decl_type_length + 1);
+		memcpy (buf2, &s[origpos], decl_type_length);
+
+		if (strcmp (buf2, "solver type") == 0)
+		  {
+		    found_decl_tag = false;
+		    break;  /* Found declaration tag of rank j.  */
+		  }
+	      }
+	}
+    }
+
+  /* If with_mercury_definitions == false
+   * this is a Mercury syntax error, ignoring... */
+
+  if (with_mercury_definitions)
+    {
+      if (found_decl_tag)
+	pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
+      else
+	/* Prolog-like behavior
+	 * we have parsed the predicate once, yet inappropriately
+	 * so restarting again the parsing step.  */
+	pos = 0;
+    }
+  else
+    {
+      if (found_decl_tag)
+	pos = skip_spaces (s + pos) - s; /* Skip len blanks again.  */
+      else
+	return 0;
+    }
+
+  /* From now on it is the same as for Prolog except for module dots.  */
+
+  if (c_islower (s[pos]) || s[pos] == '_' )
+    {
+      /* The name is unquoted.
+         Do not confuse module dots with end-of-declaration dots.  */
+
+      while (c_isalnum (s[pos])
+             || s[pos] == '_'
+             || (s[pos] == '.' /* A module dot.  */
+                 && s + pos + 1 != NULL
+                 && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_')))
+	++pos;
+
+      return pos - origpos;
+    }
+  else if (s[pos] == '\'')
+    {
+      ++pos;
+      for (;;)
+	{
+	  if (s[pos] == '\'')
+	    {
+	      ++pos;
+	      if (s[pos] != '\'')
+		break;
+	      ++pos; /* A double quote.  */
+	    }
+	  else if (s[pos] == '\0')  /* Multiline quoted atoms are ignored.  */
+	    return 0;
+	  else if (s[pos] == '\\')
+	    {
+	      if (s[pos+1] == '\0')
+		return 0;
+	      pos += 2;
+	    }
+	  else
+	    ++pos;
+	}
+      return pos - origpos;
+    }
+  else if (is_mercury_quantifier && s[pos] == '[')   /* :- some [T] pred/func.  */
+    {
+      for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {}
+      if (s + pos == NULL) return 0;
+      ++pos;
+      pos = skip_spaces (s + pos) - s;
+      return mercury_decl (s, pos) + pos - origpos;
+    }
+  else
+    return 0;
+}
+
+static ptrdiff_t
+mercury_pr (char *s, char *last, ptrdiff_t lastlen)
+{
+  size_t len0 = 0;
+  is_mercury_type = false;
+  is_mercury_quantifier = false;
+
+  if (is_mercury_declaration)
+    {
+      /* Skip len0 blanks only for declarations.  */
+      len0 = skip_spaces (s + 2) - s;
+    }
+
+  size_t len = mercury_decl (s, len0);
+  if (len == 0) return 0;
+  len += len0;
+
+  if (( (s[len] == '.'  /* This is a statement dot, not a module dot.  */
+	 || (s[len] == '(' && (len += 1))
+         || (s[len] == ':'  /* Stopping in case of a rule.  */
+	     && s[len + 1] == '-'
+	     && (len += 2)))
+	&& (lastlen != len || memcmp (s, last, len) != 0)
+	)
+      /* Types are often declared on several lines so keeping just
+	 the first line.  */
+      || is_mercury_type)
+    {
+      char *name = skip_non_spaces (s + len0);
+      size_t namelen;
+      if (name >= s + len)
+	{
+	  name = s;
+	  namelen = len;
+	}
+      else
+	{
+	  name = skip_spaces (name);
+	  namelen = len - (name - s);
+	}
+      /* Remove trailing non-name characters.  */
+      while (namelen > 0 && notinname (name[namelen - 1]))
+	namelen--;
+      make_tag (name, namelen, true, s, len, lineno, linecharno);
+      return len;
+    }
+
+  return 0;
+}
+
+
+/*
  * Support for Erlang
  *
  * Generates tags for functions, defines, and records.
author	Yuuki Harano <masm+github@masm11.me>	2021-06-13 17:34:06 +0900
committer	Yuuki Harano <masm+github@masm11.me>	2021-06-13 17:34:06 +0900
commit	7d5e94bada09e642a8bfc4f66804f7948bad40bc (patch)
tree	38629672102b31bb38a855f24d4dd009e212c10d /lib-src
parent	7673b6b9eb0af3add73e1614a466f142092b00aa (diff)
parent	dc471feee3bcac872cc52cdc73282955cd2d219d (diff)
download	emacs-7d5e94bada09e642a8bfc4f66804f7948bad40bc.tar.gz