diff options
author | Yuuki Harano <masm+github@masm11.me> | 2021-06-13 17:34:06 +0900 |
---|---|---|
committer | Yuuki Harano <masm+github@masm11.me> | 2021-06-13 17:34:06 +0900 |
commit | 7d5e94bada09e642a8bfc4f66804f7948bad40bc (patch) | |
tree | 38629672102b31bb38a855f24d4dd009e212c10d /lib-src | |
parent | 7673b6b9eb0af3add73e1614a466f142092b00aa (diff) | |
parent | dc471feee3bcac872cc52cdc73282955cd2d219d (diff) | |
download | emacs-7d5e94bada09e642a8bfc4f66804f7948bad40bc.tar.gz |
Merge branch 'master' of git.sv.gnu.org:/srv/git/emacs into feature/pgtk
Diffstat (limited to 'lib-src')
-rw-r--r-- | lib-src/etags.c | 505 |
1 files changed, 501 insertions, 4 deletions
diff --git a/lib-src/etags.c b/lib-src/etags.c index d703183cef7..9f20e44caf4 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -142,7 +142,14 @@ University of California, as described above. */ # define CTAGS false #endif -/* Copy to DEST from SRC (containing LEN bytes), and append a NUL byte. */ +/* Define MERCURY_HEURISTICS_RATIO as it was necessary to disambiguate + Mercury from Objective C, which have same file extensions .m + See comments before function test_objc_is_mercury for details. */ +#ifndef MERCURY_HEURISTICS_RATIO +# define MERCURY_HEURISTICS_RATIO 0.5 +#endif + +/* COPY to DEST from SRC (containing LEN bytes), and append a NUL byte. */ static void memcpyz (void *dest, void const *src, ptrdiff_t len) { @@ -359,6 +366,7 @@ static void HTML_labels (FILE *); static void Lisp_functions (FILE *); static void Lua_functions (FILE *); static void Makefile_targets (FILE *); +static void Mercury_functions (FILE *); static void Pascal_functions (FILE *); static void Perl_functions (FILE *); static void PHP_functions (FILE *); @@ -379,6 +387,7 @@ static ptrdiff_t readline_internal (linebuffer *, FILE *, char const *); static bool nocase_tail (const char *); static void get_tag (char *, char **); static void get_lispy_tag (char *); +static void test_objc_is_mercury (char *, language **); static void analyze_regex (char *); static void free_regexps (void); @@ -684,10 +693,22 @@ static const char Makefile_help [] = "In makefiles, targets are tags; additionally, variables are tags\n\ unless you specify '--no-globals'."; +/* Mercury and Objective C share the same .m file extensions. */ +static const char *Mercury_suffixes [] = + {"m", + NULL}; +static const char Mercury_help [] = + "In Mercury code, tags are all declarations beginning a line with ':-'\n\ +and optionally Prolog-like definitions (first rule for a predicate or \ +function).\n\ +To enable this behavior, run etags using --declarations."; +static bool with_mercury_definitions = false; +float mercury_heuristics_ratio = MERCURY_HEURISTICS_RATIO; + static const char *Objc_suffixes [] = - { "lm", /* Objective lex file */ - "m", /* Objective C file */ - NULL }; + { "lm", /* Objective lex file */ + "m", /* By default, Objective C file will be assumed. */ + NULL}; static const char Objc_help [] = "In Objective C code, tags include Objective C definitions for classes,\n\ class categories, methods and protocols. Tags for variables and\n\ @@ -831,7 +852,9 @@ static language lang_names [] = { "lisp", Lisp_help, Lisp_functions, Lisp_suffixes }, { "lua", Lua_help,Lua_functions,Lua_suffixes,NULL,Lua_interpreters}, { "makefile", Makefile_help,Makefile_targets,NULL,Makefile_filenames}, + /* objc listed before mercury as it is a better default for .m extensions. */ { "objc", Objc_help, plain_C_entries, Objc_suffixes }, + { "mercury", Mercury_help, Mercury_functions, Mercury_suffixes }, { "pascal", Pascal_help, Pascal_functions, Pascal_suffixes }, { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters}, { "php", PHP_help, PHP_functions, PHP_suffixes }, @@ -958,6 +981,9 @@ Relative ones are stored relative to the output file's directory.\n"); puts ("\tand create tags for extern variables unless --no-globals is used."); + puts ("In Mercury, tag both declarations starting a line with ':-' and first\n\ + predicates or functions in clauses."); + if (CTAGS) puts ("-d, --defines\n\ Create tag entries for C #define constants and enum constants, too."); @@ -1783,6 +1809,11 @@ find_entries (FILE *inf) if (parser == NULL) { lang = get_language_from_filename (curfdp->infname, true); + + /* Disambiguate file names between Objc and Mercury. */ + if (lang != NULL && strcmp (lang->name, "objc") == 0) + test_objc_is_mercury (curfdp->infname, &lang); + if (lang != NULL && lang->function != NULL) { curfdp->lang = lang; @@ -6072,6 +6103,472 @@ prolog_atom (char *s, size_t pos) /* + * Support for Mercury + * + * Assumes that the declarations start at column 0. + * Original code by Sunichirou Sugou (1989) for Prolog. + * Rewritten by Anders Lindgren (1996) for Prolog. + * Adapted by Fabrice Nicol (2021) for Mercury. + * Note: Prolog-support behavior is preserved if + * --declarations is used, corresponding to + * with_mercury_definitions=true. + */ + +static ptrdiff_t mercury_pr (char *, char *, ptrdiff_t); +static void mercury_skip_comment (linebuffer *, FILE *); +static bool is_mercury_type = false; +static bool is_mercury_quantifier = false; +static bool is_mercury_declaration = false; + +/* + * Objective-C and Mercury have identical file extension .m. + * To disambiguate between Objective C and Mercury, parse file + * with the following heuristics hook: + * - if line starts with :-, choose Mercury unconditionally; + * - if line starts with #, @, choose Objective-C; + * - otherwise compute the following ratio: + * + * r = (number of lines with :- + * or % in non-commented parts or . at trimmed EOL) + * / (number of lines - number of lines starting by any amount + * of whitespace, optionally followed by comment(s)) + * + * Note: strings are neglected in counts. + * + * If r > mercury_heuristics_ratio, choose Mercury. + * Experimental tests show that a possibly optimal default value for + * this floor value is around 0.5. This is the default value for + * MERCURY_HEURISTICS_RATIO, defined in the first lines of this file. + * The closer r is to 0.5, the closer the source code to pure Prolog. + * Idiomatic Mercury is scored either with r = 1.0 or higher. + * Objective-C is scored with r = 0.0. When this fails, the r-score + * never rose above 0.1 in Objective-C tests. + */ + +static void +test_objc_is_mercury (char *this_file, language **lang) +{ + if (this_file == NULL) return; + FILE* fp = fopen (this_file, "r"); + if (fp == NULL) + pfatal (this_file); + + bool blank_line = false; /* Line starting with any amount of white space + followed by optional comment(s). */ + bool commented_line = false; + bool found_dot = false; + bool only_space_before = true; + bool start_of_line = true; + int c; + intmax_t lines = 1; + intmax_t mercury_dots = 0; + intmax_t percentage_signs = 0; + intmax_t rule_signs = 0; + float ratio = 0; + + while ((c = fgetc (fp)) != EOF) + { + switch (c) + { + case '\n': + if (! blank_line) ++lines; + blank_line = true; + commented_line = false; + start_of_line = true; + if (found_dot) ++mercury_dots; + found_dot = false; + only_space_before = true; + break; + case '.': + found_dot = ! commented_line; + only_space_before = false; + break; + case '%': /* More frequent in Mercury. May be modulo in Obj.-C. */ + if (! commented_line) + { + ++percentage_signs; + /* Cannot tell if it is a comment or modulo yet for sure. + Yet works for heuristic purposes. */ + commented_line = true; + } + found_dot = false; + start_of_line = false; + only_space_before = false; + break; + case '/': + { + int d = fgetc (fp); + found_dot = false; + only_space_before = false; + if (! commented_line) + { + if (d == '*') + commented_line = true; + else + /* If d == '/', cannot tell if it is an Obj.-C comment: + may be Mercury integ. division. */ + blank_line = false; + } + } + FALLTHROUGH; + case ' ': + case '\t': + start_of_line = false; + break; + case ':': + c = fgetc (fp); + if (start_of_line) + { + if (c == '-') + { + ratio = 1.0; /* Failsafe, not an operator in Obj.-C. */ + goto out; + } + start_of_line = false; + } + else + { + /* p :- q. Frequent in Mercury. + Rare or in quoted exprs in Obj.-C. */ + if (c == '-' && ! commented_line) + ++rule_signs; + } + blank_line = false; + found_dot = false; + only_space_before = false; + break; + case '@': + case '#': + if (start_of_line || only_space_before) + { + ratio = 0.0; + goto out; + } + FALLTHROUGH; + default: + start_of_line = false; + blank_line = false; + found_dot = false; + only_space_before = false; + } + } + + /* Fallback heuristic test. Not failsafe but errless in pratice. */ + ratio = ((float) rule_signs + percentage_signs + mercury_dots) / lines; + + out: + if (fclose (fp) == EOF) + pfatal (this_file); + + if (ratio > mercury_heuristics_ratio) + { + /* Change the language from Objective-C to Mercury. */ + static language lang0 = { "mercury", Mercury_help, Mercury_functions, + Mercury_suffixes }; + *lang = &lang0; + } +} + +static void +Mercury_functions (FILE *inf) +{ + char *cp, *last = NULL; + ptrdiff_t lastlen = 0, allocated = 0; + if (declarations) with_mercury_definitions = true; + + LOOP_ON_INPUT_LINES (inf, lb, cp) + { + if (cp[0] == '\0') /* Empty line. */ + continue; + else if (c_isspace (cp[0]) || cp[0] == '%') + /* A Prolog-type comment or anything other than a declaration. */ + continue; + else if (cp[0] == '/' && cp[1] == '*') /* Mercury C-type comment. */ + mercury_skip_comment (&lb, inf); + else + { + is_mercury_declaration = (cp[0] == ':' && cp[1] == '-'); + + if (is_mercury_declaration + || with_mercury_definitions) + { + ptrdiff_t len = mercury_pr (cp, last, lastlen); + if (0 < len) + { + /* Store the declaration to avoid generating duplicate + tags later. */ + if (allocated <= len) + { + xrnew (last, len + 1, 1); + allocated = len + 1; + } + memcpyz (last, cp, len); + lastlen = len; + } + } + } + } + free (last); +} + +static void +mercury_skip_comment (linebuffer *plb, FILE *inf) +{ + char *cp; + + do + { + for (cp = plb->buffer; *cp != '\0'; ++cp) + if (cp[0] == '*' && cp[1] == '/') + return; + readline (plb, inf); + } + while (perhaps_more_input (inf)); +} + +/* + * A declaration is added if it matches: + * <beginning of line>:-<whitespace><Mercury Term><whitespace>( + * If with_mercury_definitions == true, we also add: + * <beginning of line><Mercury item><whitespace>( + * or <beginning of line><Mercury item><whitespace>:- + * As for Prolog support, different arities and types are not taken into + * consideration. + * Item is added to the tags database if it doesn't match the + * name of the previous declaration. + * + * Consume a Mercury declaration. + * Return the number of bytes consumed, or 0 if there was an error. + * + * A Mercury declaration must be one of: + * :- type + * :- solver type + * :- pred + * :- func + * :- inst + * :- mode + * :- typeclass + * :- instance + * :- pragma + * :- promise + * :- initialise + * :- finalise + * :- mutable + * :- module + * :- interface + * :- implementation + * :- import_module + * :- use_module + * :- include_module + * :- end_module + * followed on the same line by an alphanumeric sequence, starting with a lower + * case letter or by a single-quoted arbitrary string. + * Single quotes can escape themselves. Backslash quotes everything. + * + * Return the size of the name of the declaration or 0 if no header was found. + * As quantifiers may precede functions or predicates, we must list them too. + */ + +static const char *Mercury_decl_tags[] = {"type", "solver type", "pred", + "func", "inst", "mode", "typeclass", "instance", "pragma", "promise", + "initialise", "finalise", "mutable", "module", "interface", "implementation", + "import_module", "use_module", "include_module", "end_module", "some", "all"}; + +static size_t +mercury_decl (char *s, size_t pos) +{ + if (s == NULL) return 0; + + size_t origpos; + origpos = pos; + + while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) ++pos; + + unsigned char decl_type_length = pos - origpos; + char buf[decl_type_length + 1]; + memset (buf, 0, decl_type_length + 1); + + /* Mercury declaration tags. Consume them, then check the declaration item + following :- is legitimate, then go on as in the prolog case. */ + + memcpy (buf, &s[origpos], decl_type_length); + + bool found_decl_tag = false; + + if (is_mercury_quantifier) + { + if (strcmp (buf, "pred") != 0 && strcmp (buf, "func") != 0) /* Bad syntax. */ + return 0; + is_mercury_quantifier = false; /* Reset to base value. */ + found_decl_tag = true; + } + else + { + for (int j = 0; j < sizeof (Mercury_decl_tags) / sizeof (char*); ++j) + { + if (strcmp (buf, Mercury_decl_tags[j]) == 0) + { + found_decl_tag = true; + if (strcmp (buf, "type") == 0) + is_mercury_type = true; + + if (strcmp (buf, "some") == 0 + || strcmp (buf, "all") == 0) + { + is_mercury_quantifier = true; + } + + break; /* Found declaration tag of rank j. */ + } + else + /* 'solver type' has a blank in the middle, + so this is the hard case. */ + if (strcmp (buf, "solver") == 0) + { + ++pos; + while (s + pos != NULL && (c_isalnum (s[pos]) || s[pos] == '_')) + ++pos; + + decl_type_length = pos - origpos; + char buf2[decl_type_length + 1]; + memset (buf2, 0, decl_type_length + 1); + memcpy (buf2, &s[origpos], decl_type_length); + + if (strcmp (buf2, "solver type") == 0) + { + found_decl_tag = false; + break; /* Found declaration tag of rank j. */ + } + } + } + } + + /* If with_mercury_definitions == false + * this is a Mercury syntax error, ignoring... */ + + if (with_mercury_definitions) + { + if (found_decl_tag) + pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */ + else + /* Prolog-like behavior + * we have parsed the predicate once, yet inappropriately + * so restarting again the parsing step. */ + pos = 0; + } + else + { + if (found_decl_tag) + pos = skip_spaces (s + pos) - s; /* Skip len blanks again. */ + else + return 0; + } + + /* From now on it is the same as for Prolog except for module dots. */ + + if (c_islower (s[pos]) || s[pos] == '_' ) + { + /* The name is unquoted. + Do not confuse module dots with end-of-declaration dots. */ + + while (c_isalnum (s[pos]) + || s[pos] == '_' + || (s[pos] == '.' /* A module dot. */ + && s + pos + 1 != NULL + && (c_isalnum (s[pos + 1]) || s[pos + 1] == '_'))) + ++pos; + + return pos - origpos; + } + else if (s[pos] == '\'') + { + ++pos; + for (;;) + { + if (s[pos] == '\'') + { + ++pos; + if (s[pos] != '\'') + break; + ++pos; /* A double quote. */ + } + else if (s[pos] == '\0') /* Multiline quoted atoms are ignored. */ + return 0; + else if (s[pos] == '\\') + { + if (s[pos+1] == '\0') + return 0; + pos += 2; + } + else + ++pos; + } + return pos - origpos; + } + else if (is_mercury_quantifier && s[pos] == '[') /* :- some [T] pred/func. */ + { + for (++pos; s + pos != NULL && s[pos] != ']'; ++pos) {} + if (s + pos == NULL) return 0; + ++pos; + pos = skip_spaces (s + pos) - s; + return mercury_decl (s, pos) + pos - origpos; + } + else + return 0; +} + +static ptrdiff_t +mercury_pr (char *s, char *last, ptrdiff_t lastlen) +{ + size_t len0 = 0; + is_mercury_type = false; + is_mercury_quantifier = false; + + if (is_mercury_declaration) + { + /* Skip len0 blanks only for declarations. */ + len0 = skip_spaces (s + 2) - s; + } + + size_t len = mercury_decl (s, len0); + if (len == 0) return 0; + len += len0; + + if (( (s[len] == '.' /* This is a statement dot, not a module dot. */ + || (s[len] == '(' && (len += 1)) + || (s[len] == ':' /* Stopping in case of a rule. */ + && s[len + 1] == '-' + && (len += 2))) + && (lastlen != len || memcmp (s, last, len) != 0) + ) + /* Types are often declared on several lines so keeping just + the first line. */ + || is_mercury_type) + { + char *name = skip_non_spaces (s + len0); + size_t namelen; + if (name >= s + len) + { + name = s; + namelen = len; + } + else + { + name = skip_spaces (name); + namelen = len - (name - s); + } + /* Remove trailing non-name characters. */ + while (namelen > 0 && notinname (name[namelen - 1])) + namelen--; + make_tag (name, namelen, true, s, len, lineno, linecharno); + return len; + } + + return 0; +} + + +/* * Support for Erlang * * Generates tags for functions, defines, and records. |