summaryrefslogtreecommitdiff
path: root/lisp/cedet/semantic/wisent/python.el
diff options
context:
space:
mode:
authorChong Yidong <cyd@gnu.org>2012-10-02 02:10:29 +0800
committerChong Yidong <cyd@gnu.org>2012-10-02 02:10:29 +0800
commit62a81506f802e4824b718cc30321ee3a0057cdf7 (patch)
treed681d7b767b1c3f7e4aee24ce39f6bef0d7f1f7e /lisp/cedet/semantic/wisent/python.el
parentb3317662acc0157406c20c8e14c43b7126eaa8a0 (diff)
downloademacs-62a81506f802e4824b718cc30321ee3a0057cdf7.tar.gz
Update CEDET from upstream.
Diffstat (limited to 'lisp/cedet/semantic/wisent/python.el')
-rw-r--r--lisp/cedet/semantic/wisent/python.el325
1 files changed, 287 insertions, 38 deletions
diff --git a/lisp/cedet/semantic/wisent/python.el b/lisp/cedet/semantic/wisent/python.el
index fef22b16995..ea603f251bb 100644
--- a/lisp/cedet/semantic/wisent/python.el
+++ b/lisp/cedet/semantic/wisent/python.el
@@ -28,27 +28,90 @@
;;; Code:
+(require 'rx)
+
+;; Try to load python support, but fail silently since it is only used
+;; for optional functionality
+(require 'python nil t)
+
(require 'semantic/wisent)
(require 'semantic/wisent/python-wy)
+(require 'semantic/find)
(require 'semantic/dep)
(require 'semantic/ctxt)
+(eval-when-compile
+ (require 'cl))
+
+;;; Customization
+;;
+
+(defun semantic-python-get-system-include-path ()
+  "Evaluate some Python code that determines the system include path.
+Return a list of strings, or nil (after a message) if the query fails."
+  (python-proc)
+  (if python-buffer
+      (with-current-buffer python-buffer
+        (set (make-local-variable 'python-preoutput-result) nil)
+        ;; Try Python 2 syntax first, then a Python3k compatible shot.
+        (dolist (code '("import sys; print '_emacs_out ' + '\\0'.join(sys.path)"
+                        "import sys; print('_emacs_out ' + '\\0'.join(sys.path))"))
+          (unless python-preoutput-result
+            (python-send-string code)
+            (accept-process-output (python-proc) 2)))
+        (if python-preoutput-result
+            (split-string python-preoutput-result "[\0\n]" t)
+          (message "Timeout while querying Python for system include path.")
+          nil))
+    (message "Python seems to be unavailable on this system.")
+    ;; `message' returns its string; answer nil like the timeout case.
+    nil))
+
+(defcustom-mode-local-semantic-dependency-system-include-path
+ python-mode semantic-python-dependency-system-include-path
+ (when (and (featurep 'python)
+ ;; Querying Python from a batch session often hangs, so skip it there.
+ (not noninteractive))
+ (semantic-python-get-system-include-path))
+ "The system include path used by Python language.")
;;; Lexical analysis
;;
;; Python strings are delimited by either single quotes or double
-;; quotes, e.g., "I'm a string" and 'I too am s string'.
+;; quotes, e.g., "I'm a string" and 'I too am a string'.
;; In addition a string can have either a 'r' and/or 'u' prefix.
;; The 'r' prefix means raw, i.e., normal backslash substitutions are
;; to be suppressed. For example, r"01\n34" is a string with six
;; characters 0, 1, \, n, 3 and 4. The 'u' prefix means the following
;; string is Unicode.
-(defconst wisent-python-string-re
- (concat (regexp-opt '("r" "u" "ur" "R" "U" "UR" "Ur" "uR") t)
- "?['\"]")
+(defconst wisent-python-string-start-re "[uU]?[rR]?['\"]"
"Regexp matching beginning of a Python string.")
+(defconst wisent-python-string-re
+ (rx
+ (opt (any "uU")) (opt (any "rR")) ; optional u/U, then optional r/R prefix
+ (or
+ ;; Triple-quoted string using apostrophes: the body may hold escaped
+ (: "'''" (zero-or-more (or "\\'"
+ (not (any "'"))
+ (: (repeat 1 2 "'") (not (any "'"))))) ; or runs of 1-2 apostrophes
+ "'''")
+ ;; String using apostrophes.
+ (: "'" (zero-or-more (or "\\'"
+ (not (any "'"))))
+ "'")
+ ;; Triple-quoted string using quotation marks.
+ (: "\"\"\"" (zero-or-more (or "\\\""
+ (not (any "\""))
+ (: (repeat 1 2 "\"") (not (any "\""))))) ; runs of 1-2 quotation marks
+ "\"\"\"")
+ ;; String using quotation marks.
+ (: "\"" (zero-or-more (or "\\\""
+ (not (any "\""))))
+ "\"")))
+ "Regexp matching a complete Python string, prefix and quotes included.")
+
(defvar wisent-python-EXPANDING-block nil
"Non-nil when expanding a paren block for Python lexical analyzer.")
@@ -60,16 +123,46 @@ curly braces."
(defsubst wisent-python-forward-string ()
"Move point at the end of the Python string at point."
- (when (looking-at wisent-python-string-re)
- ;; skip the prefix
- (and (match-end 1) (goto-char (match-end 1)))
- ;; skip the quoted part
- (cond
- ((looking-at "\"\"\"[^\"]")
- (search-forward "\"\"\"" nil nil 2))
- ((looking-at "'''[^']")
- (search-forward "'''" nil nil 2))
- ((forward-sexp 1)))))
+ (if (looking-at wisent-python-string-re)
+ (let ((start (match-beginning 0))
+ (end (match-end 0)))
+ ;; Incomplete triple-quoted string gets matched instead as a
+ ;; complete single quoted string. (This special case would be
+ ;; unnecessary if Emacs regular expressions had negative
+ ;; look-ahead assertions.)
+ (when (and (= (- end start) 2)
+ (looking-at "\"\\{3\\}\\|'\\{3\\}"))
+ (error "unterminated syntax"))
+ (goto-char end))
+ (error "unterminated syntax")))
+
+(defun wisent-python-forward-balanced-expression ()
+ "Move point to the end of the balanced expression at point.
+Here 'balanced expression' means anything matched by Emacs'
+open/close parenthesis syntax classes. We can't use `forward-sexp'
+for this because that Emacs built-in can't parse Python's
+triple-quoted string syntax."
+ (let ((end-char (cdr (syntax-after (point))))) ; char matching the one at point
+ (forward-char 1) ; step over the opening character
+ (while (not (or (eobp) (eq (char-after (point)) end-char)))
+ (cond
+ ;; Skip over python strings.
+ ((looking-at wisent-python-string-start-re)
+ (wisent-python-forward-string))
+ ;; At a comment start just goto end of line.
+ ((looking-at "\\s<")
+ (end-of-line))
+ ;; Skip over nested balanced expressions by recursing.
+ ((looking-at "\\s(")
+ (wisent-python-forward-balanced-expression))
+ ;; Skip over white space, word, symbol, punctuation, paired
+ ;; delimiter (backquote) characters, line continuation, and end
+ ;; of comment characters (AKA newline characters in Python).
+ ((zerop (skip-syntax-forward "-w_.$\\>"))
+ (error "can't figure out how to go forward from here"))))
+ ;; Skip closing character. As a last resort this should raise an
+ ;; error if we hit EOB before we find our closing character.
+ (forward-char 1)))
(defun wisent-python-forward-line ()
"Move point to the beginning of the next logical line.
@@ -83,14 +176,14 @@ line ends at the end of the buffer, leave the point there."
(progn
(cond
;; Skip over python strings.
- ((looking-at wisent-python-string-re)
+ ((looking-at wisent-python-string-start-re)
(wisent-python-forward-string))
;; At a comment start just goto end of line.
((looking-at "\\s<")
(end-of-line))
- ;; Skip over generic lists and strings.
- ((looking-at "\\(\\s(\\|\\s\"\\)")
- (forward-sexp 1))
+ ;; Skip over balanced expressions.
+ ((looking-at "\\s(")
+ (wisent-python-forward-balanced-expression))
;; At the explicit line continuation character
;; (backslash) move to next line.
((looking-at "\\s\\")
@@ -107,8 +200,8 @@ line ends at the end of the buffer, leave the point there."
(defun wisent-python-forward-line-skip-indented ()
"Move point to the next logical line, skipping indented lines.
-That is the next line whose indentation is less than or equal to the
-indentation of the current line."
+That is the next line whose indentation is less than or equal to
+the indentation of the current line."
(let ((indent (current-indentation)))
(while (progn (wisent-python-forward-line)
(and (not (eobp))
@@ -185,17 +278,18 @@ indentation of the current line."
;; Loop lexer to handle tokens in current line.
t)
;; Indentation decreased
- (t
- ;; Pop items from indentation stack
- (while (< curr-indent last-indent)
- (pop wisent-python-indent-stack)
- (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
- last-indent (car wisent-python-indent-stack))
- (semantic-lex-push-token
- (semantic-lex-token 'DEDENT last-pos (point))))
+ ((progn
+ ;; Pop items from indentation stack
+ (while (< curr-indent last-indent)
+ (pop wisent-python-indent-stack)
+ (setq semantic-lex-current-depth (1- semantic-lex-current-depth)
+ last-indent (car wisent-python-indent-stack))
+ (semantic-lex-push-token
+ (semantic-lex-token 'DEDENT last-pos (point))))
+ (= last-pos (point)))
;; If pos did not change, then we must return nil so that
;; other lexical analyzers can be run.
- (/= last-pos (point))))))
+ nil))))
;; All the work was done in the above analyzer matching condition.
)
@@ -211,7 +305,7 @@ continuation of current line."
(define-lex-regex-analyzer wisent-python-lex-string
"Detect and create python string tokens."
- wisent-python-string-re
+ wisent-python-string-start-re
(semantic-lex-push-token
(semantic-lex-token
'STRING_LITERAL
@@ -250,9 +344,113 @@ elsewhere on a line outside a string literal."
semantic-lex-ignore-comments
;; Signal error on unhandled syntax.
semantic-lex-default-action)
+
+
+;;; Parsing
+;;
+
+(defun wisent-python-reconstitute-function-tag (tag suite)
+  "Move a docstring from TAG's members into its :documentation attribute.
+Set attributes for constructors, special, private and static methods.
+SUITE is the list of tags for the statements of the function.  Return TAG."
+  ;; Analyze first statement to see whether it is a documentation string.
+  (let ((first-statement (car suite)))
+    (when (semantic-python-docstring-p first-statement)
+      (semantic-tag-put-attribute
+       tag :documentation
+       (semantic-python-extract-docstring first-statement))))
+
+  ;; TODO HACK: we try to identify methods using the following
+  ;; heuristic:
+  ;; + at least one argument
+  ;; + first argument is self
+  ;; `car', not cl's `first': this file only loads cl at compile time,
+  ;; so `first' need not be defined when this function runs.
+  (let ((arguments (semantic-tag-function-arguments tag)))
+    (when (and arguments (string= (semantic-tag-name (car arguments)) "self"))
+      (semantic-tag-put-attribute tag :parent "dummy")))
+
+  ;; Identify constructors, special and private functions
+  (cond
+   ;; TODO only valid when the function resides inside a class
+   ((string= (semantic-tag-name tag) "__init__")
+    (semantic-tag-put-attribute tag :constructor-flag t)
+    (semantic-tag-put-attribute tag :suite suite))
+
+   ((semantic-python-special-p tag)
+    (semantic-tag-put-attribute tag :special-flag t))
+
+   ((semantic-python-private-p tag)
+    (semantic-tag-put-attribute tag :protection "private")))
+
+  ;; If there is a staticmethod decorator, add a static typemodifier
+  ;; for the function.
+  (when (semantic-find-tags-by-name
+         "staticmethod"
+         (semantic-tag-get-attribute tag :decorators))
+    (semantic-tag-put-attribute
+     tag :typemodifiers
+     (cons "static"
+           (semantic-tag-get-attribute tag :typemodifiers))))
+
+  ;; TODO
+  ;; + check for decorators classmethod
+  ;; + check for operators
+  tag)
+
+(defun wisent-python-reconstitute-class-tag (tag)
+  "Move a docstring from TAG's members into its :documentation attribute.
+Also add instance variables set in the constructor as members; return TAG."
+  ;; The first member of TAG may be a documentation string.  If that is
+  ;; the case, remove it from the members list and stick its content
+  ;; into the :documentation attribute.
+  (let ((first-member (car (semantic-tag-type-members tag))))
+    (when (semantic-python-docstring-p first-member)
+      (semantic-tag-put-attribute
+       tag :members (cdr (semantic-tag-type-members tag)))
+      (semantic-tag-put-attribute
+       tag :documentation (semantic-python-extract-docstring first-member))))
+
+  ;; Find the constructor and the name of its instance parameter
+  ;; ("self" if it has none), then append a variable tag, stripped of
+  ;; its "NAME." prefix, for each instance variable assigned in it.
+  (dolist (member (semantic-tag-type-members tag))
+    (when (semantic-tag-function-constructor-p member)
+      (let* ((arguments (semantic-tag-function-arguments member))
+             (self (or (semantic-tag-name (car arguments)) "self")))
+        (dolist (statement (semantic-tag-get-attribute member :suite))
+          (when (semantic-python-instance-variable-p statement self)
+            (let ((variable (semantic-tag-clone
+                             statement
+                             (substring (semantic-tag-name statement)
+                                        (1+ (length self)))))
+                  (members (semantic-tag-get-attribute tag :members)))
+              (when (semantic-python-private-p variable)
+                (semantic-tag-put-attribute variable :protection "private"))
+              (setcdr (last members) (list variable))))))))
+
+  ;; TODO remove the :suite attribute
+  tag)
+
+(defun semantic-python-expand-tag (tag)
+  "Expand compound declarations found in TAG into separate tags.
+TAG contains compound declaration if the NAME part of the tag is
+a list.  In python, this can happen with `import' statements.
+Return the list of expanded tags, or nil if TAG is not compound."
+  (let ((names (semantic-tag-name tag)))
+    ;; Only an include tag whose name is a list gets expanded; for
+    ;; any other tag return nil.
+    (when (and (eq (semantic-tag-class tag) 'include)
+               (listp names))
+      ;; Clone TAG once per name, keeping the original order.
+      (mapcar (lambda (name) (semantic-tag-clone tag name))
+              names))))
+
+
;;; Overridden Semantic API.
;;
+
(define-mode-local-override semantic-lex python-mode
(start end &optional depth length)
"Lexically analyze Python code in current buffer.
@@ -274,10 +472,11 @@ what remains in the `wisent-python-indent-stack'."
To be implemented for Python! For now just return nil."
nil)
-(defcustom-mode-local-semantic-dependency-system-include-path
- python-mode semantic-python-dependency-system-include-path
- nil
- "The system include path used by Python language.")
+;; Adapted from the semantic Java support by Andrey Torba
+(define-mode-local-override semantic-tag-include-filename python-mode (tag)
+  "Return a file name for TAG's module: \"a.b\" becomes \"a/b.py\"."
+  (let ((module (semantic-tag-name tag)))
+    (concat (replace-regexp-in-string "\\." "/" module t t) ".py")))
;;; Enable Semantic in `python-mode'.
;;
@@ -287,13 +486,15 @@ To be implemented for Python! For now just return nil."
"Setup buffer for parse."
(wisent-python-wy--install-parser)
(set (make-local-variable 'parse-sexp-ignore-comments) t)
+ ;; Give python modes the possibility to overwrite this:
+ (if (not comment-start-skip)
+ (set (make-local-variable 'comment-start-skip) "#+\\s-*"))
(setq
- ;; Character used to separation a parent/child relationship
+ ;; Character used to separate a parent/child relationship
semantic-type-relation-separator-character '(".")
semantic-command-separation-character ";"
- ;; The following is no more necessary as semantic-lex is overridden
- ;; in python-mode.
- ;; semantic-lex-analyzer 'wisent-python-lexer
+ ;; Parsing
+ semantic-tag-expand-function 'semantic-python-expand-tag
;; Semantic to take over from the one provided by python.
;; The python one, if it uses the senator advice, will hang
@@ -320,8 +521,56 @@ To be implemented for Python! For now just return nil."
(define-child-mode python-3-mode python-mode "Python 3 mode")
+;;; Utility functions
+;;
+
+(defun semantic-python-special-p (tag)
+  "Return non-nil if the name of TAG is a special identifier __NAME__."
+  ;; `(syntax symbol)' would exclude letters and digits (word syntax).
+  (string-match
+   (rx (seq string-start "__" (1+ (any "_" alnum)) "__" string-end))
+   (semantic-tag-name tag)))
+
+(defun semantic-python-private-p (tag)
+  "Return non-nil if the name of TAG follows the private convention _NAME."
+  ;; `(syntax symbol)' would exclude letters and digits (word syntax).
+  (string-match
+   (rx (seq string-start "_" (0+ (any "_" alnum)) string-end))
+   (semantic-tag-name tag)))
+
+(defun semantic-python-instance-variable-p (tag &optional self)
+  "Return non-nil if TAG is an instance variable of the instance SELF.
+SELF is the instance's name (a string); nil means \"self\"."
+  (when (semantic-tag-of-class-p tag 'variable)
+    (let ((name (semantic-tag-name tag)))
+      (when (string-match
+             (rx-to-string `(seq string-start ,(or self "self") ".")) name)
+        ;; Skip the matched "SELF." prefix, whatever its length; the
+        ;; remainder must be a plain attribute name without dots.
+        (not (string-match "\\." (substring name (match-end 0))))))))
+
+(defun semantic-python-docstring-p (tag)
+  "Return non-nil, when TAG is a Python documentation string.
+This holds for a tag of class `code' whose whole name is a string
+literal delimited by three double quotes at each end."
+  ;; Test the class first, so that the name is only matched for
+  ;; `code' tags.
+  (when (eq (semantic-tag-class tag) 'code)
+    (string-match
+     (rx string-start "\"\"\"" (0+ anything) "\"\"\"" string-end)
+     ;; The name is expected to hold the literal, quotes included.
+     (semantic-tag-name tag))))
+
+(defun semantic-python-extract-docstring (tag)
+  "Return the Python documentation string contained in TAG."
+  ;; Drop the first three and the last three characters of TAG's
+  ;; name, i.e. the leading and trailing """ of the literal.
+  (substring (semantic-tag-name tag) 3 -3))
+
+
;;; Test
;;
+
(defun wisent-python-lex-buffer ()
"Run `wisent-python-lexer' on current buffer."
(interactive)