google · limwz01 · Jun 6, 2023 · Jan 22, 2024 · bwendling · Jan 22, 2024
diff --git a/yapf/pytree/comment_splicer.py b/yapf/pytree/comment_splicer.py
@@ -73,12 +73,13 @@ def _VisitNodeRec(node):
             # result of the way pytrees are organized, this node can be under
             # an inappropriate parent.
             comment_column -= len(comment_prefix.lstrip())
-            pytree_utils.InsertNodesAfter(
-                _CreateCommentsFromPrefix(
-                    comment_prefix,
-                    comment_lineno,
-                    comment_column,
-                    standalone=False), prev_leaf[0])
+            nodes, child.prefix = _CreateCommentsFromPrefix(
+                comment_prefix,
+                comment_lineno,
+                comment_column,
+                "",
+                standalone=False)
+            pytree_utils.InsertNodesAfter(nodes, prev_leaf[0])
           elif child.type == token.DEDENT:
             # Comment prefixes on DEDENT nodes also deserve special treatment,
             # because their final placement depends on their prefix.
@@ -101,35 +102,51 @@ def _VisitNodeRec(node):
             # In this case, we need to split them up ourselves.
 
             # Split into groups of comments at decreasing levels of indentation
+            comment_group_pre = []
             comment_groups = []
             comment_column = None
-            for cmt in comment_prefix.split('\n'):
+            comment_split = comment_prefix.split('\n')
+            for cmt in comment_split[:-1]:
               col = cmt.find('#')
               if col < 0:
                 if comment_column is None:
                   # Skip empty lines at the top of the first comment group
-                  comment_lineno += 1
+                  comment_group_pre.append(cmt)
                   continue
               elif comment_column is None or col < comment_column:
                 comment_column = col
                 comment_indent = cmt[:comment_column]
-                comment_groups.append((comment_column, comment_indent, []))
+                comment_groups.append(
+                    (comment_column, comment_indent, comment_group_pre))
+                comment_group_pre = []
               comment_groups[-1][-1].append(cmt)
 
+            prefix = ""
+            child.prefix = ""
+            ancestor_at_indent = None
             # Insert a node for each group
             for comment_column, comment_indent, comment_group in comment_groups:
               ancestor_at_indent = _FindAncestorAtIndent(child, comment_indent)
               if ancestor_at_indent.type == token.DEDENT:
                 InsertNodes = pytree_utils.InsertNodesBefore  # pylint: disable=invalid-name # noqa
               else:
                 InsertNodes = pytree_utils.InsertNodesAfter  # pylint: disable=invalid-name # noqa
-              InsertNodes(
-                  _CreateCommentsFromPrefix(
-                      '\n'.join(comment_group) + '\n',
-                      comment_lineno,
-                      comment_column,
-                      standalone=True), ancestor_at_indent)
+              nodes, prefix = _CreateCommentsFromPrefix(
+                  '\n'.join(comment_group) + '\n',
+                  comment_lineno,
+                  comment_column,
+                  prefix,
+                  standalone=True)
+              InsertNodes(nodes, ancestor_at_indent)
               comment_lineno += len(comment_group)
+            if ancestor_at_indent:
+              leaf = _FindFirstLeafAfter(nodes[-1])
+              # print(
+              #     repr(leaf),
+              #     repr(prefix),
+              #     repr(comment_split[-1]),
+              #     file=sys.stderr)
+              leaf.prefix = prefix + comment_split[-1]
           else:
             # Otherwise there are two cases.
             #
@@ -163,10 +180,9 @@ def _VisitNodeRec(node):
                   if comment_end < node_with_line_parent.lineno - 1:
                     node_with_line_parent = node_with_line_parent.parent
 
-                pytree_utils.InsertNodesBefore(
-                    _CreateCommentsFromPrefix(
-                        comment_prefix, comment_lineno, 0, standalone=True),
-                    node_with_line_parent)
+                nodes, child.prefix = _CreateCommentsFromPrefix(
+                    comment_prefix, comment_lineno, 0, "", standalone=True)
+                pytree_utils.InsertNodesBefore(nodes, node_with_line_parent)
                 break
               else:
                 if comment_lineno == prev_leaf[0].lineno:
@@ -190,10 +206,11 @@ def _VisitNodeRec(node):
                 comment_column = (
                     len(comment_prefix[rindex:]) -
                     len(comment_prefix[rindex:].lstrip()))
-                comments = _CreateCommentsFromPrefix(
+                comments, child.prefix = _CreateCommentsFromPrefix(
                     comment_prefix,
                     comment_lineno,
                     comment_column,
+                    "",
                     standalone=False)
                 pytree_utils.InsertNodesBefore(comments, child)
                 break
@@ -206,6 +223,7 @@ def _VisitNodeRec(node):
 def _CreateCommentsFromPrefix(comment_prefix,
                               comment_lineno,
                               comment_column,
+                              prefix,
                               standalone=False):
   """Create pytree nodes to represent the given comment prefix.
 
@@ -226,29 +244,40 @@ def _CreateCommentsFromPrefix(comment_prefix,
   comments = []
 
   lines = comment_prefix.split('\n')
+  # print(comment_lineno, repr(lines), repr(prefix), file=sys.stderr)
   index = 0
   while index < len(lines):
     comment_block = []
-    while index < len(lines) and lines[index].lstrip().startswith('#'):
-      comment_block.append(lines[index].strip())
-      index += 1
-
-    if comment_block:
+    lstrip = lines[index].lstrip()
+    if lstrip.startswith('#'):
+      # get whitespace on the left
+      prefix += lines[index][:len(lines[index]) - len(lstrip)]
+      # get all lines of block
+      while True:
+        comment_block.append(lines[index].lstrip())
+        index += 1
+        if not (index < len(lines) and lines[index].lstrip().startswith("#")):
+          break
+
+      # print(repr(comment_block), repr(prefix), file=sys.stderr)
       new_lineno = comment_lineno + index - 1
-      comment_block[0] = comment_block[0].strip()
-      comment_block[-1] = comment_block[-1].strip()
       comment_leaf = pytree.Leaf(
           type=token.COMMENT,
           value='\n'.join(comment_block),
-          context=('', (new_lineno, comment_column)))
+          context=('', (new_lineno, comment_column)),
+          prefix=prefix)
+      prefix = ""
       comment_node = comment_leaf if not standalone else pytree.Node(
           pygram.python_symbols.simple_stmt, [comment_leaf])
       comments.append(comment_node)
-
-    while index < len(lines) and not lines[index].lstrip():
+    else:
+      prefix += lines[index]
       index += 1
+    if index < len(lines):
+      prefix += "\n"
 
-  return comments
+  # print("prefix", repr(prefix), file=sys.stderr)
+  return comments, prefix
 
 
 # "Standalone line nodes" are tree nodes that have to start a new line in Python
@@ -287,6 +316,45 @@ def _FindNodeWithStandaloneLineParent(node):
     return _FindNodeWithStandaloneLineParent(node.parent)
 
 
+def _FindFirstLeafAfter(node):
+  """Find the first node after the given node.
+
+  Arguments:
+    node: node to start from
+
+  Returns:
+    The first node after the given node or None
+  """
+  next_sibling = node.next_sibling
+  if next_sibling:
+    return _FindFirstLeafAt(next_sibling)
+  if node.parent:
+    return _FindFirstLeafAfter(node.parent)
+  return None
+
+
+def _FindFirstLeafAt(node):
+  """Find the first leaf of the given node.
+
+  Arguments:
+    node: node to start from
+
+  Returns:
+    The first leaf or None
+  """
+  if isinstance(node, pytree.Leaf):
+    return node
+  else:
+    for child in node.children:
+      leaf = _FindFirstLeafAt(child)
+      if leaf is not None:
+        return leaf
+    next_sibling = node.next_sibling
+    if next_sibling:
+      return _FindFirstLeafAt(next_sibling)
+    return None
+
+
 # "Statement nodes" are standalone statements. The don't have to start a new
 # line.
 _STATEMENT_NODES = frozenset(['simple_stmt']) | _STANDALONE_LINE_NODES

diff --git a/yapf/pytree/pytree_unwrapper.py b/yapf/pytree/pytree_unwrapper.py
@@ -85,6 +85,7 @@ class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
   def __init__(self):
     # A list of all logical lines finished visiting so far.
     self._logical_lines = []
+    self.prefix = ""
 
     # Builds up a "current" logical line while visiting pytree nodes. Some nodes
     # will finish a line and start a new one.
@@ -315,12 +316,17 @@ def DefaultLeafVisit(self, leaf):
     Arguments:
       leaf: the leaf to visit.
     """
+    self.prefix += leaf.prefix
     if leaf.type in _WHITESPACE_TOKENS:
       self._StartNewLine()
+      if leaf.type == grammar_token.NEWLINE:
+        self.prefix += "\n"
     elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
       # Add non-whitespace tokens and comments that aren't empty.
       self._cur_logical_line.AppendToken(
-          format_token.FormatToken(leaf, pytree_utils.NodeName(leaf)))
+          format_token.FormatToken(leaf, pytree_utils.NodeName(leaf),
+                                   self.prefix))
+      self.prefix = ""
 
 
 _BRACKET_MATCH = {')': '(', '}': '{', ']': '['}

diff --git a/yapf/yapflib/format_token.py b/yapf/yapflib/format_token.py
@@ -84,7 +84,7 @@ class FormatToken(object):
     newlines: The number of newlines needed before this token.
   """
 
-  def __init__(self, node, name):
+  def __init__(self, node, name, prefix):
     """Constructor.
 
     Arguments:
@@ -97,6 +97,7 @@ def __init__(self, node, name):
     self.column = node.column
     self.lineno = node.lineno
     self.value = node.value
+    self.prefix = prefix
 
     if self.is_continuation:
       self.value = node.value.rstrip()

diff --git a/yapf/yapflib/reformatter.py b/yapf/yapflib/reformatter.py
@@ -19,18 +19,19 @@
   Reformat(): the main function exported by this module.
 """
 
+import json
 import collections
 import heapq
 import re
-
-from yapf_third_party._ylib2to3 import pytree
-from yapf_third_party._ylib2to3.pgen2 import token
+import sys
 
 from yapf.pytree import pytree_utils
 from yapf.yapflib import format_decision_state
 from yapf.yapflib import format_token
 from yapf.yapflib import line_joiner
 from yapf.yapflib import style
+from yapf_third_party._ylib2to3 import pytree
+from yapf_third_party._ylib2to3.pgen2 import token
 
 
 def Reformat(llines, lines=None):
@@ -72,6 +73,21 @@ def Reformat(llines, lines=None):
     if lline.disable or _LineHasContinuationMarkers(lline):
       _RetainHorizontalSpacing(lline)
       _RetainRequiredVerticalSpacing(lline, prev_line, lines)
+      if not _LineHasContinuationMarkers(lline):
+        for token in lline.tokens:
+          # print(
+          #     repr(token.value) + ":" + str(token.lineno) + ":" +
+          #     json.dumps(token.prefix) + ":" +
+          #     json.dumps(token.whitespace_prefix), file=sys.stderr)
+          if token.prefix:
+            prefix_new = token.whitespace_prefix.split("\n")
+            if len(prefix_new) >= 2:
+              prefix_old = token.prefix.split("\n")
+              offset = len(prefix_new) - len(prefix_old)
+              prefix = prefix_new[0:max(0, offset)] + prefix_old[
+                  max(0, -offset):-1] + prefix_new[-1:]
+              token.whitespace_prefix = "\n".join(prefix)
+              # print("--> " + repr(token.whitespace_prefix), file=sys.stderr)
       _EmitLineUnformatted(state)
 
     elif (_LineContainsPylintDisableLineTooLong(lline) or
@@ -403,7 +419,12 @@ def _FormatFinalLines(final_lines):
     for tok in line.tokens:
       if not tok.is_pseudo:
         formatted_line.append(tok.formatted_whitespace_prefix)
-        formatted_line.append(tok.value)
+        # print(
+        #     repr(tok.formatted_whitespace_prefix),
+        #     repr(tok.value),
+        #     file=sys.stderr)
+        formatted_line.append("\n".join(map(str.rstrip, tok.value.split(
+            "\n"))) if not line.disable and tok.is_comment else tok.value)
       elif (not tok.next_token.whitespace_prefix.startswith('\n') and
             not tok.next_token.whitespace_prefix.startswith(' ')):
         if (tok.previous_token.value == ':' or