clang-tools  10.0.0git
FormattedString.cpp
Go to the documentation of this file.
1 //===--- FormattedString.cpp --------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "FormattedString.h"
9 #include "clang/Basic/CharInfo.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/ADT/StringExtras.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/Support/ErrorHandling.h"
16 #include "llvm/Support/FormatVariadic.h"
17 #include "llvm/Support/raw_ostream.h"
18 #include <cstddef>
19 #include <iterator>
20 #include <memory>
21 #include <string>
22 #include <vector>
23 
24 namespace clang {
25 namespace clangd {
26 namespace markup {
27 
28 namespace {
29 /// Escape a markdown text block. Ensures the punctuation will not introduce
30 /// any of the markdown constructs.
31 std::string renderText(llvm::StringRef Input) {
32  // Escaping ASCII punctuation ensures we can't start a markdown construct.
33  constexpr llvm::StringLiteral Punctuation =
34  R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
35 
36  std::string R;
37  for (size_t From = 0; From < Input.size();) {
38  size_t Next = Input.find_first_of(Punctuation, From);
39  R += Input.substr(From, Next - From);
40  if (Next == llvm::StringRef::npos)
41  break;
42  R += "\\";
43  R += Input[Next];
44 
45  From = Next + 1;
46  }
47  return R;
48 }
49 
50 /// Renders \p Input as an inline block of code in markdown. The returned value
51 /// is surrounded by backticks and the inner contents are properly escaped.
52 std::string renderInlineBlock(llvm::StringRef Input) {
53  std::string R;
54  // Double all backticks to make sure we don't close the inline block early.
55  for (size_t From = 0; From < Input.size();) {
56  size_t Next = Input.find("`", From);
57  R += Input.substr(From, Next - From);
58  if (Next == llvm::StringRef::npos)
59  break;
60  R += "``"; // double the found backtick.
61 
62  From = Next + 1;
63  }
64  // If results starts with a backtick, add spaces on both sides. The spaces
65  // are ignored by markdown renderers.
66  if (llvm::StringRef(R).startswith("`") || llvm::StringRef(R).endswith("`"))
67  return "` " + std::move(R) + " `";
68  // Markdown render should ignore first and last space if both are there. We
69  // add an extra pair of spaces in that case to make sure we render what the
70  // user intended.
71  if (llvm::StringRef(R).startswith(" ") && llvm::StringRef(R).endswith(" "))
72  return "` " + std::move(R) + " `";
73  return "`" + std::move(R) + "`";
74 }
75 
76 /// Get marker required for \p Input to represent a markdown codeblock. It
77 /// consists of at least 3 backticks(`). Although markdown also allows to use
78 /// tilde(~) for code blocks, they are never used.
79 std::string getMarkerForCodeBlock(llvm::StringRef Input) {
80  // Count the maximum number of consecutive backticks in \p Input. We need to
81  // start and end the code block with more.
82  unsigned MaxBackticks = 0;
83  unsigned Backticks = 0;
84  for (char C : Input) {
85  if (C == '`') {
86  ++Backticks;
87  continue;
88  }
89  MaxBackticks = std::max(MaxBackticks, Backticks);
90  Backticks = 0;
91  }
92  MaxBackticks = std::max(Backticks, MaxBackticks);
93  // Use the corresponding number of backticks to start and end a code block.
94  return std::string(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`');
95 }
96 
97 // Trims the input and concatenates whitespace blocks into a single ` `.
98 std::string canonicalizeSpaces(std::string Input) {
99  // Goes over the string and preserves only a single ` ` for any whitespace
100  // chunks, the rest is moved to the end of the string and dropped in the end.
101  auto WritePtr = Input.begin();
102  llvm::SmallVector<llvm::StringRef, 4> Words;
103  llvm::SplitString(Input, Words);
104  if (Words.empty())
105  return "";
106  // Go over each word and add it to the string.
107  for (llvm::StringRef Word : Words) {
108  if (WritePtr > Input.begin())
109  *WritePtr++ = ' '; // Separate from previous block.
110  llvm::for_each(Word, [&WritePtr](const char C) { *WritePtr++ = C; });
111  }
112  // Get rid of extra spaces.
113  Input.resize(WritePtr - Input.begin());
114  return Input;
115 }
116 
117 std::string renderBlocks(llvm::ArrayRef<std::unique_ptr<Block>> Children,
118  void (Block::*RenderFunc)(llvm::raw_ostream &) const) {
119  std::string R;
120  llvm::raw_string_ostream OS(R);
121 
122  // Trim rulers.
123  Children = Children.drop_while(
124  [](const std::unique_ptr<Block> &C) { return C->isRuler(); });
125  auto Last = llvm::find_if(
126  llvm::reverse(Children),
127  [](const std::unique_ptr<Block> &C) { return !C->isRuler(); });
128  Children = Children.drop_back(Children.end() - Last.base());
129 
130  bool LastBlockWasRuler = true;
131  for (const auto &C : Children) {
132  if (C->isRuler() && LastBlockWasRuler)
133  continue;
134  LastBlockWasRuler = C->isRuler();
135  ((*C).*RenderFunc)(OS);
136  }
137 
138  // Get rid of redundant empty lines introduced in plaintext while imitating
139  // padding in markdown.
140  std::string AdjustedResult;
141  llvm::StringRef TrimmedText(OS.str());
142  TrimmedText = TrimmedText.trim();
143 
144  llvm::copy_if(TrimmedText, std::back_inserter(AdjustedResult),
145  [&TrimmedText](const char &C) {
146  return !llvm::StringRef(TrimmedText.data(),
147  &C - TrimmedText.data() + 1)
148  // We allow at most two newlines.
149  .endswith("\n\n\n");
150  });
151 
152  return AdjustedResult;
153 }
154 
155 // Seperates two blocks with extra spacing. Note that it might render strangely
156 // in vscode if the trailing block is a codeblock, see
157 // https://github.com/microsoft/vscode/issues/88416 for details.
158 class Ruler : public Block {
159 public:
160  void renderMarkdown(llvm::raw_ostream &OS) const override {
161  // Note that we need an extra new line before the ruler, otherwise we might
162  // make previous block a title instead of introducing a ruler.
163  OS << "\n---\n";
164  }
165  void renderPlainText(llvm::raw_ostream &OS) const override { OS << '\n'; }
166  bool isRuler() const override { return true; }
167 };
168 
169 class CodeBlock : public Block {
170 public:
171  void renderMarkdown(llvm::raw_ostream &OS) const override {
172  std::string Marker = getMarkerForCodeBlock(Contents);
173  // No need to pad from previous blocks, as they should end with a new line.
174  OS << Marker << Language << '\n' << Contents << '\n' << Marker << '\n';
175  }
176 
177  void renderPlainText(llvm::raw_ostream &OS) const override {
178  // In plaintext we want one empty line before and after codeblocks.
179  OS << '\n' << Contents << "\n\n";
180  }
181 
182  CodeBlock(std::string Contents, std::string Language)
183  : Contents(std::move(Contents)), Language(std::move(Language)) {}
184 
185 private:
186  std::string Contents;
187  std::string Language;
188 };
189 
190 // Inserts two spaces after each `\n` to indent each line. First line is not
191 // indented.
192 std::string indentLines(llvm::StringRef Input) {
193  assert(!Input.endswith("\n") && "Input should've been trimmed.");
194  std::string IndentedR;
195  // We'll add 2 spaces after each new line.
196  IndentedR.reserve(Input.size() + Input.count('\n') * 2);
197  for (char C : Input) {
198  IndentedR += C;
199  if (C == '\n')
200  IndentedR.append(" ");
201  }
202  return IndentedR;
203 }
204 
205 class Heading : public Paragraph {
206 public:
207  Heading(size_t Level) : Level(Level) {}
208  void renderMarkdown(llvm::raw_ostream &OS) const override {
209  OS << std::string(Level, '#') << ' ';
211  }
212 
213 private:
214  size_t Level;
215 };
216 
217 } // namespace
218 
219 std::string Block::asMarkdown() const {
220  std::string R;
221  llvm::raw_string_ostream OS(R);
222  renderMarkdown(OS);
223  return llvm::StringRef(OS.str()).trim().str();
224 }
225 
226 std::string Block::asPlainText() const {
227  std::string R;
228  llvm::raw_string_ostream OS(R);
229  renderPlainText(OS);
230  return llvm::StringRef(OS.str()).trim().str();
231 }
232 
233 void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
234  llvm::StringRef Sep = "";
235  for (auto &C : Chunks) {
236  OS << Sep;
237  switch (C.Kind) {
238  case Chunk::PlainText:
239  OS << renderText(C.Contents);
240  break;
241  case Chunk::InlineCode:
242  OS << renderInlineBlock(C.Contents);
243  break;
244  }
245  Sep = " ";
246  }
247  // Paragraphs are translated into markdown lines, not markdown paragraphs.
248  // Therefore it only has a single linebreak afterwards.
249  // VSCode requires two spaces at the end of line to start a new one.
250  OS << " \n";
251 }
252 
253 void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
254  llvm::StringRef Sep = "";
255  for (auto &C : Chunks) {
256  OS << Sep << C.Contents;
257  Sep = " ";
258  }
259  OS << '\n';
260 }
261 
262 void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
263  for (auto &D : Items) {
264  // Instead of doing this we might prefer passing Indent to children to get
265  // rid of the copies, if it turns out to be a bottleneck.
266  OS << "- " << indentLines(D.asMarkdown()) << '\n';
267  }
268  // We need a new line after list to terminate it in markdown.
269  OS << '\n';
270 }
271 
272 void BulletList::renderPlainText(llvm::raw_ostream &OS) const {
273  for (auto &D : Items) {
274  // Instead of doing this we might prefer passing Indent to children to get
275  // rid of the copies, if it turns out to be a bottleneck.
276  OS << "- " << indentLines(D.asPlainText()) << '\n';
277  }
278 }
279 
280 Paragraph &Paragraph::appendText(std::string Text) {
281  Text = canonicalizeSpaces(std::move(Text));
282  if (Text.empty())
283  return *this;
284  Chunks.emplace_back();
285  Chunk &C = Chunks.back();
286  C.Contents = std::move(Text);
287  C.Kind = Chunk::PlainText;
288  return *this;
289 }
290 
291 Paragraph &Paragraph::appendCode(std::string Code) {
292  Code = canonicalizeSpaces(std::move(Code));
293  if (Code.empty())
294  return *this;
295  Chunks.emplace_back();
296  Chunk &C = Chunks.back();
297  C.Contents = std::move(Code);
298  C.Kind = Chunk::InlineCode;
299  return *this;
300 }
301 
302 class Document &BulletList::addItem() {
303  Items.emplace_back();
304  return Items.back();
305 }
306 
307 Paragraph &Document::addParagraph() {
308  Children.push_back(std::make_unique<Paragraph>());
309  return *static_cast<Paragraph *>(Children.back().get());
310 }
311 
312 void Document::addRuler() { Children.push_back(std::make_unique<Ruler>()); }
313 
314 void Document::addCodeBlock(std::string Code, std::string Language) {
315  Children.emplace_back(
316  std::make_unique<CodeBlock>(std::move(Code), std::move(Language)));
317 }
318 
319 std::string Document::asMarkdown() const {
320  return renderBlocks(Children, &Block::renderMarkdown);
321 }
322 
323 std::string Document::asPlainText() const {
324  return renderBlocks(Children, &Block::renderPlainText);
325 }
326 
327 BulletList &Document::addBulletList() {
328  Children.emplace_back(std::make_unique<BulletList>());
329  return *static_cast<BulletList *>(Children.back().get());
330 }
331 
332 Paragraph &Document::addHeading(size_t Level) {
333  assert(Level > 0);
334  Children.emplace_back(std::make_unique<Heading>(Level));
335  return *static_cast<Paragraph *>(Children.back().get());
336 }
337 } // namespace markup
338 } // namespace clangd
339 } // namespace clang
std::string Code
void renderMarkdown(llvm::raw_ostream &OS) const override
Paragraph & appendCode(std::string Code)
Append inline code, this translates to the ` block in markdown.
llvm::StringRef Contents
std::string asMarkdown() const
Doesn't contain any trailing newlines.
virtual void renderPlainText(llvm::raw_ostream &OS) const =0
std::string asPlainText() const
void addCodeBlock(std::string Code, std::string Language="cpp")
Adds a block of code.
Paragraph & appendText(std::string Text)
Append plain text to the end of the string.
std::string asMarkdown() const
std::vector< std::unique_ptr< HTMLNode > > Children
void renderPlainText(llvm::raw_ostream &OS) const override
void renderMarkdown(llvm::raw_ostream &OS) const override
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
std::string asPlainText() const
Doesn't contain any trailing newlines.
void addRuler()
Inserts a horizontal separator to the document.
Paragraph & addHeading(size_t Level)
Heading is a special type of paragraph that will be prepended with Level many '#'s in markdown...
Paragraph & addParagraph()
Adds a semantical block that will be separate from others.
void renderPlainText(llvm::raw_ostream &OS) const override
std::string Word
virtual void renderMarkdown(llvm::raw_ostream &OS) const =0