#include "clang/AST/RawCommentList.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Comment.h"
#include "clang/AST/CommentBriefParser.h"
#include "clang/AST/CommentCommandTraits.h"
#include "clang/AST/CommentLexer.h"
#include "clang/AST/CommentParser.h"
#include "clang/AST/CommentSema.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Allocator.h"
using namespace clang;
namespace {
/// Classify the raw text of a single comment.
///
/// \param Comment the comment's source text, delimiters included.
/// \param ParseAllComments when true, text as short as two characters is
///        classified; otherwise anything shorter than three is invalid.
/// \returns the comment kind, paired with a flag that is set only for
///          documentation kinds whose fourth character is '<'. Ordinary and
///          invalid comments always report false.
std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
                                                        bool ParseAllComments) {
  using Classification = std::pair<RawComment::CommentKind, bool>;

  const size_t ShortestAccepted = ParseAllComments ? 2 : 3;
  if (Comment.size() < ShortestAccepted || Comment[0] != '/')
    return Classification(RawComment::RCK_Invalid, false);

  RawComment::CommentKind Kind;
  if (Comment[1] == '/') {
    // "//" with nothing after it can only be an ordinary line comment.
    if (Comment.size() < 3)
      return Classification(RawComment::RCK_OrdinaryBCPL, false);

    switch (Comment[2]) {
    case '/':
      Kind = RawComment::RCK_BCPLSlash;
      break;
    case '!':
      Kind = RawComment::RCK_BCPLExcl;
      break;
    default:
      return Classification(RawComment::RCK_OrdinaryBCPL, false);
    }
  } else {
    // Anything that does not start with "//" is expected to be at least four
    // characters long.
    assert(Comment.size() >= 4);

    // The text must open with "/*" and close with a literal "*/"; otherwise
    // it is not treated as a comment at all.
    const size_t Len = Comment.size();
    const bool Delimited = Comment[1] == '*' && Comment[Len - 2] == '*' &&
                           Comment[Len - 1] == '/';
    if (!Delimited)
      return Classification(RawComment::RCK_Invalid, false);

    switch (Comment[2]) {
    case '*':
      Kind = RawComment::RCK_JavaDoc;
      break;
    case '!':
      Kind = RawComment::RCK_Qt;
      break;
    default:
      return Classification(RawComment::RCK_OrdinaryC, false);
    }
  }

  // Only documentation kinds reach this point.
  const bool HasTrailingMarker = Comment.size() > 3 && Comment[3] == '<';
  return Classification(Kind, HasTrailingMarker);
}
/// Report whether the text of a merged comment has '<' as its fourth
/// character.
bool mergedCommentIsTrailingComment(StringRef Comment) {
  if (Comment.size() <= 3)
    return false;
  return Comment[3] == '<';
}
bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
const RawComment &R2) {
SourceLocation L1 = R1.getBeginLoc();
SourceLocation L2 = R2.getBeginLoc();
bool Invalid = false;
unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
if (!Invalid) {
unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
return !Invalid && (C1 == C2);
}
return false;
}
}
/// Scan backwards from offset \p P in \p Buffer and report whether only
/// horizontal whitespace lies between \p P and the preceding vertical
/// whitespace character (or the start of the buffer).
static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
  unsigned Pos = P;
  while (Pos > 0) {
    const char Prev = Buffer[--Pos];
    // Reached the end of the previous line without seeing anything else.
    if (isVerticalWhitespace(Prev))
      return true;
    if (!isHorizontalWhitespace(Prev))
      return false;
  }
  // Ran off the start of the buffer: nothing but whitespace was seen.
  return true;
}
/// Returns true for the two ordinary comment kinds (RCK_OrdinaryBCPL and
/// RCK_OrdinaryC) and false for every other kind.
static bool isOrdinaryKind(RawComment::CommentKind K) {
  switch (K) {
  case RawComment::RCK_OrdinaryBCPL:
  case RawComment::RCK_OrdinaryC:
    return true;
  default:
    return false;
  }
}
/// Build a RawComment covering \p SR.
///
/// The comment is marked RCK_Invalid when the range is empty or no raw text
/// can be obtained for it. Otherwise the kind and the trailing flags are
/// derived from the comment text; when \p Merged is true the kind is always
/// RCK_Merged.
RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
                       const CommentOptions &CommentOpts, bool Merged)
    : Range(SR), RawTextValid(false), BriefTextValid(false), IsAttached(false),
      IsTrailingComment(false), IsAlmostTrailingComment(false) {
  // NOTE(review): RawText is read below right after this getRawText() call;
  // presumably getRawText() caches its result there - confirm in the header.
  const bool EmptyRange = SR.getBegin() == SR.getEnd();
  if (EmptyRange || getRawText(SourceMgr).empty()) {
    Kind = RCK_Invalid;
    return;
  }

  const std::pair<CommentKind, bool> Classified =
      getCommentKind(RawText, CommentOpts.ParseAllComments);
  const CommentKind GuessedKind = Classified.first;
  const bool HasTrailingMarker = Classified.second;

  // An ordinary comment counts as trailing when something other than
  // whitespace precedes it on its line.
  if (CommentOpts.ParseAllComments && isOrdinaryKind(GuessedKind)) {
    const std::pair<FileID, unsigned> Begin =
        SourceMgr.getDecomposedLoc(Range.getBegin());
    if (Begin.second != 0) {
      bool Invalid = false;
      const char *Buffer =
          SourceMgr.getBufferData(Begin.first, &Invalid).data();
      if (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, Begin.second))
        IsTrailingComment = true;
    }
  }

  if (Merged) {
    Kind = RCK_Merged;
    if (!IsTrailingComment && mergedCommentIsTrailingComment(RawText))
      IsTrailingComment = true;
    return;
  }

  Kind = GuessedKind;
  if (HasTrailingMarker)
    IsTrailingComment = true;
  IsAlmostTrailingComment =
      RawText.startswith("//<") || RawText.startswith("/*<");
}
/// Fetch the comment's text directly from the source buffer.
///
/// \returns a view of the bytes between the range's begin and end offsets,
///          or an empty StringRef when the span is shorter than two
///          characters or the buffer cannot be read.
StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
  const std::pair<FileID, unsigned> Begin =
      SourceMgr.getDecomposedLoc(Range.getBegin());
  const std::pair<FileID, unsigned> End =
      SourceMgr.getDecomposedLoc(Range.getEnd());

  const unsigned Length = End.second - Begin.second;
  if (Length < 2)
    return StringRef();

  // Both ends of the comment are expected to lie in the same file.
  assert(Begin.first == End.first);

  bool Invalid = false;
  const StringRef FileText = SourceMgr.getBufferData(Begin.first, &Invalid);
  if (Invalid)
    return StringRef();

  return StringRef(FileText.data() + Begin.second, Length);
}
const char *RawComment::extractBriefText(const ASTContext &Context) const {
(void)getRawText(Context.getSourceManager());
llvm::BumpPtrAllocator Allocator;
comments::Lexer L(Allocator, Context.getDiagnostics(),
Context.getCommentCommandTraits(),
Range.getBegin(),
RawText.begin(), RawText.end());
comments::BriefParser P(L, Context.getCommentCommandTraits());
const std::string Result = P.Parse();
const unsigned BriefTextLength = Result.size();
char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
BriefText = BriefTextPtr;
BriefTextValid = true;
return BriefTextPtr;
}
/// Run the full comment parser over this comment's raw text.
///
/// \param Context supplies the allocator, source manager, diagnostics engine
///        and command traits used by the lexer, semantic analyzer and parser.
/// \param PP forwarded to comments::Sema.
/// \param D the declaration handed to comments::Sema::setDecl.
/// \returns the result of comments::Parser::parseFullComment().
comments::FullComment *RawComment::parse(const ASTContext &Context,
                                         const Preprocessor *PP,
                                         const Decl *D) const {
  // Called for its effect only; the returned text is discarded.
  // NOTE(review): presumably this ensures RawText is populated - confirm.
  (void)getRawText(Context.getSourceManager());

  auto &Alloc = Context.getAllocator();
  auto &SrcMgr = Context.getSourceManager();
  auto &Diagnostics = Context.getDiagnostics();
  auto &Traits = Context.getCommentCommandTraits();

  comments::Lexer CommentLexer(Alloc, Diagnostics, Traits,
                               getSourceRange().getBegin(), RawText.begin(),
                               RawText.end());

  comments::Sema Actions(Alloc, SrcMgr, Diagnostics, Traits, PP);
  Actions.setDecl(D);

  comments::Parser CommentParser(CommentLexer, Actions, Alloc, SrcMgr,
                                 Diagnostics, Traits);
  return CommentParser.parseFullComment();
}
/// Check that the text between \p Loc1 and \p Loc2 is whitespace only and
/// contains at most \p MaxNewlinesAllowed line breaks.
///
/// A "\r\n" or "\n\r" pair counts as one line break. Returns false when the
/// two locations are in different files or the buffer cannot be read.
static bool onlyWhitespaceBetween(SourceManager &SM,
                                  SourceLocation Loc1, SourceLocation Loc2,
                                  unsigned MaxNewlinesAllowed) {
  const std::pair<FileID, unsigned> First = SM.getDecomposedLoc(Loc1);
  const std::pair<FileID, unsigned> Second = SM.getDecomposedLoc(Loc2);
  if (First.first != Second.first)
    return false;

  bool Invalid = false;
  const char *Text = SM.getBufferData(First.first, &Invalid).data();
  if (Invalid)
    return false;

  unsigned NewlinesSeen = 0;
  assert(First.second <= Second.second && "Loc1 after Loc2!");

  const unsigned Stop = Second.second;
  unsigned Pos = First.second;
  while (Pos != Stop) {
    const char Cur = Text[Pos];
    if (Cur == '\r' || Cur == '\n') {
      if (++NewlinesSeen > MaxNewlinesAllowed)
        return false;

      // Treat a two-character line ending made of one '\r' and one '\n' as a
      // single line break by stepping over its second half.
      const unsigned Next = Pos + 1;
      if (Next != Stop) {
        const char Following = Text[Next];
        if ((Following == '\n' || Following == '\r') && Following != Cur)
          ++Pos;
      }
    } else if (Cur != ' ' && Cur != '\t' && Cur != '\f' && Cur != '\v') {
      // Any character that is not whitespace disqualifies the span.
      return false;
    }
    ++Pos;
  }
  return true;
}
void RawCommentList::addComment(const RawComment &RC,
const CommentOptions &CommentOpts,
llvm::BumpPtrAllocator &Allocator) {
if (RC.isInvalid())
return;
if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
return;
std::pair<FileID, unsigned> Loc =
SourceMgr.getDecomposedLoc(RC.getBeginLoc());
const FileID CommentFile = Loc.first;
const unsigned CommentOffset = Loc.second;
if (OrderedComments[CommentFile].empty()) {
OrderedComments[CommentFile][CommentOffset] =
new (Allocator) RawComment(RC);
return;
}
const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second;
const RawComment &C2 = RC;
if ((C1.isTrailingComment() == C2.isTrailingComment() ||
(C1.isTrailingComment() && !C2.isTrailingComment() &&
isOrdinaryKind(C2.getKind()) &&
commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
1)) {
SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
*OrderedComments[CommentFile].rbegin()->second =
RawComment(SourceMgr, MergedRange, CommentOpts, true);
} else {
OrderedComments[CommentFile][CommentOffset] =
new (Allocator) RawComment(RC);
}
}
/// Look up the offset-ordered comment map for \p File.
///
/// \returns a pointer to the map, or nullptr when \p File has no entry.
const std::map<unsigned, RawComment *> *
RawCommentList::getCommentsInFile(FileID File) const {
  const auto Entry = OrderedComments.find(File);
  return Entry != OrderedComments.end() ? &Entry->second : nullptr;
}
/// Returns true when the per-file comment table (OrderedComments) has no
/// entries.
bool RawCommentList::empty() const { return OrderedComments.empty(); }
/// Return the line number for (\p File, \p Offset), memoized per comment.
///
/// The first query for \p C asks the source manager and stores the answer in
/// CommentBeginLine; later queries for the same pointer return the stored
/// value without consulting \p File or \p Offset again.
unsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File,
                                             unsigned Offset) const {
  const auto Hit = CommentBeginLine.find(C);
  if (Hit == CommentBeginLine.end()) {
    const unsigned Computed = SourceMgr.getLineNumber(File, Offset);
    CommentBeginLine[C] = Computed;
    return Computed;
  }
  return Hit->second;
}
/// Return the file offset of the end of \p C's source range, memoized per
/// comment in CommentEndOffset.
unsigned RawCommentList::getCommentEndOffset(RawComment *C) const {
  const auto Hit = CommentEndOffset.find(C);
  if (Hit != CommentEndOffset.end())
    return Hit->second;

  const SourceLocation EndLoc = C->getSourceRange().getEnd();
  const unsigned Computed = SourceMgr.getDecomposedLoc(EndLoc).second;
  CommentEndOffset[C] = Computed;
  return Computed;
}
std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
DiagnosticsEngine &Diags) const {
llvm::StringRef CommentText = getRawText(SourceMgr);
if (CommentText.empty())
return "";
std::string Result;
for (const RawComment::CommentLine &Line :
getFormattedLines(SourceMgr, Diags))
Result += Line.Text + "\n";
auto LastChar = Result.find_last_not_of('\n');
Result.erase(LastChar + 1, Result.size());
return Result;
}
/// Split the comment into lines of text with the first line's indentation
/// removed from every line.
///
/// Each returned CommentLine carries the line's text together with presumed
/// begin and end locations. Returns an empty vector when the comment has no
/// raw text.
std::vector<RawComment::CommentLine>
RawComment::getFormattedLines(const SourceManager &SourceMgr,
DiagnosticsEngine &Diags) const {
llvm::StringRef CommentText = getRawText(SourceMgr);
if (CommentText.empty())
return {};
// The lexer uses a local allocator and command traits built from
// default-constructed CommentOptions.
llvm::BumpPtrAllocator Allocator;
CommentOptions DefOpts;
comments::CommandTraits EmptyTraits(Allocator, DefOpts);
// NOTE(review): the trailing `false` presumably turns off command parsing in
// the lexer - confirm against the comments::Lexer constructor.
comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
CommentText.begin(), CommentText.end(),
false);
std::vector<RawComment::CommentLine> Result;
// Column up to which leading whitespace is stripped; set by the first line.
unsigned IndentColumn = 0;
// Presumed line number of the most recently recorded line; used to avoid
// recording two entries for the same line.
unsigned PreviousLine = 0;
// Lexes one line of the comment and appends it to Result. Returns true when
// the line ended with a newline token (more input may follow) and false once
// the eof token has been reached.
auto LexLine = [&](bool IsFirstLine) -> bool {
comments::Token Tok;
// The first token of the line is handled separately because its leading
// whitespace may need trimming.
L.lex(Tok);
if (Tok.is(comments::tok::eof))
return false;
if (Tok.is(comments::tok::newline)) {
// The line has no content: record it as an empty line.
PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation());
if (Loc.getLine() != PreviousLine) {
Result.emplace_back("", Loc, Loc);
PreviousLine = Loc.getLine();
}
return true;
}
SmallString<124> Line;
llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
bool LocInvalid = false;
unsigned TokColumn =
SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
assert(!LocInvalid && "getFormattedText for invalid location");
// Number of leading spaces and tabs in the token; the whole token when it
// consists of nothing else.
size_t WhitespaceLen = TokText.find_first_not_of(" \t");
if (WhitespaceLen == StringRef::npos)
WhitespaceLen = TokText.size();
// The first line fixes the column at which text starts; later lines are
// stripped only up to that column.
if (IsFirstLine)
IndentColumn = TokColumn + WhitespaceLen;
// How much leading whitespace to drop: all of it on the first line,
// otherwise no more than what lies before IndentColumn (never negative).
unsigned SkipLen =
IsFirstLine
? WhitespaceLen
: std::min<size_t>(
WhitespaceLen,
std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
Line += Trimmed;
// Begin location of the line after trimming.
PresumedLoc Begin =
SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen));
// Append the spelling of every remaining token on this line.
for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
if (Tok.is(comments::tok::newline)) {
// The newline token marks the end location of the line.
PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
if (End.getLine() != PreviousLine) {
Result.emplace_back(Line, Begin, End);
PreviousLine = End.getLine();
}
return true;
}
Line += L.getSpelling(Tok, SourceMgr);
}
// Reached eof without a closing newline: record what was collected.
PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
Result.emplace_back(Line, Begin, End);
return false;
};
// Handle the first line on its own, then keep lexing lines until eof.
if (!LexLine(true))
return Result;
while (LexLine(false))
;
return Result;
}