#include "llvm/Support/YAMLParser.h"
#include "llvm/ADT/AllocatorList.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Unicode.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
using namespace llvm;
using namespace yaml;
/// UnicodeEncodingForm - The encoding form reported by getUnicodeEncoding for
/// the start of an input buffer. UEF_Unknown is reported for empty input and
/// for leading bytes that match none of the detection rules.
enum UnicodeEncodingForm {
UEF_UTF32_LE, UEF_UTF32_BE, UEF_UTF16_LE, UEF_UTF16_BE, UEF_UTF8, UEF_Unknown };
/// EncodingInfo - Holds the encoding form and the length in bytes of the byte
/// order mark that introduced it (0 when no byte order mark was found).
using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;

/// getUnicodeEncoding - Classify the encoding of \a Input by inspecting at
/// most its first four bytes.
///
/// A byte order mark is recognised first; failing that, the pattern of zero
/// bytes around the first character is used as a heuristic.
///
/// @param Input The raw bytes to inspect.
/// @returns The detected form together with the number of BOM bytes to skip.
static EncodingInfo getUnicodeEncoding(StringRef Input) {
  const size_t Size = Input.size();
  if (Size == 0)
    return std::make_pair(UEF_Unknown, 0);

  // Read a byte as an unsigned value so comparisons against 0xFE/0xFF work
  // regardless of the signedness of char.
  auto Byte = [&Input](size_t Idx) { return uint8_t(Input[Idx]); };

  const uint8_t Lead = Byte(0);

  if (Lead == 0x00) {
    if (Size >= 4) {
      // 00 00 FE FF: UTF-32 big endian BOM.
      if (Byte(1) == 0 && Byte(2) == 0xFE && Byte(3) == 0xFF)
        return std::make_pair(UEF_UTF32_BE, 4);
      // 00 00 00 xx: UTF-32 big endian without a BOM.
      if (Byte(1) == 0 && Byte(2) == 0 && Byte(3) != 0)
        return std::make_pair(UEF_UTF32_BE, 0);
    }
    // 00 xx: UTF-16 big endian without a BOM.
    if (Size >= 2 && Byte(1) != 0)
      return std::make_pair(UEF_UTF16_BE, 0);
    return std::make_pair(UEF_Unknown, 0);
  }

  if (Lead == 0xFF) {
    // FF FE 00 00: UTF-32 little endian BOM.
    if (Size >= 4 && Byte(1) == 0xFE && Byte(2) == 0 && Byte(3) == 0)
      return std::make_pair(UEF_UTF32_LE, 4);
    // FF FE: UTF-16 little endian BOM.
    if (Size >= 2 && Byte(1) == 0xFE)
      return std::make_pair(UEF_UTF16_LE, 2);
    return std::make_pair(UEF_Unknown, 0);
  }

  if (Lead == 0xFE) {
    // FE FF: UTF-16 big endian BOM.
    if (Size >= 2 && Byte(1) == 0xFF)
      return std::make_pair(UEF_UTF16_BE, 2);
    return std::make_pair(UEF_Unknown, 0);
  }

  if (Lead == 0xEF) {
    // EF BB BF: UTF-8 BOM.
    if (Size >= 3 && Byte(1) == 0xBB && Byte(2) == 0xBF)
      return std::make_pair(UEF_UTF8, 3);
    return std::make_pair(UEF_Unknown, 0);
  }

  // No BOM and a non-zero first byte: look at the zero bytes that follow.
  if (Size >= 4 && Byte(1) == 0 && Byte(2) == 0 && Byte(3) == 0)
    return std::make_pair(UEF_UTF32_LE, 0);
  if (Size >= 2 && Byte(1) == 0)
    return std::make_pair(UEF_UTF16_LE, 0);
  return std::make_pair(UEF_UTF8, 0);
}
// Out-of-line definitions of the anchor() member of every node class. The
// bodies are empty.
// NOTE(review): presumably these exist to give each class a single home
// translation unit for its vtable -- confirm against the class declarations.
void Node::anchor() {}
void NullNode::anchor() {}
void ScalarNode::anchor() {}
void BlockScalarNode::anchor() {}
void KeyValueNode::anchor() {}
void MappingNode::anchor() {}
void SequenceNode::anchor() {}
void AliasNode::anchor() {}
namespace llvm {
namespace yaml {
/// Token - A single lexical unit produced by the Scanner and stored in its
/// token queue.
struct Token {
// The kind of token. A default-constructed Token has kind TK_Error, which the
// scanner also uses to signal that scanning failed.
enum TokenKind {
TK_Error, TK_StreamStart,
TK_StreamEnd,
TK_VersionDirective,
TK_TagDirective,
TK_DocumentStart,
TK_DocumentEnd,
TK_BlockEntry,
TK_BlockEnd,
TK_BlockSequenceStart,
TK_BlockMappingStart,
TK_FlowEntry,
TK_FlowSequenceStart,
TK_FlowSequenceEnd,
TK_FlowMappingStart,
TK_FlowMappingEnd,
TK_Key,
TK_Value,
TK_Scalar,
TK_BlockScalar,
TK_Alias,
TK_Anchor,
TK_Tag
} Kind = TK_Error;
/// The slice of the scanned input that this token refers to. It is a view
/// into the input buffer, not a copy.
StringRef Range;
/// Owned text associated with the token. In the code visible here it is only
/// filled in for block scalars, where it holds the processed scalar contents.
std::string Value;
Token() = default;
};
} }
/// TokenQueueT - The queue of scanned tokens. Iterators into it are stored in
/// SimpleKey, so the container type must keep them valid across insertions.
using TokenQueueT = BumpPtrList<Token>;

namespace {

/// SimpleKey - Records a token that may later turn out to be the key of a
/// mapping entry, together with the scanner position at which it was seen.
struct SimpleKey {
  /// The candidate token inside the token queue.
  TokenQueueT::iterator Tok;
  /// Column at which the candidate started.
  unsigned Column = 0;
  /// Line on which the candidate was recorded.
  unsigned Line = 0;
  /// Flow nesting level in effect when the candidate was recorded.
  unsigned FlowLevel = 0;
  /// Whether failing to find the matching ':' must be reported as an error.
  bool IsRequired = false;

  /// Two candidates are equal when they refer to the same queued token; the
  /// position fields are deliberately not compared.
  ///
  /// This is const-qualified so that it can be applied to const objects and so
  /// that the comparison stays unambiguous when the compiler considers the
  /// operands in reversed order.
  bool operator==(const SimpleKey &Other) const {
    return Tok == Other.Tok;
  }
};

} // end anonymous namespace
/// UTF8Decoded - The Unicode scalar value of a decoded sequence and the number
/// of bytes it occupied. A length of 0 signals that decoding failed.
using UTF8Decoded = std::pair<uint32_t, unsigned>;

/// decodeUTF8 - Decode the single UTF-8 sequence at the start of \a Range.
///
/// Over-long encodings, surrogate code points and values above U+10FFFF are
/// rejected, as are truncated sequences and bad continuation bytes.
///
/// @param Range The bytes to decode; only the leading sequence is examined.
/// @returns The scalar value and its encoded length, or (0, 0) on failure.
static UTF8Decoded decodeUTF8(StringRef Range) {
  const size_t Avail = Range.size();

  // Widen through uint8_t so that the masks below never see sign extension.
  auto Byte = [&Range](size_t Idx) { return uint32_t(uint8_t(Range[Idx])); };
  auto IsContinuation = [&Byte](size_t Idx) {
    return (Byte(Idx) & 0xC0) == 0x80;
  };

  if (Avail == 0)
    return std::make_pair(0, 0);

  const uint32_t Lead = Byte(0);

  // 1 byte: [0x00, 0x7f], bit pattern 0xxxxxxx.
  if ((Lead & 0x80) == 0)
    return UTF8Decoded(Lead, 1);

  // 2 bytes: [0x80, 0x7ff], bit pattern 110xxxxx 10xxxxxx.
  if (Avail >= 2 && (Lead & 0xE0) == 0xC0 && IsContinuation(1)) {
    const uint32_t CodePoint = ((Lead & 0x1F) << 6) | (Byte(1) & 0x3F);
    if (CodePoint >= 0x80)
      return UTF8Decoded(CodePoint, 2);
  }

  // 3 bytes: [0x800, 0xffff], bit pattern 1110xxxx 10xxxxxx 10xxxxxx.
  if (Avail >= 3 && (Lead & 0xF0) == 0xE0 && IsContinuation(1) &&
      IsContinuation(2)) {
    const uint32_t CodePoint = ((Lead & 0x0F) << 12) |
                               ((Byte(1) & 0x3F) << 6) | (Byte(2) & 0x3F);
    // Code points in [0xD800, 0xDFFF] are surrogates and not valid here.
    const bool IsSurrogate = CodePoint >= 0xD800 && CodePoint <= 0xDFFF;
    if (CodePoint >= 0x800 && !IsSurrogate)
      return UTF8Decoded(CodePoint, 3);
  }

  // 4 bytes: [0x10000, 0x10FFFF], 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
  if (Avail >= 4 && (Lead & 0xF8) == 0xF0 && IsContinuation(1) &&
      IsContinuation(2) && IsContinuation(3)) {
    const uint32_t CodePoint = ((Lead & 0x07) << 18) |
                               ((Byte(1) & 0x3F) << 12) |
                               ((Byte(2) & 0x3F) << 6) | (Byte(3) & 0x3F);
    if (CodePoint >= 0x10000 && CodePoint <= 0x10FFFF)
      return UTF8Decoded(CodePoint, 4);
  }

  return std::make_pair(0, 0);
}
namespace llvm {
namespace yaml {
/// Scanner - Turns a buffer of YAML text into a queue of Tokens. Tokens are
/// produced on demand by peekNext()/getNext().
class Scanner {
public:
/// Construct a scanner over \p Input; the text is wrapped in a buffer and
/// handed to init().
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
std::error_code *EC = nullptr);
/// Construct a scanner over an existing buffer reference.
Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
std::error_code *EC = nullptr);
/// Return a reference to the next token without removing it from the queue.
/// If fetching tokens fails, the queue is reset to a single default
/// (TK_Error) token and that token is returned.
Token &peekNext();
/// Return a copy of the next token and remove it from the queue.
Token getNext();
/// Forward a diagnostic to the SourceMgr, honouring the ShowColors setting.
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
ArrayRef<SMRange> Ranges = None) {
SM.PrintMessage(Loc, Kind, Message, Ranges, None, ShowColors);
}
/// Record a scanning error at \p Position. The message is printed only for
/// the first error; later calls still update *EC and keep Failed set.
void setError(const Twine &Message, StringRef::iterator Position) {
// Clamp positions at or past the end onto the last byte of the buffer.
if (Position >= End)
Position = End - 1;
// Report through the optional error code supplied at construction.
if (EC)
*EC = make_error_code(std::errc::invalid_argument);
// Only the first error is printed.
if (!Failed)
printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message);
Failed = true;
}
/// Whether setError has been called (or scanning otherwise marked failed).
bool failed() {
return Failed;
}
private:
/// Reset all scanning state for \p Buffer and register it with the SourceMgr.
void init(MemoryBufferRef Buffer);
/// The not yet consumed part of the input, from Current to End.
StringRef currentInput() {
return StringRef(Current, End - Current);
}
/// Decode the UTF-8 sequence starting at \p Position, bounded by End.
UTF8Decoded decodeUTF8(StringRef::iterator Position) {
return ::decodeUTF8(StringRef(Position, End - Position));
}
// The skip_* helpers below each try to match one element at Position. They
// return Position unchanged when nothing matches and the position just past
// the match otherwise; none of them modifies the scanner state.
StringRef::iterator skip_nb_char(StringRef::iterator Position);
StringRef::iterator skip_b_break(StringRef::iterator Position);
StringRef::iterator skip_s_space(StringRef::iterator Position);
StringRef::iterator skip_s_white(StringRef::iterator Position);
StringRef::iterator skip_ns_char(StringRef::iterator Position);
/// Pointer to one of the skip_* member functions above.
using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
/// Apply \p Func repeatedly starting at \p Position until it stops advancing,
/// and return the final position.
StringRef::iterator skip_while( SkipWhileFunc Func
, StringRef::iterator Position);
/// Like skip_while starting at Current, but also advances Current and Column.
void advanceWhile(SkipWhileFunc Func);
/// Advance Current and Column over a run of URI characters.
void scan_ns_uri_char();
/// Consume the single ASCII character \p Expected if it is next in the input.
/// Returns false (and, for non-ASCII, sets an error) otherwise.
bool consume(uint32_t Expected);
/// Advance Current and Column by \p Distance bytes.
void skip(uint32_t Distance);
/// True if \p Position is not End and points at space, tab, CR or LF.
bool isBlankOrBreak(StringRef::iterator Position);
/// True if every character of \p Line satisfies isBlankOrBreak.
bool isLineEmpty(StringRef Line);
/// Consume a line break at Current if there is one, updating Line and Column.
bool consumeLineBreakIfPresent();
/// Remember \p Tok as a possible simple key, if simple keys are allowed here.
void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
, unsigned AtColumn
, bool IsRequired);
/// Drop candidates that are on another line or more than 1024 columns back.
void removeStaleSimpleKeyCandidates();
/// Drop the most recent candidate if it was recorded at flow level \p Level.
void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
/// Emit TK_BlockEnd tokens until Indent is no greater than \p ToColumn.
bool unrollIndent(int ToColumn);
/// If \p ToColumn is deeper than Indent, push the old indent and insert a
/// token of kind \p Kind before \p InsertPoint.
bool rollIndent( int ToColumn
, Token::TokenKind Kind
, TokenQueueT::iterator InsertPoint);
/// Skip a '#' comment up to (not including) the line break.
void skipComment();
/// Skip whitespace, comments and line breaks up to the next token.
void scanToNextToken();
// Each scan* function below scans one kind of token at Current and appends
// it to TokenQueue. They return false when scanning fails.
bool scanStreamStart();
bool scanStreamEnd();
bool scanDirective();
bool scanDocumentIndicator(bool IsStart);
bool scanFlowCollectionStart(bool IsSequence);
bool scanFlowCollectionEnd(bool IsSequence);
bool scanFlowEntry();
bool scanBlockEntry();
bool scanKey();
bool scanValue();
bool scanFlowScalar(bool IsDoubleQuoted);
bool scanPlainScalar();
bool scanAliasOrAnchor(bool IsAlias);
bool scanBlockScalar(bool IsLiteral);
/// Scan the style indicator and the rest of the block scalar header.
bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
unsigned &IndentIndicator, bool &IsDone);
/// Consume '|' or '>' and return it, or return ' ' if neither is present.
char scanBlockStyleIndicator();
/// Consume '+' or '-' and return it, or return ' ' if neither is present.
char scanBlockChompingIndicator();
/// Consume a digit 1-9 and return its value, or return 0 if none is present.
unsigned scanBlockIndentationIndicator();
/// Scan the chomping/indentation indicators, trailing blanks and comment, and
/// the line break that ends a block scalar header.
bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
bool &IsDone);
/// Determine the indentation of a block scalar from its first non-empty line.
bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
unsigned &LineBreaks, bool &IsDone);
/// Consume the indentation of one block scalar line and decide whether the
/// scalar ends here.
bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
bool &IsDone);
bool scanTag();
/// Scan at least one more token into TokenQueue; returns false on failure.
bool fetchMoreTokens();
/// Receives the input buffer and all diagnostics.
SourceMgr &SM;
/// The buffer being scanned.
MemoryBufferRef InputBuffer;
/// Next byte to be scanned.
StringRef::iterator Current;
/// One past the last byte of the input.
StringRef::iterator End;
/// Current block indentation column; init() starts it at -1.
int Indent;
/// Column of Current; init() starts it at 0.
unsigned Column;
/// Line of Current; init() starts it at 0.
unsigned Line;
/// Nesting depth of flow collections ('[' / '{'); 0 means block context.
unsigned FlowLevel;
/// True until the stream-start token has been produced.
bool IsStartOfStream;
/// Whether a simple key candidate may be recorded at the current position.
bool IsSimpleKeyAllowed;
/// Set once an error has been recorded.
bool Failed;
/// Passed through to SourceMgr::PrintMessage.
bool ShowColors;
/// Tokens scanned but not yet handed out.
TokenQueueT TokenQueue;
/// Enclosing indentation levels, saved by rollIndent.
SmallVector<int, 4> Indents;
/// Tokens that may still turn out to be mapping keys.
SmallVector<SimpleKey, 4> SimpleKeys;
/// Optional out-parameter that receives an error code; may be null.
std::error_code *EC;
};
} }
/// encodeUTF8 - Append the UTF-8 encoding of \a UnicodeScalarValue to
/// \a Result.
///
/// Values above U+10FFFF append nothing.
///
/// @param UnicodeScalarValue The code point to encode.
/// @param Result Buffer that receives one to four bytes.
static void encodeUTF8(uint32_t UnicodeScalarValue,
                       SmallVectorImpl<char> &Result) {
  const uint32_t CP = UnicodeScalarValue;

  // Build a continuation byte (10xxxxxx) from the six bits starting at Shift.
  auto Continuation = [CP](unsigned Shift) {
    return uint8_t(0x80 | ((CP >> Shift) & 0x3F));
  };

  if (CP <= 0x7F) {
    // 0xxxxxxx
    Result.push_back(CP & 0x7F);
    return;
  }

  if (CP <= 0x7FF) {
    // 110xxxxx 10xxxxxx
    const uint8_t Lead = 0xC0 | ((CP & 0x7C0) >> 6);
    Result.push_back(Lead);
    Result.push_back(Continuation(0));
    return;
  }

  if (CP <= 0xFFFF) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    const uint8_t Lead = 0xE0 | ((CP & 0xF000) >> 12);
    Result.push_back(Lead);
    Result.push_back(Continuation(6));
    Result.push_back(Continuation(0));
    return;
  }

  if (CP <= 0x10FFFF) {
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    const uint8_t Lead = 0xF0 | ((CP & 0x1F0000) >> 18);
    Result.push_back(Lead);
    Result.push_back(Continuation(12));
    Result.push_back(Continuation(6));
    Result.push_back(Continuation(0));
  }
}
/// Scan \p Input and write one line per token to \p OS, consisting of a label
/// for the token kind followed by the token's source range.
///
/// @returns true if the stream end was reached, false if an error token was
///          produced (the error token's range is still printed, unlabelled).
bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
  // Map a token kind to the label printed in front of its range.
  auto LabelFor = [](Token::TokenKind Kind) -> const char * {
    switch (Kind) {
    case Token::TK_StreamStart:
      return "Stream-Start: ";
    case Token::TK_StreamEnd:
      return "Stream-End: ";
    case Token::TK_VersionDirective:
      return "Version-Directive: ";
    case Token::TK_TagDirective:
      return "Tag-Directive: ";
    case Token::TK_DocumentStart:
      return "Document-Start: ";
    case Token::TK_DocumentEnd:
      return "Document-End: ";
    case Token::TK_BlockEntry:
      return "Block-Entry: ";
    case Token::TK_BlockEnd:
      return "Block-End: ";
    case Token::TK_BlockSequenceStart:
      return "Block-Sequence-Start: ";
    case Token::TK_BlockMappingStart:
      return "Block-Mapping-Start: ";
    case Token::TK_FlowEntry:
      return "Flow-Entry: ";
    case Token::TK_FlowSequenceStart:
      return "Flow-Sequence-Start: ";
    case Token::TK_FlowSequenceEnd:
      return "Flow-Sequence-End: ";
    case Token::TK_FlowMappingStart:
      return "Flow-Mapping-Start: ";
    case Token::TK_FlowMappingEnd:
      return "Flow-Mapping-End: ";
    case Token::TK_Key:
      return "Key: ";
    case Token::TK_Value:
      return "Value: ";
    case Token::TK_Scalar:
      return "Scalar: ";
    case Token::TK_BlockScalar:
      return "Block Scalar: ";
    case Token::TK_Alias:
      return "Alias: ";
    case Token::TK_Anchor:
      return "Anchor: ";
    case Token::TK_Tag:
      return "Tag: ";
    case Token::TK_Error:
      // Error tokens carry no label.
      return "";
    }
    return "";
  };

  SourceMgr SM;
  Scanner scanner(Input, SM);
  for (;;) {
    const Token T = scanner.getNext();
    OS << LabelFor(T.Kind);
    OS << T.Range << "\n";
    if (T.Kind == Token::TK_StreamEnd)
      return true;
    if (T.Kind == Token::TK_Error)
      return false;
  }
}
/// Scan all of \p Input, discarding the tokens.
///
/// @returns true if the stream end was reached without an error token.
bool yaml::scanTokens(StringRef Input) {
  SourceMgr SM;
  Scanner scanner(Input, SM);
  for (;;) {
    const Token::TokenKind Kind = scanner.getNext().Kind;
    if (Kind == Token::TK_StreamEnd)
      return true;
    if (Kind == Token::TK_Error)
      return false;
  }
}
/// Escape \p Input so that it can be placed inside a double-quoted YAML
/// scalar.
///
/// Backslash, double quote and control characters get their short escapes or
/// a \xNN escape. Non-ASCII text is decoded as UTF-8: a few code points have
/// dedicated escapes, printable ones are copied through unless
/// \p EscapePrintable is set, and the rest become \xNN, \uNNNN or \UNNNNNNNN.
///
/// If an invalid UTF-8 sequence is met, U+FFFD is appended and the text
/// produced so far is returned; the remainder of the input is dropped.
///
/// @param Input The text to escape.
/// @param EscapePrintable Whether printable non-ASCII characters are escaped.
/// @returns The escaped text.
std::string yaml::escape(StringRef Input, bool EscapePrintable) {
  std::string EscapedInput;

  // Append Prefix followed by Hex, left-padded with '0' to Width digits.
  auto AppendHex = [&EscapedInput](const char *Prefix, size_t Width,
                                   const std::string &Hex) {
    EscapedInput += Prefix + std::string(Width - Hex.size(), '0') + Hex;
  };

  const StringRef::iterator InputEnd = Input.end();
  for (StringRef::iterator I = Input.begin(); I != InputEnd; ++I) {
    const char C = *I;

    // Characters with a dedicated short escape.
    switch (C) {
    case '\\':
      EscapedInput += "\\\\";
      continue;
    case '"':
      EscapedInput += "\\\"";
      continue;
    case 0:
      EscapedInput += "\\0";
      continue;
    case 0x07:
      EscapedInput += "\\a";
      continue;
    case 0x08:
      EscapedInput += "\\b";
      continue;
    case 0x09:
      EscapedInput += "\\t";
      continue;
    case 0x0A:
      EscapedInput += "\\n";
      continue;
    case 0x0B:
      EscapedInput += "\\v";
      continue;
    case 0x0C:
      EscapedInput += "\\f";
      continue;
    case 0x0D:
      EscapedInput += "\\r";
      continue;
    case 0x1B:
      EscapedInput += "\\e";
      continue;
    default:
      break;
    }

    // Remaining control characters.
    if ((unsigned char)C < 0x20) {
      AppendHex("\\x", 2, utohexstr(C));
      continue;
    }

    // Plain ASCII is copied unchanged.
    if (!(C & 0x80)) {
      EscapedInput.push_back(C);
      continue;
    }

    // A UTF-8 encoded Unicode character.
    const UTF8Decoded Decoded = decodeUTF8(StringRef(I, InputEnd - I));
    if (Decoded.second == 0) {
      // Invalid sequence: emit the replacement character and give up.
      SmallString<4> Val;
      encodeUTF8(0xFFFD, Val);
      llvm::append_range(EscapedInput, Val);
      return EscapedInput;
    }

    const uint32_t CodePoint = Decoded.first;
    if (CodePoint == 0x85)
      EscapedInput += "\\N";
    else if (CodePoint == 0xA0)
      EscapedInput += "\\_";
    else if (CodePoint == 0x2028)
      EscapedInput += "\\L";
    else if (CodePoint == 0x2029)
      EscapedInput += "\\P";
    else if (!EscapePrintable && sys::unicode::isPrintable(CodePoint))
      EscapedInput += StringRef(I, Decoded.second);
    else {
      const std::string HexStr = utohexstr(CodePoint);
      const size_t Digits = HexStr.size();
      if (Digits <= 2)
        AppendHex("\\x", 2, HexStr);
      else if (Digits <= 4)
        AppendHex("\\u", 4, HexStr);
      else if (Digits <= 8)
        AppendHex("\\U", 8, HexStr);
    }

    // The loop increment accounts for the final byte of the sequence.
    I += Decoded.second - 1;
  }
  return EscapedInput;
}
/// Parse \p S as a boolean.
///
/// Recognised spellings are y/n, on/off, yes/no and true/false. Each word is
/// accepted in lower case, upper case, or with only the first letter
/// capitalised; any other mix of case is rejected.
///
/// @returns The boolean value, or None if \p S is not one of the spellings.
llvm::Optional<bool> yaml::parseBool(StringRef S) {
  // Dispatch on length first so each branch compares against few candidates.
  switch (S.size()) {
  case 1: {
    const char C = S.front();
    if (C == 'y' || C == 'Y')
      return true;
    if (C == 'n' || C == 'N')
      return false;
    return None;
  }
  case 2:
    if (S == "on" || S == "On" || S == "ON")
      return true;
    if (S == "no" || S == "No" || S == "NO")
      return false;
    return None;
  case 3:
    if (S == "off" || S == "Off" || S == "OFF")
      return false;
    if (S == "yes" || S == "Yes" || S == "YES")
      return true;
    return None;
  case 4:
    if (S == "true" || S == "True" || S == "TRUE")
      return true;
    return None;
  case 5:
    if (S == "false" || S == "False" || S == "FALSE")
      return false;
    return None;
  default:
    return None;
  }
}
/// Construct a scanner over the text in \p Input. The text is wrapped in a
/// buffer reference named "YAML" before the scanner state is initialised.
Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
                 std::error_code *EC)
    : SM(sm), ShowColors(ShowColors), EC(EC) {
  const MemoryBufferRef Buffer(Input, "YAML");
  init(Buffer);
}
/// Construct a scanner over an already existing buffer reference.
Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
                 std::error_code *EC)
    : SM(SM_), ShowColors(ShowColors), EC(EC) {
  // All remaining members are set up by init().
  init(Buffer);
}
/// Reset the scanner to the beginning of \p Buffer and register the buffer
/// with the SourceMgr so that diagnostics can refer to it.
void Scanner::init(MemoryBufferRef Buffer) {
  // Input range.
  InputBuffer = Buffer;
  Current = InputBuffer.getBufferStart();
  End = InputBuffer.getBufferEnd();

  // Position tracking. An indent of -1 means no block collection is open.
  Indent = -1;
  Column = 0;
  Line = 0;
  FlowLevel = 0;

  // Scanner mode flags.
  IsStartOfStream = true;
  IsSimpleKeyAllowed = true;
  Failed = false;

  // The SourceMgr takes ownership of a MemoryBuffer object created from the
  // buffer reference.
  SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(Buffer, false), SMLoc());
}
/// Return a reference to the next token without consuming it.
///
/// More tokens are fetched while the queue is empty or while the front token
/// is still a simple key candidate, since a key token may yet have to be
/// inserted in front of it. If fetching fails, the queue is replaced by a
/// single default-constructed (TK_Error) token.
Token &Scanner::peekNext() {
  for (bool NeedMore = false;; NeedMore = true) {
    if ((TokenQueue.empty() || NeedMore) && !fetchMoreTokens()) {
      // Scanning failed: leave only an error token behind.
      TokenQueue.clear();
      SimpleKeys.clear();
      TokenQueue.push_back(Token());
      return TokenQueue.front();
    }
    assert(!TokenQueue.empty() &&
            "fetchMoreTokens lied about getting tokens!");

    removeStaleSimpleKeyCandidates();

    // SimpleKey equality only looks at the token iterator, so a probe with
    // just Tok set is enough to search the candidate list.
    SimpleKey Probe;
    Probe.Tok = TokenQueue.begin();
    if (!is_contained(SimpleKeys, Probe))
      return TokenQueue.front();
  }
}
/// Return a copy of the next token and remove it from the queue.
Token Scanner::getNext() {
  // Copy before popping; the reference returned by peekNext dies with the
  // queue entry.
  Token Front = peekNext();

  const bool HasToken = !TokenQueue.empty();
  if (HasToken)
    TokenQueue.pop_front();

  // With nothing queued, no iterator can refer to the allocator's memory any
  // more, so it can be released.
  if (TokenQueue.empty())
    TokenQueue.resetAlloc();

  return Front;
}
/// Skip a single non-break character (YAML nb-char) at \p Position.
///
/// @returns The position after the character, or \p Position if the input is
///          at its end or the next character is not an nb-char.
StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
  if (Position == End)
    return Position;

  // Tab and the printable 7-bit range.
  const char C = *Position;
  if (C == 0x09 || (C >= 0x20 && C <= 0x7E))
    return Position + 1;

  // Everything else must be a multi-byte UTF-8 sequence.
  if (!(uint8_t(C) & 0x80))
    return Position;

  const UTF8Decoded U8 = decodeUTF8(Position);
  // Reject undecodable input and the byte order mark.
  if (U8.second == 0 || U8.first == 0xFEFF)
    return Position;

  const uint32_t CP = U8.first;
  const bool IsAllowed = CP == 0x85 || (CP >= 0xA0 && CP <= 0xD7FF) ||
                         (CP >= 0xE000 && CP <= 0xFFFD) ||
                         (CP >= 0x10000 && CP <= 0x10FFFF);
  return IsAllowed ? Position + U8.second : Position;
}
/// Skip a single line break (CR LF, CR or LF) at \p Position.
///
/// @returns The position after the break, or \p Position if there is none.
StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
  if (Position == End)
    return Position;

  switch (*Position) {
  case 0x0A:
    return Position + 1;
  case 0x0D: {
    // A line feed directly after the carriage return belongs to the break.
    const StringRef::iterator Next = Position + 1;
    const bool HasLineFeed = Next != End && *Next == 0x0A;
    return HasLineFeed ? Position + 2 : Position + 1;
  }
  default:
    return Position;
  }
}
/// Skip a single space character at \p Position.
///
/// @returns The position after the space, or \p Position if there is none.
StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
  const bool AtSpace = Position != End && *Position == ' ';
  return AtSpace ? Position + 1 : Position;
}
/// Skip a single space or tab at \p Position.
///
/// @returns The position after the blank, or \p Position if there is none.
StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
  const bool AtBlank =
      Position != End && (*Position == ' ' || *Position == '\t');
  return AtBlank ? Position + 1 : Position;
}
/// Skip a single non-space character at \p Position: any character accepted
/// by skip_nb_char except space and tab.
///
/// @returns The position after the character, or \p Position if none matches.
StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
  const bool AtEndOrBlank =
      Position == End || *Position == ' ' || *Position == '\t';
  if (AtEndOrBlank)
    return Position;
  return skip_nb_char(Position);
}
/// Apply the skip function \p Func repeatedly, starting at \p Position, until
/// it no longer advances.
///
/// @returns The first position at which \p Func made no progress.
StringRef::iterator Scanner::skip_while(SkipWhileFunc Func,
                                        StringRef::iterator Position) {
  StringRef::iterator Next = (this->*Func)(Position);
  while (Next != Position) {
    Position = Next;
    Next = (this->*Func)(Position);
  }
  return Position;
}
/// Move Current forward for as long as \p Func keeps matching, and add the
/// number of bytes skipped to Column.
void Scanner::advanceWhile(SkipWhileFunc Func) {
  const StringRef::iterator Stop = skip_while(Func, Current);
  Column += Stop - Current;
  Current = Stop;
}
/// Return true if \p C is a hexadecimal digit (YAML ns-hex-digit): 0-9, a-f
/// or A-F.
///
/// Only these characters may follow '%' in a percent-encoded URI character;
/// other letters such as 'g' or 'Z' are not hexadecimal digits and must not
/// be accepted as part of an escape.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f') ||
         (C >= 'A' && C <= 'F');
}
/// Return true if \p C is a word character (YAML ns-word-char): an ASCII
/// letter, a decimal digit, or '-'.
///
/// Decimal digits are part of the production; without them a URI would stop
/// being scanned at its first digit.
static bool is_ns_word_char(const char C) {
  const bool IsDigit = C >= '0' && C <= '9';
  const bool IsLetter = (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z');
  return C == '-' || IsDigit || IsLetter;
}
/// Advance Current and Column over a run of URI characters.
///
/// A character is accepted if it is a '%' that starts a percent escape, a
/// word character, or one of the listed punctuation characters. Note that
/// only the '%' itself is consumed per step; the two characters after it are
/// examined again on the following iterations.
void Scanner::scan_ns_uri_char() {
  for (; Current != End; ++Current, ++Column) {
    const bool IsPercentEscape = *Current == '%' && Current + 2 < End &&
                                 is_ns_hex_digit(*(Current + 1)) &&
                                 is_ns_hex_digit(*(Current + 2));
    if (IsPercentEscape || is_ns_word_char(*Current))
      continue;

    const bool IsPunctuation =
        StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") !=
        StringRef::npos;
    if (!IsPunctuation)
      break;
  }
}
/// Consume the character \p Expected if it is the next character of the
/// input.
///
/// Only ASCII is supported: an error is recorded if \p Expected is not ASCII,
/// or if the next input byte is not ASCII.
///
/// @returns true if the character was consumed (Current and Column advance by
///          one), false otherwise.
bool Scanner::consume(uint32_t Expected) {
  if (Expected >= 0x80) {
    setError("Cannot consume non-ascii characters", Current);
    return false;
  }
  if (Current == End)
    return false;

  const uint8_t Next = uint8_t(*Current);
  if (Next >= 0x80) {
    setError("Cannot consume non-ascii characters", Current);
    return false;
  }
  if (Next != Expected)
    return false;

  ++Current;
  ++Column;
  return true;
}
/// Advance Current and Column by \p Distance bytes. The caller must make sure
/// that many bytes are available; this is only checked by an assertion.
void Scanner::skip(uint32_t Distance) {
  Column += Distance;
  Current += Distance;
  assert(Current <= End && "Skipped past the end");
}
/// Return true if \p Position is not the end of the input and points at a
/// space, tab, carriage return or line feed.
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
  if (Position == End)
    return false;

  switch (*Position) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return true;
  default:
    return false;
  }
}
/// Return true if \p Line consists only of characters for which
/// isBlankOrBreak holds. An empty \p Line counts as empty.
bool Scanner::isLineEmpty(StringRef Line) {
  for (const char &C : Line) {
    // isBlankOrBreak works on positions, so hand it the character's address.
    if (!isBlankOrBreak(&C))
      return false;
  }
  return true;
}
bool Scanner::consumeLineBreakIfPresent() {
auto Next = skip_b_break(Current);
if (Next == Current)
return false;
Column = 0;
++Line;
Current = Next;
return true;
}
/// Record \p Tok as a possible simple key. Nothing is recorded when simple
/// keys are currently not allowed.
///
/// @param Tok The candidate token in the token queue.
/// @param AtColumn The column at which the token started.
/// @param IsRequired Whether a missing ':' for this key is an error.
void Scanner::saveSimpleKeyCandidate(TokenQueueT::iterator Tok,
                                     unsigned AtColumn, bool IsRequired) {
  if (!IsSimpleKeyAllowed)
    return;

  SimpleKey Candidate;
  Candidate.Tok = Tok;
  Candidate.Column = AtColumn;
  Candidate.Line = Line;
  Candidate.FlowLevel = FlowLevel;
  Candidate.IsRequired = IsRequired;
  SimpleKeys.push_back(Candidate);
}
void Scanner::removeStaleSimpleKeyCandidates() {
for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
i != SimpleKeys.end();) {
if (i->Line != Line || i->Column + 1024 < Column) {
if (i->IsRequired)
setError( "Could not find expected : for simple key"
, i->Tok->Range.begin());
i = SimpleKeys.erase(i);
} else
++i;
}
}
/// Remove the most recently recorded simple key candidate if it was recorded
/// at flow level \p Level. At most one candidate is removed.
void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
  if (SimpleKeys.empty())
    return;
  if (SimpleKeys.back().FlowLevel == Level)
    SimpleKeys.pop_back();
}
bool Scanner::unrollIndent(int ToColumn) {
Token T;
if (FlowLevel != 0)
return true;
while (Indent > ToColumn) {
T.Kind = Token::TK_BlockEnd;
T.Range = StringRef(Current, 1);
TokenQueue.push_back(T);
Indent = Indents.pop_back_val();
}
return true;
}
bool Scanner::rollIndent( int ToColumn
, Token::TokenKind Kind
, TokenQueueT::iterator InsertPoint) {
if (FlowLevel)
return true;
if (Indent < ToColumn) {
Indents.push_back(Indent);
Indent = ToColumn;
Token T;
T.Kind = Kind;
T.Range = StringRef(Current, 0);
TokenQueue.insert(InsertPoint, T);
}
return true;
}
void Scanner::skipComment() {
if (Current == End || *Current != '#')
return;
while (true) {
StringRef::iterator I = skip_nb_char(Current);
if (I == Current)
break;
Current = I;
++Column;
}
}
void Scanner::scanToNextToken() {
while (true) {
while (Current != End && (*Current == ' ' || *Current == '\t')) {
skip(1);
}
skipComment();
StringRef::iterator i = skip_b_break(Current);
if (i == Current)
break;
Current = i;
++Line;
Column = 0;
if (!FlowLevel)
IsSimpleKeyAllowed = true;
}
}
bool Scanner::scanStreamStart() {
IsStartOfStream = false;
EncodingInfo EI = getUnicodeEncoding(currentInput());
Token T;
T.Kind = Token::TK_StreamStart;
T.Range = StringRef(Current, EI.second);
TokenQueue.push_back(T);
Current += EI.second;
return true;
}
bool Scanner::scanStreamEnd() {
if (Column != 0) {
Column = 0;
++Line;
}
unrollIndent(-1);
SimpleKeys.clear();
IsSimpleKeyAllowed = false;
Token T;
T.Kind = Token::TK_StreamEnd;
T.Range = StringRef(Current, 0);
TokenQueue.push_back(T);
return true;
}
bool Scanner::scanDirective() {
unrollIndent(-1);
SimpleKeys.clear();
IsSimpleKeyAllowed = false;
StringRef::iterator Start = Current;
consume('%');
StringRef::iterator NameStart = Current;
Current = skip_while(&Scanner::skip_ns_char, Current);
StringRef Name(NameStart, Current - NameStart);
Current = skip_while(&Scanner::skip_s_white, Current);
Token T;
if (Name == "YAML") {
Current = skip_while(&Scanner::skip_ns_char, Current);
T.Kind = Token::TK_VersionDirective;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
return true;
} else if(Name == "TAG") {
Current = skip_while(&Scanner::skip_ns_char, Current);
Current = skip_while(&Scanner::skip_s_white, Current);
Current = skip_while(&Scanner::skip_ns_char, Current);
T.Kind = Token::TK_TagDirective;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
return true;
}
return false;
}
bool Scanner::scanDocumentIndicator(bool IsStart) {
unrollIndent(-1);
SimpleKeys.clear();
IsSimpleKeyAllowed = false;
Token T;
T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
T.Range = StringRef(Current, 3);
skip(3);
TokenQueue.push_back(T);
return true;
}
bool Scanner::scanFlowCollectionStart(bool IsSequence) {
Token T;
T.Kind = IsSequence ? Token::TK_FlowSequenceStart
: Token::TK_FlowMappingStart;
T.Range = StringRef(Current, 1);
skip(1);
TokenQueue.push_back(T);
saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
IsSimpleKeyAllowed = true;
++FlowLevel;
return true;
}
bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = false;
Token T;
T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
: Token::TK_FlowMappingEnd;
T.Range = StringRef(Current, 1);
skip(1);
TokenQueue.push_back(T);
if (FlowLevel)
--FlowLevel;
return true;
}
bool Scanner::scanFlowEntry() {
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = true;
Token T;
T.Kind = Token::TK_FlowEntry;
T.Range = StringRef(Current, 1);
skip(1);
TokenQueue.push_back(T);
return true;
}
bool Scanner::scanBlockEntry() {
rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = true;
Token T;
T.Kind = Token::TK_BlockEntry;
T.Range = StringRef(Current, 1);
skip(1);
TokenQueue.push_back(T);
return true;
}
bool Scanner::scanKey() {
if (!FlowLevel)
rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
IsSimpleKeyAllowed = !FlowLevel;
Token T;
T.Kind = Token::TK_Key;
T.Range = StringRef(Current, 1);
skip(1);
TokenQueue.push_back(T);
return true;
}
bool Scanner::scanValue() {
if (!SimpleKeys.empty()) {
SimpleKey SK = SimpleKeys.pop_back_val();
Token T;
T.Kind = Token::TK_Key;
T.Range = SK.Tok->Range;
TokenQueueT::iterator i, e;
for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
if (i == SK.Tok)
break;
}
if (i == e) {
Failed = true;
return false;
}
i = TokenQueue.insert(i, T);
rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
IsSimpleKeyAllowed = false;
} else {
if (!FlowLevel)
rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
IsSimpleKeyAllowed = !FlowLevel;
}
Token T;
T.Kind = Token::TK_Value;
T.Range = StringRef(Current, 1);
skip(1);
TokenQueue.push_back(T);
return true;
}
// Forward declaration that carries the noinline attribute.
LLVM_ATTRIBUTE_NOINLINE static bool
wasEscaped(StringRef::iterator First, StringRef::iterator Position);

/// Decide whether the character at \p Position is escaped, by counting the
/// backslashes directly in front of it. The search for backslashes does not
/// go back further than \p First.
///
/// @returns true if the number of preceding backslashes is odd.
static bool wasEscaped(StringRef::iterator First,
                       StringRef::iterator Position) {
  assert(Position - 1 >= First);

  unsigned Backslashes = 0;
  for (StringRef::iterator I = Position - 1; I >= First && *I == '\\'; --I)
    ++Backslashes;

  // An even run of backslashes escapes only itself.
  return Backslashes % 2 == 1;
}
/// Scan a quoted scalar starting at the opening quote at Current and queue a
/// TK_Scalar token whose range includes both quotes. The token is also
/// recorded as a simple key candidate.
///
/// @param IsDoubleQuoted true for a "double quoted" scalar, false for a
///                       'single quoted' one.
/// @returns false, with an error recorded, if the input ends before the
///          closing quote is found.
bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
StringRef::iterator Start = Current;
unsigned ColStart = Column;
if (IsDoubleQuoted) {
// Advance to the next '"' that is not escaped by a backslash. Column and Line
// are not updated while doing so; only Current moves.
do {
++Current;
while (Current != End && *Current != '"')
++Current;
// Repeat until the end of the input or an unescaped '"' is found.
} while ( Current != End
&& *(Current - 1) == '\\'
&& wasEscaped(Start + 1, Current));
} else {
// Step over the opening quote.
skip(1);
while (Current != End) {
// Skip a '' (escaped quote).
if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
skip(2);
continue;
} else if (*Current == '\'')
break;
StringRef::iterator i = skip_nb_char(Current);
if (i == Current) {
// Not a non-break character, so it has to be a line break to continue.
i = skip_b_break(Current);
if (i == Current)
break;
Current = i;
Column = 0;
++Line;
} else {
// Stop without consuming when this character is the last one of the input;
// the end-of-input check below then does not fire for it.
if (i == End)
break;
Current = i;
++Column;
}
}
}
if (Current == End) {
setError("Expected quote at end of scalar", Current);
return false;
}
// Step over the closing quote.
skip(1); Token T;
T.Kind = Token::TK_Scalar;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
// Quoted scalars can be simple keys.
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
return true;
}
/// Scan a plain (unquoted) scalar starting at Current and queue a TK_Scalar
/// token for it. The token is also recorded as a simple key candidate.
///
/// The scalar may continue over several lines. It ends at a '#', at a ':'
/// followed by a blank or break, at a flow indicator when inside a flow
/// collection, or, in block context, at a line indented less than required.
///
/// @returns false, with an error recorded, for an empty scalar, for a ':' in
///          flow context that is not followed by a blank, break or ',', and
///          for a tab used as indentation.
bool Scanner::scanPlainScalar() {
StringRef::iterator Start = Current;
unsigned ColStart = Column;
// Becomes non-zero once a line break has been seen in the look-ahead below.
unsigned LeadingBlanks = 0;
assert(Indent >= -1 && "Indent must be >= -1 !");
// Continuation lines must reach at least this column in block context.
unsigned indent = static_cast<unsigned>(Indent + 1);
while (Current != End) {
// A '#' at the start of a chunk begins a comment and ends the scalar.
if (*Current == '#')
break;
// Consume one run of non-blank characters.
while (Current != End && !isBlankOrBreak(Current)) {
if (FlowLevel && *Current == ':' &&
(Current + 1 == End ||
!(isBlankOrBreak(Current + 1) || *(Current + 1) == ','))) {
setError("Found unexpected ':' while scanning a plain scalar", Current);
return false;
}
// Check for the end of the plain scalar.
if ( (*Current == ':' && isBlankOrBreak(Current + 1))
|| ( FlowLevel
&& (StringRef(Current, 1).find_first_of(",:?[]{}")
!= StringRef::npos)))
break;
StringRef::iterator i = skip_nb_char(Current);
if (i == Current)
break;
Current = i;
++Column;
}
// Stopped on something other than whitespace: the scalar is complete.
if (!isBlankOrBreak(Current))
break;
// Look ahead over blanks and line breaks with Tmp. Column and Line are
// updated during the look-ahead, but Current only moves if the scalar turns
// out to continue.
StringRef::iterator Tmp = Current;
while (isBlankOrBreak(Tmp)) {
StringRef::iterator i = skip_s_white(Tmp);
if (i != Tmp) {
// A tab is rejected after a line break while still left of the indentation.
if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
setError("Found invalid tab character in indentation", Tmp);
return false;
}
Tmp = i;
++Column;
} else {
i = skip_b_break(Tmp);
if (!LeadingBlanks)
LeadingBlanks = 1;
Tmp = i;
Column = 0;
++Line;
}
}
// In block context a less indented line ends the scalar; the whitespace
// looked at above is then left unconsumed.
if (!FlowLevel && Column < indent)
break;
Current = Tmp;
}
if (Start == Current) {
setError("Got empty plain scalar", Start);
return false;
}
Token T;
T.Kind = Token::TK_Scalar;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
// Plain scalars can be simple keys.
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
return true;
}
bool Scanner::scanAliasOrAnchor(bool IsAlias) {
StringRef::iterator Start = Current;
unsigned ColStart = Column;
skip(1);
while (Current != End) {
if ( *Current == '[' || *Current == ']'
|| *Current == '{' || *Current == '}'
|| *Current == ','
|| *Current == ':')
break;
StringRef::iterator i = skip_ns_char(Current);
if (i == Current)
break;
Current = i;
++Column;
}
if (Start + 1 == Current) {
setError("Got empty alias or anchor", Start);
return false;
}
Token T;
T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
return true;
}
/// Scan the style indicator of a block scalar and then the rest of its
/// header.
///
/// @param StyleIndicator Receives '|' or '>', or ' ' if neither is present.
/// @param ChompingIndicator Receives '+' or '-', or ' ' if neither is present.
/// @param IndentIndicator Receives the explicit indentation, or 0 if absent.
/// @param IsDone Set to true if the input ended with the header.
/// @returns false if the header is malformed.
bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
                                        char &ChompingIndicator,
                                        unsigned &IndentIndicator,
                                        bool &IsDone) {
  StyleIndicator = scanBlockStyleIndicator();
  return scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone);
}
/// Consume a block scalar style indicator at Current.
///
/// @returns '>' or '|' if one was consumed, ' ' otherwise.
char Scanner::scanBlockStyleIndicator() {
  if (Current == End)
    return ' ';

  const char C = *Current;
  if (C != '>' && C != '|')
    return ' ';

  skip(1);
  return C;
}
/// Consume a block scalar chomping indicator at Current.
///
/// @returns '+' or '-' if one was consumed, ' ' otherwise.
char Scanner::scanBlockChompingIndicator() {
  if (Current == End)
    return ' ';

  const char C = *Current;
  if (C != '+' && C != '-')
    return ' ';

  skip(1);
  return C;
}
/// Compute how many line breaks end a block scalar.
///
/// @param ChompingIndicator '-' to strip, '+' to keep; anything else clips.
/// @param LineBreaks The number of trailing line breaks that were scanned.
/// @param Str The scalar's text so far.
/// @returns 0 when stripping, \p LineBreaks when keeping, and when clipping 1
///          for non-empty text or 0 for empty text.
static unsigned getChompedLineBreaks(char ChompingIndicator,
                                     unsigned LineBreaks, StringRef Str) {
  switch (ChompingIndicator) {
  case '-':
    return 0;
  case '+':
    return LineBreaks;
  default:
    if (Str.empty())
      return 0;
    return 1;
  }
}
/// Consume a block scalar indentation indicator, a single digit from 1 to 9,
/// at Current.
///
/// @returns The digit's value if one was consumed, 0 otherwise.
unsigned Scanner::scanBlockIndentationIndicator() {
  if (Current == End)
    return 0;

  const char C = *Current;
  const bool IsIndicator = C >= '1' && C <= '9';
  if (!IsIndicator)
    return 0;

  skip(1);
  return unsigned(C - '0');
}
bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
unsigned &IndentIndicator, bool &IsDone) {
auto Start = Current;
ChompingIndicator = scanBlockChompingIndicator();
IndentIndicator = scanBlockIndentationIndicator();
if (ChompingIndicator == ' ')
ChompingIndicator = scanBlockChompingIndicator();
Current = skip_while(&Scanner::skip_s_white, Current);
skipComment();
if (Current == End) { Token T;
T.Kind = Token::TK_BlockScalar;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
IsDone = true;
return true;
}
if (!consumeLineBreakIfPresent()) {
setError("Expected a line break after block scalar header", Current);
return false;
}
return true;
}
bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
unsigned BlockExitIndent,
unsigned &LineBreaks, bool &IsDone) {
unsigned MaxAllSpaceLineCharacters = 0;
StringRef::iterator LongestAllSpaceLine;
while (true) {
advanceWhile(&Scanner::skip_s_space);
if (skip_nb_char(Current) != Current) {
if (Column <= BlockExitIndent) { IsDone = true;
return true;
}
BlockIndent = Column;
if (MaxAllSpaceLineCharacters > BlockIndent) {
setError(
"Leading all-spaces line must be smaller than the block indent",
LongestAllSpaceLine);
return false;
}
return true;
}
if (skip_b_break(Current) != Current &&
Column > MaxAllSpaceLineCharacters) {
MaxAllSpaceLineCharacters = Column;
LongestAllSpaceLine = Current;
}
if (Current == End) {
IsDone = true;
return true;
}
if (!consumeLineBreakIfPresent()) {
IsDone = true;
return true;
}
++LineBreaks;
}
return true;
}
/// Consume the indentation of one block scalar line and check whether the
/// line still belongs to the scalar.
///
/// @param BlockIndent The indentation of the scalar's content.
/// @param BlockExitIndent A content line at or left of this column ends the
///                        scalar.
/// @param IsDone Set to true if this line ends the scalar.
/// @returns false, with an error recorded, if a content line is indented less
///          than the scalar and is not a comment.
bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
                                    unsigned BlockExitIndent, bool &IsDone) {
  // Consume at most BlockIndent columns of spaces.
  while (Column < BlockIndent && Current != End && *Current == ' ') {
    ++Current;
    ++Column;
  }

  // An empty line (or the end of the input) needs no further checks.
  if (skip_nb_char(Current) == Current)
    return true;

  if (Column <= BlockExitIndent) {
    // The scalar ended; this line belongs to whatever comes next.
    IsDone = true;
    return true;
  }

  if (Column >= BlockIndent)
    return true;

  // Less indented than the content: only a trailing comment is allowed.
  if (Current != End && *Current == '#') {
    IsDone = true;
    return true;
  }
  setError("A text line is less indented than the block scalar", Current);
  return false;
}
/// Scan a block scalar starting at its '|' or '>' indicator at Current and
/// queue a TK_BlockScalar token. The token's Range covers the raw text after
/// the header and its Value holds the processed contents.
///
/// \p IsLiteral is not read by this function; the style is taken from the
/// indicator that is actually scanned.
///
/// @returns false if the header or the indentation of a line is malformed.
bool Scanner::scanBlockScalar(bool IsLiteral) {
assert(*Current == '|' || *Current == '>');
char StyleIndicator;
char ChompingIndicator;
// Receives the explicit indentation indicator; 0 means none was given.
unsigned BlockIndent;
bool IsDone = false;
if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
IsDone))
return false;
// The input ended with the header; the header scan already queued a token.
if (IsDone)
return true;
bool IsFolded = StyleIndicator == '>';
const auto *Start = Current;
// A content line at or left of this column ends the scalar.
unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
// Line breaks seen since the last line with content.
unsigned LineBreaks = 0;
if (BlockIndent == 0) {
// No explicit indentation: derive it from the first line with content.
if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
IsDone))
return false;
}
// Accumulates the processed contents of the scalar.
SmallString<256> Str;
while (!IsDone) {
if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
return false;
if (IsDone)
break;
auto LineStart = Current;
advanceWhile(&Scanner::skip_nb_char);
if (LineStart != Current) {
// The line has content. First emit the line breaks that precede it.
if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
// In a folded scalar a single line break between two lines becomes a space,
// unless the current line is blank, in which case it stays a line break.
if (LineBreaks == 1) {
Str.append(LineBreaks,
isLineEmpty(StringRef(LineStart, Current - LineStart))
? '\n'
: ' ');
}
// Folding accounts for one of the pending line breaks.
LineBreaks--;
}
Str.append(LineBreaks, '\n');
Str.append(StringRef(LineStart, Current - LineStart));
LineBreaks = 0;
}
if (Current == End)
break;
if (!consumeLineBreakIfPresent())
break;
++LineBreaks;
}
// Input that ends without a final line break is treated as having one.
if (Current == End && !LineBreaks)
LineBreaks = 1;
// Append the trailing line breaks selected by the chomping indicator.
Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
// In block context a new key can follow the block scalar.
if (!FlowLevel)
IsSimpleKeyAllowed = true;
Token T;
T.Kind = Token::TK_BlockScalar;
T.Range = StringRef(Start, Current - Start);
T.Value = std::string(Str);
TokenQueue.push_back(T);
return true;
}
bool Scanner::scanTag() {
StringRef::iterator Start = Current;
unsigned ColStart = Column;
skip(1); if (Current == End || isBlankOrBreak(Current)); else if (*Current == '<') {
skip(1);
scan_ns_uri_char();
if (!consume('>'))
return false;
} else {
Current = skip_while(&Scanner::skip_ns_char, Current);
}
Token T;
T.Kind = Token::TK_Tag;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
IsSimpleKeyAllowed = false;
return true;
}
bool Scanner::fetchMoreTokens() {
if (IsStartOfStream)
return scanStreamStart();
scanToNextToken();
if (Current == End)
return scanStreamEnd();
removeStaleSimpleKeyCandidates();
unrollIndent(Column);
if (Column == 0 && *Current == '%')
return scanDirective();
if (Column == 0 && Current + 4 <= End
&& *Current == '-'
&& *(Current + 1) == '-'
&& *(Current + 2) == '-'
&& (Current + 3 == End || isBlankOrBreak(Current + 3)))
return scanDocumentIndicator(true);
if (Column == 0 && Current + 4 <= End
&& *Current == '.'
&& *(Current + 1) == '.'
&& *(Current + 2) == '.'
&& (Current + 3 == End || isBlankOrBreak(Current + 3)))
return scanDocumentIndicator(false);
if (*Current == '[')
return scanFlowCollectionStart(true);
if (*Current == '{')
return scanFlowCollectionStart(false);
if (*Current == ']')
return scanFlowCollectionEnd(true);
if (*Current == '}')
return scanFlowCollectionEnd(false);
if (*Current == ',')
return scanFlowEntry();
if (*Current == '-' && isBlankOrBreak(Current + 1))
return scanBlockEntry();
if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
return scanKey();
if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
return scanValue();
if (*Current == '*')
return scanAliasOrAnchor(true);
if (*Current == '&')
return scanAliasOrAnchor(false);
if (*Current == '!')
return scanTag();
if (*Current == '|' && !FlowLevel)
return scanBlockScalar(true);
if (*Current == '>' && !FlowLevel)
return scanBlockScalar(false);
if (*Current == '\'')
return scanFlowScalar(false);
if (*Current == '"')
return scanFlowScalar(true);
StringRef FirstChar(Current, 1);
if (!(isBlankOrBreak(Current)
|| FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
|| (*Current == '-' && !isBlankOrBreak(Current + 1))
|| (!FlowLevel && (*Current == '?' || *Current == ':')
&& isBlankOrBreak(Current + 1))
|| (!FlowLevel && *Current == ':'
&& Current + 2 < End
&& *(Current + 1) == ':'
&& !isBlankOrBreak(Current + 2)))
return scanPlainScalar();
setError("Unrecognized character while tokenizing.", Current);
return false;
}
// Construct a stream over the text in Input. All arguments are forwarded
// unchanged to a newly allocated Scanner, which is stored in `scanner`.
Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
std::error_code *EC)
: scanner(new Scanner(Input, SM, ShowColors, EC)) {}
// Construct a stream over InputBuffer. All arguments are forwarded unchanged
// to a newly allocated Scanner, which is stored in `scanner`.
Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
std::error_code *EC)
: scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably placed here so Scanner and Document are complete
// types when the members are destroyed -- confirm against the header.
Stream::~Stream() = default;
// Report the scanner's failure state for this stream.
bool Stream::failed() { return scanner->failed(); }
/// Print \p Msg as a diagnostic of kind \p Kind at the source range of \p N.
/// A null \p N yields a default-constructed (empty) range.
void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) {
  SMRange Where;
  if (N)
    Where = N->getSourceRange();
  printError(Where, Msg, Kind);
}
// Print Msg as a diagnostic of kind Kind through the scanner. The diagnostic
// is located at Range.Start and Range itself is passed along as well.
void Stream::printError(const SMRange &Range, const Twine &Msg,
SourceMgr::DiagKind Kind) {
scanner->printError(Range.Start, Kind, Msg, Range);
}
// Start iterating over the documents of this stream.
// Calling this while CurrentDoc is already set is a fatal error: the stream
// can only be iterated once.
document_iterator Stream::begin() {
if (CurrentDoc)
report_fatal_error("Can only iterate over the stream once");
// Consume and discard one token before the first document is created
// (presumably the stream-start token -- confirm against the scanner).
scanner->getNext();
CurrentDoc.reset(new Document(*this));
return document_iterator(CurrentDoc);
}
// The end iterator is a default-constructed document_iterator.
document_iterator Stream::end() {
return document_iterator();
}
// Iterate over every document of the stream and call skip() on each one.
// This uses begin(), so it counts as the single permitted iteration.
void Stream::skip() {
for (Document &Doc : *this)
Doc.skip();
}
// Construct a node of kind Type belonging to document D, with anchor A and
// tag T. The source range is initialised to an empty range positioned at the
// beginning of the next token that peekNext() reports.
Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
StringRef T)
: Doc(D), TypeID(Type), Anchor(A), Tag(T) {
SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
SourceRange = SMRange(Start, Start);
}
std::string Node::getVerbatimTag() const {
StringRef Raw = getRawTag();
if (!Raw.empty() && Raw != "!") {
std::string Ret;
if (Raw.find_last_of('!') == 0) {
Ret = std::string(Doc->getTagMap().find("!")->second);
Ret += Raw.substr(1);
return Ret;
} else if (Raw.startswith("!!")) {
Ret = std::string(Doc->getTagMap().find("!!")->second);
Ret += Raw.substr(2);
return Ret;
} else {
StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
std::map<StringRef, StringRef>::const_iterator It =
Doc->getTagMap().find(TagHandle);
if (It != Doc->getTagMap().end())
Ret = std::string(It->second);
else {
Token T;
T.Kind = Token::TK_Tag;
T.Range = TagHandle;
setError(Twine("Unknown tag handle ") + TagHandle, T);
}
Ret += Raw.substr(Raw.find_last_of('!') + 1);
return Ret;
}
}
switch (getType()) {
case NK_Null:
return "tag:yaml.org,2002:null";
case NK_Scalar:
case NK_BlockScalar:
return "tag:yaml.org,2002:str";
case NK_Mapping:
return "tag:yaml.org,2002:map";
case NK_Sequence:
return "tag:yaml.org,2002:seq";
}
return "";
}
// Forward to the owning document's peekNext().
Token &Node::peekNext() {
return Doc->peekNext();
}
// Forward to the owning document's getNext(); the token is returned by value.
Token Node::getNext() {
return Doc->getNext();
}
// Forward to the owning document's parseBlockNode().
Node *Node::parseBlockNode() {
return Doc->parseBlockNode();
}
// Return the owning document's NodeAllocator, used with placement new to
// create nodes elsewhere in this file.
BumpPtrAllocator &Node::getAllocator() {
return Doc->NodeAllocator;
}
// Report the error Msg at token Tok by forwarding to the owning document.
void Node::setError(const Twine &Msg, Token &Tok) const {
Doc->setError(Msg, Tok);
}
// Return the failure state of the owning document.
bool Node::failed() const {
return Doc->failed();
}
/// Return the scalar's text with quoting removed.
///
/// Plain scalars are returned with trailing spaces trimmed. Quoted scalars
/// lose their first and last character; if what remains needs rewriting
/// (escapes or line breaks in double quotes, '' pairs in single quotes) the
/// result is built in \p Storage and the returned reference points into it.
/// Otherwise the result refers directly to the original text.
StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
  const char Lead = Value[0];
  if (Lead != '"' && Lead != '\'')
    return Value.rtrim(' ');

  // Drop the opening and closing quote characters.
  StringRef Body = Value.substr(1, Value.size() - 2);

  if (Lead == '"') {
    const StringRef::size_type FirstSpecial = Body.find_first_of("\\\r\n");
    if (FirstSpecial == StringRef::npos)
      return Body;
    return unescapeDoubleQuoted(Body, FirstSpecial, Storage);
  }

  // Single quoted: each quote found inside the body is treated as the first
  // half of a '' pair and replaced by one quote.
  StringRef::size_type Quote = Body.find('\'');
  if (Quote == StringRef::npos)
    return Body;

  Storage.clear();
  Storage.reserve(Body.size());
  do {
    StringRef Literal(Body.begin(), Quote);
    llvm::append_range(Storage, Literal);
    Storage.push_back('\'');
    Body = Body.substr(Quote + 2);
    Quote = Body.find('\'');
  } while (Quote != StringRef::npos);
  llvm::append_range(Storage, Body);
  return StringRef(Storage.begin(), Storage.size());
}
/// Decode the body of a double-quoted scalar into \p Storage.
///
/// \p UnquotedValue is the text between the quotes and \p i is the index of
/// its first backslash, '\r' or '\n'. Raw line breaks are emitted as '\n',
/// escaped line breaks are dropped, and escape sequences are replaced by the
/// bytes they denote. An unrecognised or truncated backslash escape is
/// reported through setError() and an empty string is returned. Hex escapes
/// with too few characters after them are skipped without output.
/// \returns a reference to the decoded text held in \p Storage.
StringRef ScalarNode::unescapeDoubleQuoted(StringRef UnquotedValue,
                                           StringRef::size_type i,
                                           SmallVectorImpl<char> &Storage)
    const {
  Storage.clear();
  Storage.reserve(UnquotedValue.size());

  // Report the character currently at the front as a bad escape.
  auto ReportBadEscape = [&]() -> StringRef {
    Token T;
    T.Range = StringRef(UnquotedValue.begin(), 1);
    setError("Unrecognized escape code", T);
    return "";
  };

  // If the break at the front is followed by another '\r' or '\n', step over
  // the first of the two so the pair is consumed as a single break.
  auto DropPairedBreak = [&] {
    if (UnquotedValue.size() > 1 &&
        (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
      UnquotedValue = UnquotedValue.substr(1);
  };

  // Decode NumDigits hex digits that follow the escape letter at the front.
  // Undecodable digits yield U+FFFD; too little input yields nothing.
  auto AppendHexEscape = [&](StringRef::size_type NumDigits) {
    if (UnquotedValue.size() < NumDigits + 1)
      return;
    unsigned int UnicodeScalarValue;
    if (UnquotedValue.substr(1, NumDigits).getAsInteger(16, UnicodeScalarValue))
      UnicodeScalarValue = 0xFFFD;
    encodeUTF8(UnicodeScalarValue, Storage);
    UnquotedValue = UnquotedValue.substr(NumDigits);
  };

  while (i != StringRef::npos) {
    // Copy the untouched run before the special character.
    StringRef Verbatim(UnquotedValue.begin(), i);
    llvm::append_range(Storage, Verbatim);
    UnquotedValue = UnquotedValue.substr(i);
    assert(!UnquotedValue.empty() && "Can't be empty!");

    const char Lead = UnquotedValue[0];
    if (Lead == '\r' || Lead == '\n') {
      // Raw line break inside the quotes.
      Storage.push_back('\n');
      DropPairedBreak();
      UnquotedValue = UnquotedValue.substr(1);
    } else {
      // Backslash escape; it needs at least one character after it.
      if (UnquotedValue.size() == 1)
        return ReportBadEscape();
      UnquotedValue = UnquotedValue.substr(1);

      switch (UnquotedValue[0]) {
      case '\r':
      case '\n':
        // Escaped line break: produces no output.
        DropPairedBreak();
        break;
      case '0':
        Storage.push_back(0x00);
        break;
      case 'a':
        Storage.push_back(0x07);
        break;
      case 'b':
        Storage.push_back(0x08);
        break;
      case 't':
      case 0x09:
        Storage.push_back(0x09);
        break;
      case 'n':
        Storage.push_back(0x0A);
        break;
      case 'v':
        Storage.push_back(0x0B);
        break;
      case 'f':
        Storage.push_back(0x0C);
        break;
      case 'r':
        Storage.push_back(0x0D);
        break;
      case 'e':
        Storage.push_back(0x1B);
        break;
      case ' ':
        Storage.push_back(0x20);
        break;
      case '"':
        Storage.push_back(0x22);
        break;
      case '/':
        Storage.push_back(0x2F);
        break;
      case '\\':
        Storage.push_back(0x5C);
        break;
      case 'N':
        encodeUTF8(0x85, Storage);
        break;
      case '_':
        encodeUTF8(0xA0, Storage);
        break;
      case 'L':
        encodeUTF8(0x2028, Storage);
        break;
      case 'P':
        encodeUTF8(0x2029, Storage);
        break;
      case 'x':
        AppendHexEscape(2);
        break;
      case 'u':
        AppendHexEscape(4);
        break;
      case 'U':
        AppendHexEscape(8);
        break;
      default:
        return ReportBadEscape();
      }

      // Step over the escape character (or the last hex digit).
      UnquotedValue = UnquotedValue.substr(1);
    }

    i = UnquotedValue.find_first_of("\\\r\n");
  }

  llvm::append_range(Storage, UnquotedValue);
  return StringRef(Storage.begin(), Storage.size());
}
/// Return the key node of this entry, parsing it on first use.
///
/// A NullNode is produced when no key is present: the next token is a block
/// end, a value indicator or an error, or an explicit key indicator is
/// followed directly by a block end or value indicator.
Node *KeyValueNode::getKey() {
  if (Key)
    return Key;

  // Implicit null key, or an explicit key indicator to step over.
  switch (peekNext().Kind) {
  case Token::TK_BlockEnd:
  case Token::TK_Value:
  case Token::TK_Error:
    Key = new (getAllocator()) NullNode(Doc);
    return Key;
  case Token::TK_Key:
    getNext();
    break;
  default:
    break;
  }

  // Explicit null key, otherwise a regular key node.
  const auto Following = peekNext().Kind;
  if (Following == Token::TK_BlockEnd || Following == Token::TK_Value)
    Key = new (getAllocator()) NullNode(Doc);
  else
    Key = parseBlockNode();
  return Key;
}
/// Return the value node of this entry, parsing it on first use.
///
/// The key is fetched and skipped first. A NullNode is produced when the key
/// is null, when the document has failed, when no value indicator follows, or
/// when the value indicator is followed directly by a block end or a key.
/// A token other than the expected ones is reported through setError().
Node *KeyValueNode::getValue() {
  if (Value)
    return Value;

  auto NullValue = [this]() -> Node * {
    Value = new (getAllocator()) NullNode(Doc);
    return Value;
  };

  Node *KeyNode = getKey();
  if (!KeyNode) {
    setError("Null key in Key Value.", peekNext());
    return NullValue();
  }
  KeyNode->skip();

  if (failed())
    return NullValue();

  // What follows the key decides whether there is a value at all.
  Token &Separator = peekNext();
  switch (Separator.Kind) {
  case Token::TK_BlockEnd:
  case Token::TK_FlowMappingEnd:
  case Token::TK_Key:
  case Token::TK_FlowEntry:
  case Token::TK_Error:
    return NullValue();
  case Token::TK_Value:
    break;
  default:
    setError("Unexpected token in Key Value.", Separator);
    return NullValue();
  }
  getNext(); // Step over the value indicator.

  // A value indicator with nothing after it is an explicit null value.
  const auto Following = peekNext().Kind;
  if (Following == Token::TK_BlockEnd || Following == Token::TK_Key)
    return NullValue();

  Value = parseBlockNode();
  return Value;
}
void MappingNode::increment() {
if (failed()) {
IsAtEnd = true;
CurrentEntry = nullptr;
return;
}
if (CurrentEntry) {
CurrentEntry->skip();
if (Type == MT_Inline) {
IsAtEnd = true;
CurrentEntry = nullptr;
return;
}
}
Token T = peekNext();
if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
} else if (Type == MT_Block) {
switch (T.Kind) {
case Token::TK_BlockEnd:
getNext();
IsAtEnd = true;
CurrentEntry = nullptr;
break;
default:
setError("Unexpected token. Expected Key or Block End", T);
LLVM_FALLTHROUGH;
case Token::TK_Error:
IsAtEnd = true;
CurrentEntry = nullptr;
}
} else {
switch (T.Kind) {
case Token::TK_FlowEntry:
getNext();
return increment();
case Token::TK_FlowMappingEnd:
getNext();
LLVM_FALLTHROUGH;
case Token::TK_Error:
IsAtEnd = true;
CurrentEntry = nullptr;
break;
default:
setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
"Mapping End."
, T);
IsAtEnd = true;
CurrentEntry = nullptr;
}
}
}
void SequenceNode::increment() {
if (failed()) {
IsAtEnd = true;
CurrentEntry = nullptr;
return;
}
if (CurrentEntry)
CurrentEntry->skip();
Token T = peekNext();
if (SeqType == ST_Block) {
switch (T.Kind) {
case Token::TK_BlockEntry:
getNext();
CurrentEntry = parseBlockNode();
if (!CurrentEntry) { IsAtEnd = true;
CurrentEntry = nullptr;
}
break;
case Token::TK_BlockEnd:
getNext();
IsAtEnd = true;
CurrentEntry = nullptr;
break;
default:
setError( "Unexpected token. Expected Block Entry or Block End."
, T);
LLVM_FALLTHROUGH;
case Token::TK_Error:
IsAtEnd = true;
CurrentEntry = nullptr;
}
} else if (SeqType == ST_Indentless) {
switch (T.Kind) {
case Token::TK_BlockEntry:
getNext();
CurrentEntry = parseBlockNode();
if (!CurrentEntry) { IsAtEnd = true;
CurrentEntry = nullptr;
}
break;
default:
case Token::TK_Error:
IsAtEnd = true;
CurrentEntry = nullptr;
}
} else if (SeqType == ST_Flow) {
switch (T.Kind) {
case Token::TK_FlowEntry:
getNext();
WasPreviousTokenFlowEntry = true;
return increment();
case Token::TK_FlowSequenceEnd:
getNext();
LLVM_FALLTHROUGH;
case Token::TK_Error:
IsAtEnd = true;
CurrentEntry = nullptr;
break;
case Token::TK_StreamEnd:
case Token::TK_DocumentEnd:
case Token::TK_DocumentStart:
setError("Could not find closing ]!", T);
IsAtEnd = true;
CurrentEntry = nullptr;
break;
default:
if (!WasPreviousTokenFlowEntry) {
setError("Expected , between entries!", T);
IsAtEnd = true;
CurrentEntry = nullptr;
break;
}
CurrentEntry = parseBlockNode();
if (!CurrentEntry) {
IsAtEnd = true;
}
WasPreviousTokenFlowEntry = false;
break;
}
}
}
// Construct the next document of stream S.
// The tag map starts with the two default handles "!" and "!!". If any
// directives were parsed, the token that follows them must be a document
// start (expectToken consumes it and reports an error otherwise). A document
// start token found after that is consumed as well.
Document::Document(Stream &S) : stream(S), Root(nullptr) {
TagMap["!"] = "!";
TagMap["!!"] = "tag:yaml.org,2002:";
if (parseDirectives())
expectToken(Token::TK_DocumentStart);
Token &T = peekNext();
if (T.Kind == Token::TK_DocumentStart)
getNext();
}
bool Document::skip() {
if (stream.scanner->failed())
return false;
if (!Root && !getRoot())
return false;
Root->skip();
Token &T = peekNext();
if (T.Kind == Token::TK_StreamEnd)
return false;
if (T.Kind == Token::TK_DocumentEnd) {
getNext();
return skip();
}
return true;
}
// Forward to the stream scanner's peekNext().
Token &Document::peekNext() {
return stream.scanner->peekNext();
}
// Forward to the stream scanner's getNext(); the token is returned by value.
Token Document::getNext() {
return stream.scanner->getNext();
}
// Report Message through the scanner, positioned at the first character of
// the Location token's range.
void Document::setError(const Twine &Message, Token &Location) const {
stream.scanner->setError(Message, Location.Range.begin());
}
// Return the failure state of the stream's scanner.
bool Document::failed() const {
return stream.scanner->failed();
}
/// Parse one node starting at the next token and return it.
///
/// At most one anchor and one tag may precede the node; a repeated anchor or
/// tag is reported through setError() and yields nullptr. An alias is
/// returned immediately. Otherwise the token after the properties selects
/// the node kind. nullptr is also returned for an error token, and for a flow
/// end or flow entry token unless Root is already a mapping or sequence.
/// All nodes are allocated from NodeAllocator.
Node *Document::parseBlockNode() {
  Token T = peekNext();
  Token AnchorInfo;
  Token TagInfo;

  // Collect the node's properties (anchor and tag, in either order).
  for (bool InProperties = true; InProperties;) {
    switch (T.Kind) {
    case Token::TK_Alias:
      getNext();
      return new (NodeAllocator)
          AliasNode(stream.CurrentDoc, T.Range.substr(1));
    case Token::TK_Anchor:
      if (AnchorInfo.Kind == Token::TK_Anchor) {
        setError("Already encountered an anchor for this node!", T);
        return nullptr;
      }
      AnchorInfo = getNext();
      T = peekNext();
      break;
    case Token::TK_Tag:
      if (TagInfo.Kind == Token::TK_Tag) {
        setError("Already encountered a tag for this node!", T);
        return nullptr;
      }
      TagInfo = getNext();
      T = peekNext();
      break;
    default:
      InProperties = false;
      break;
    }
  }

  auto &Owner = stream.CurrentDoc;
  // The first character of the anchor token's range is dropped.
  const StringRef Anchor = AnchorInfo.Range.substr(1);
  const StringRef Tag = TagInfo.Range;

  switch (T.Kind) {
  case Token::TK_BlockEntry:
    // The entry token is left for the sequence node to consume.
    return new (NodeAllocator)
        SequenceNode(Owner, Anchor, Tag, SequenceNode::ST_Indentless);
  case Token::TK_BlockSequenceStart:
    getNext();
    return new (NodeAllocator)
        SequenceNode(Owner, Anchor, Tag, SequenceNode::ST_Block);
  case Token::TK_BlockMappingStart:
    getNext();
    return new (NodeAllocator)
        MappingNode(Owner, Anchor, Tag, MappingNode::MT_Block);
  case Token::TK_FlowSequenceStart:
    getNext();
    return new (NodeAllocator)
        SequenceNode(Owner, Anchor, Tag, SequenceNode::ST_Flow);
  case Token::TK_FlowMappingStart:
    getNext();
    return new (NodeAllocator)
        MappingNode(Owner, Anchor, Tag, MappingNode::MT_Flow);
  case Token::TK_Scalar:
    getNext();
    return new (NodeAllocator) ScalarNode(Owner, Anchor, Tag, T.Range);
  case Token::TK_BlockScalar: {
    getNext();
    // Copy the value, including its terminating NUL, into the allocator and
    // then drop the NUL from the view handed to the node.
    StringRef WithTerminator(T.Value.c_str(), T.Value.length() + 1);
    StringRef Copied = WithTerminator.copy(NodeAllocator).drop_back();
    return new (NodeAllocator)
        BlockScalarNode(Owner, Anchor, Tag, Copied, T.Range);
  }
  case Token::TK_Key:
    // The key token is left for the mapping node to consume.
    return new (NodeAllocator)
        MappingNode(Owner, Anchor, Tag, MappingNode::MT_Inline);
  case Token::TK_DocumentStart:
  case Token::TK_DocumentEnd:
  case Token::TK_StreamEnd:
  default:
    return new (NodeAllocator) NullNode(Owner);
  case Token::TK_FlowMappingEnd:
  case Token::TK_FlowSequenceEnd:
  case Token::TK_FlowEntry: {
    if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
      return new (NodeAllocator) NullNode(Owner);

    setError("Unexpected token", T);
    return nullptr;
  }
  case Token::TK_Error:
    return nullptr;
  }
  llvm_unreachable("Control flow shouldn't reach here.");
  return nullptr;
}
bool Document::parseDirectives() {
bool isDirective = false;
while (true) {
Token T = peekNext();
if (T.Kind == Token::TK_TagDirective) {
parseTAGDirective();
isDirective = true;
} else if (T.Kind == Token::TK_VersionDirective) {
parseYAMLDirective();
isDirective = true;
} else
break;
}
return isDirective;
}
// Consume the version directive token; its content is not examined.
void Document::parseYAMLDirective() {
getNext(); }
void Document::parseTAGDirective() {
Token Tag = getNext(); StringRef T = Tag.Range;
T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
std::size_t HandleEnd = T.find_first_of(" \t");
StringRef TagHandle = T.substr(0, HandleEnd);
StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
TagMap[TagHandle] = TagPrefix;
}
bool Document::expectToken(int TK) {
Token T = getNext();
if (T.Kind != TK) {
setError("Unexpected token", T);
return false;
}
return true;
}