// Compiler projects using LLVM.
//===- unittests/Basic/CharInfoTest.cpp -- ASCII classification tests -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Basic/CharInfo.h"
#include "gtest/gtest.h"

using namespace llvm;
using namespace clang;

// Spot-check the contents of charinfo::InfoTable against the classification
// constants expected for individual characters and character ranges.
TEST(CharInfoTest, validateInfoTable) {
  using namespace charinfo;

  // Characters whose table entry must equal a single classification constant.
  // Form feed and vertical tab are expected to be filed under horizontal
  // whitespace, alongside tab.
  const struct {
    unsigned Expected;
    char Ch;
  } FixedEntries[] = {
      {static_cast<unsigned>(CHAR_SPACE), ' '},
      {static_cast<unsigned>(CHAR_HORZ_WS), '\t'},
      {static_cast<unsigned>(CHAR_HORZ_WS), '\f'},
      {static_cast<unsigned>(CHAR_HORZ_WS), '\v'},
      {static_cast<unsigned>(CHAR_VERT_WS), '\n'},
      {static_cast<unsigned>(CHAR_VERT_WS), '\r'},
      {static_cast<unsigned>(CHAR_UNDER), '_'},
      {static_cast<unsigned>(CHAR_PERIOD), '.'},
  };
  // The character code is streamed so a failure names the offending entry.
  for (const auto &Entry : FixedEntries)
    EXPECT_EQ(Entry.Expected, InfoTable[static_cast<unsigned>(Entry.Ch)])
        << static_cast<int>(Entry.Ch);

  // Walks the lowercase letters First..Last (inclusive) and checks both the
  // lowercase entry and the entry of the matching uppercase letter.
  auto ExpectLetterRange = [](unsigned First, unsigned Last,
                              unsigned LowerKind, unsigned UpperKind) {
    for (unsigned Lower = First; Lower <= Last; ++Lower) {
      EXPECT_EQ(LowerKind, InfoTable[Lower]);
      EXPECT_EQ(UpperKind, InfoTable[Lower + 'A' - 'a']);
    }
  };

  // Letters that double as hexadecimal digits have their own constants.
  ExpectLetterRange('a', 'f', CHAR_XLOWER, CHAR_XUPPER);
  ExpectLetterRange('g', 'z', CHAR_LOWER, CHAR_UPPER);

  // Decimal digits.
  for (unsigned Digit = '0'; Digit <= '9'; ++Digit)
    EXPECT_EQ(static_cast<unsigned>(CHAR_DIGIT), InfoTable[Digit]);
}

// Check various predicates.
TEST(CharInfoTest, isASCII) {
  // Looped checks stream the character code so a failure names its input.

  // Samples at or below 0x7f must be reported as ASCII.
  const char AsciiChars[] = {'\0', '\n', ' ', 'a', '\x7f'};
  for (char Ch : AsciiChars)
    EXPECT_TRUE(isASCII(Ch)) << static_cast<int>(Ch);

  // Samples with the high bit set must not be.
  const char NonAsciiChars[] = {'\x80', '\xc2', '\xff'};
  for (char Ch : NonAsciiChars)
    EXPECT_FALSE(isASCII(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isAsciiIdentifierStart) {
  // Looped checks stream the character code so a failure names its input.

  // Letters and underscore are accepted as the first identifier character.
  const char StartChars[] = {'a', 'A', 'z', 'Z', '_'};
  for (char Ch : StartChars)
    EXPECT_TRUE(isAsciiIdentifierStart(Ch)) << static_cast<int>(Ch);

  // A digit, other punctuation, NUL, '$' (with the default AllowDollar) and
  // high-bit bytes are all rejected.
  const char NonStartChars[] = {'0', '.',    '`',    '\0',
                                '$', '\x80', '\xc2', '\xff'};
  for (char Ch : NonStartChars)
    EXPECT_FALSE(isAsciiIdentifierStart(Ch)) << static_cast<int>(Ch);

  // '$' is accepted once the caller opts in.
  EXPECT_TRUE(isAsciiIdentifierStart('$', /*AllowDollar=*/true));
}

TEST(CharInfoTest, isAsciiIdentifierContinue) {
  // Looped checks stream the character code so a failure names its input.

  // Letters, underscore and a digit are accepted inside an identifier.
  const char ContinueChars[] = {'a', 'A', 'z', 'Z', '_', '0'};
  for (char Ch : ContinueChars)
    EXPECT_TRUE(isAsciiIdentifierContinue(Ch)) << static_cast<int>(Ch);

  // Other punctuation, NUL, '$' (with the default AllowDollar) and high-bit
  // bytes are all rejected.
  const char NonContinueChars[] = {'.', '`', '\0', '$', '\x80', '\xc2', '\xff'};
  for (char Ch : NonContinueChars)
    EXPECT_FALSE(isAsciiIdentifierContinue(Ch)) << static_cast<int>(Ch);

  // '$' is accepted once the caller opts in.
  EXPECT_TRUE(isAsciiIdentifierContinue('$', /*AllowDollar=*/true));
}

TEST(CharInfoTest, isHorizontalWhitespace) {
  // Looped checks stream the character code so a failure names its input.

  // Space, tab, form feed and vertical tab are the characters this test
  // expects to count as horizontal whitespace.
  const char HorizontalChars[] = {' ', '\t', '\f', '\v'};
  for (char Ch : HorizontalChars)
    EXPECT_TRUE(isHorizontalWhitespace(Ch)) << static_cast<int>(Ch);

  // Printable characters, NUL, DEL, the line terminators and high-bit bytes
  // must all be rejected.
  const char OtherChars[] = {'a',    '_',  '0',  '.',    '`',    '\0',
                             '\x7f', '\n', '\r', '\x80', '\xc2', '\xff'};
  for (char Ch : OtherChars)
    EXPECT_FALSE(isHorizontalWhitespace(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isVerticalWhitespace) {
  // Looped checks stream the character code so a failure names its input.

  // Only the two line terminators are expected to be vertical whitespace.
  const char VerticalChars[] = {'\n', '\r'};
  for (char Ch : VerticalChars)
    EXPECT_TRUE(isVerticalWhitespace(Ch)) << static_cast<int>(Ch);

  // Printable characters, NUL, DEL, high-bit bytes and the horizontal
  // whitespace characters (including form feed and vertical tab) must all be
  // rejected.
  const char OtherChars[] = {'a', '_',  '0',  '.',  '`',    '\0',   '\x7f',
                             ' ', '\t', '\f', '\v', '\x80', '\xc2', '\xff'};
  for (char Ch : OtherChars)
    EXPECT_FALSE(isVerticalWhitespace(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isWhitespace) {
  // Looped checks stream the character code so a failure names its input.

  // Both the horizontal and the vertical whitespace characters are accepted.
  const char WhitespaceChars[] = {' ', '\t', '\f', '\v', '\n', '\r'};
  for (char Ch : WhitespaceChars)
    EXPECT_TRUE(isWhitespace(Ch)) << static_cast<int>(Ch);

  // Printable characters, NUL, DEL and high-bit bytes are rejected.
  const char OtherChars[] = {'a',  '_',    '0',    '.',    '`',
                             '\0', '\x7f', '\x80', '\xc2', '\xff'};
  for (char Ch : OtherChars)
    EXPECT_FALSE(isWhitespace(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isDigit) {
  // Looped checks stream the character code so a failure names its input.

  // Both ends of the decimal digit range.
  const char Digits[] = {'0', '9'};
  for (char Ch : Digits)
    EXPECT_TRUE(isDigit(Ch)) << static_cast<int>(Ch);

  // Letters, punctuation (including '/', which directly precedes '0'), NUL and
  // high-bit bytes are rejected.
  const char NonDigits[] = {'a', 'A',  'z',    'Z',    '.',   '_',
                            '/', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : NonDigits)
    EXPECT_FALSE(isDigit(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isHexDigit) {
  // Looped checks stream the character code so a failure names its input.

  // Decimal digits plus a hexadecimal letter in either case.
  const char HexDigits[] = {'0', '9', 'a', 'A'};
  for (char Ch : HexDigits)
    EXPECT_TRUE(isHexDigit(Ch)) << static_cast<int>(Ch);

  // Letters outside the hexadecimal range, punctuation, NUL and high-bit
  // bytes are rejected.
  const char NonHexDigits[] = {'z',  'Z',    '.',    '_',   '/',
                               '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : NonHexDigits)
    EXPECT_FALSE(isHexDigit(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isLetter) {
  // Looped checks stream the character code so a failure names its input.

  // First and last letter of the alphabet, in both cases.
  const char Letters[] = {'a', 'A', 'z', 'Z'};
  for (char Ch : Letters)
    EXPECT_TRUE(isLetter(Ch)) << static_cast<int>(Ch);

  // Digits, punctuation (underscore included), NUL and high-bit bytes are
  // rejected.
  const char NonLetters[] = {'0', '9',  '.',    '_',    '/',
                             '(', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : NonLetters)
    EXPECT_FALSE(isLetter(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isLowercase) {
  // Looped checks stream the character code so a failure names its input.

  // First and last lowercase letter.
  const char LowercaseLetters[] = {'a', 'z'};
  for (char Ch : LowercaseLetters)
    EXPECT_TRUE(isLowercase(Ch)) << static_cast<int>(Ch);

  // Digits, uppercase letters, punctuation, NUL and high-bit bytes are
  // rejected.
  const char OtherChars[] = {'0', '9', 'A',  'Z',    '.',    '_',
                             '/', '(', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : OtherChars)
    EXPECT_FALSE(isLowercase(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isUppercase) {
  // Looped checks stream the character code so a failure names its input.

  // First and last uppercase letter.
  const char UppercaseLetters[] = {'A', 'Z'};
  for (char Ch : UppercaseLetters)
    EXPECT_TRUE(isUppercase(Ch)) << static_cast<int>(Ch);

  // Digits, lowercase letters, punctuation, NUL and high-bit bytes are
  // rejected.
  const char OtherChars[] = {'0', '9', 'a',  'z',    '.',    '_',
                             '/', '(', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : OtherChars)
    EXPECT_FALSE(isUppercase(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isAlphanumeric) {
  // Looped checks stream the character code so a failure names its input.

  // Range endpoints of the digits and of the letters in both cases.
  const char AlphanumericChars[] = {'0', '9', 'a', 'A', 'z', 'Z'};
  for (char Ch : AlphanumericChars)
    EXPECT_TRUE(isAlphanumeric(Ch)) << static_cast<int>(Ch);

  // Punctuation (underscore included), NUL and high-bit bytes are rejected.
  const char OtherChars[] = {'.', '_', '/', '(', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : OtherChars)
    EXPECT_FALSE(isAlphanumeric(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isPunctuation) {
  // Looped checks stream the character code so a failure names its input.

  // A sample of punctuation characters; underscore counts as one here.
  const char PunctuationChars[] = {'.', '_', '/', '('};
  for (char Ch : PunctuationChars)
    EXPECT_TRUE(isPunctuation(Ch)) << static_cast<int>(Ch);

  // Digits, letters, whitespace, NUL and high-bit bytes are rejected.
  const char OtherChars[] = {'0', '9',  'a',  'A',    'z',    'Z',
                             ' ', '\n', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : OtherChars)
    EXPECT_FALSE(isPunctuation(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isPrintable) {
  // Looped checks stream the character code so a failure names its input.

  // Digits, letters, punctuation and the space character are printable.
  const char PrintableChars[] = {'0', '9', 'a', 'A', 'z', 'Z',
                                 '.', '_', '/', '(', ' '};
  for (char Ch : PrintableChars)
    EXPECT_TRUE(isPrintable(Ch)) << static_cast<int>(Ch);

  // Tab, newline, NUL and high-bit bytes are not.
  const char UnprintableChars[] = {'\t', '\n', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : UnprintableChars)
    EXPECT_FALSE(isPrintable(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isPreprocessingNumberBody) {
  // Looped checks stream the character code so a failure names its input.

  // Digits, letters, period and underscore are accepted.
  const char BodyChars[] = {'0', '9', 'a', 'A', 'z', 'Z', '.', '_'};
  for (char Ch : BodyChars)
    EXPECT_TRUE(isPreprocessingNumberBody(Ch)) << static_cast<int>(Ch);

  // Other punctuation, NUL and high-bit bytes are rejected.
  const char NonBodyChars[] = {'/', '(', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : NonBodyChars)
    EXPECT_FALSE(isPreprocessingNumberBody(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, isRawStringDelimBody) {
  // Looped checks stream the character code so a failure names its input.

  // Digits, letters and the sampled punctuation '.', '_' and '/' are accepted.
  const char DelimChars[] = {'0', '9', 'a', 'A', 'z', 'Z', '.', '_', '/'};
  for (char Ch : DelimChars)
    EXPECT_TRUE(isRawStringDelimBody(Ch)) << static_cast<int>(Ch);

  // An opening parenthesis, NUL and high-bit bytes are rejected.
  const char NonDelimChars[] = {'(', '\0', '\x80', '\xc2', '\xff'};
  for (char Ch : NonDelimChars)
    EXPECT_FALSE(isRawStringDelimBody(Ch)) << static_cast<int>(Ch);
}

TEST(CharInfoTest, toLowercase) {
  // Input characters paired with the result expected from toLowercase().
  // Only uppercase letters are expected to change.
  const struct {
    char Input;
    char Expected;
  } Cases[] = {
      {'0', '0'}, {'9', '9'}, {'a', 'a'}, {'A', 'a'},   {'z', 'z'},
      {'Z', 'z'}, {'.', '.'}, {'_', '_'}, {'/', '/'},   {'\0', '\0'},
  };

  // The input's character code is streamed so a failure names its input.
  for (const auto &Case : Cases)
    EXPECT_EQ(Case.Expected, toLowercase(Case.Input))
        << static_cast<int>(Case.Input);
}

TEST(CharInfoTest, toUppercase) {
  // Input characters paired with the result expected from toUppercase().
  // Only lowercase letters are expected to change.
  const struct {
    char Input;
    char Expected;
  } Cases[] = {
      {'0', '0'}, {'9', '9'}, {'a', 'A'}, {'A', 'A'},   {'z', 'Z'},
      {'Z', 'Z'}, {'.', '.'}, {'_', '_'}, {'/', '/'},   {'\0', '\0'},
  };

  // The input's character code is streamed so a failure names its input.
  for (const auto &Case : Cases)
    EXPECT_EQ(Case.Expected, toUppercase(Case.Input))
        << static_cast<int>(Case.Input);
}

TEST(CharInfoTest, isValidAsciiIdentifier) {
  // Every identifier shape below is checked with three kinds of input:
  // strings that must be rejected, strings that must be accepted, and a '$'
  // variant that is rejected by default but accepted with AllowDollar set.
  // Looped checks stream the candidate string so a failure names its input.

  // The empty string is rejected.
  EXPECT_FALSE(isValidAsciiIdentifier(""));

  // Shape "X": a single character. A lone digit is rejected.
  const char *const BadSingle[] = {".",    "\n",   " ", "\x80",
                                   "\xc2", "\xff", "$", "1"};
  const char *const GoodSingle[] = {"_", "a", "z", "A", "Z"};
  for (const char *Name : BadSingle)
    EXPECT_FALSE(isValidAsciiIdentifier(Name)) << Name;
  for (const char *Name : GoodSingle)
    EXPECT_TRUE(isValidAsciiIdentifier(Name)) << Name;
  EXPECT_TRUE(isValidAsciiIdentifier("$", /*AllowDollar=*/true));

  // Shape "X_": the character under test comes first, so a leading digit is
  // still rejected.
  const char *const BadWithSuffix[] = {"._",    "\n_",   " _", "\x80_",
                                       "\xc2_", "\xff_", "$_", "1_"};
  const char *const GoodWithSuffix[] = {"__", "a_", "z_", "A_", "Z_"};
  for (const char *Name : BadWithSuffix)
    EXPECT_FALSE(isValidAsciiIdentifier(Name)) << Name;
  for (const char *Name : GoodWithSuffix)
    EXPECT_TRUE(isValidAsciiIdentifier(Name)) << Name;
  EXPECT_TRUE(isValidAsciiIdentifier("$_", /*AllowDollar=*/true));

  // Shape "_X": the character under test follows an underscore, where a digit
  // becomes acceptable.
  const char *const BadWithPrefix[] = {"_.",    "_\n",   "_ ", "_\x80",
                                       "_\xc2", "_\xff", "_$"};
  const char *const GoodWithPrefix[] = {"_1", "__", "_a", "_z", "_A", "_Z"};
  for (const char *Name : BadWithPrefix)
    EXPECT_FALSE(isValidAsciiIdentifier(Name)) << Name;
  for (const char *Name : GoodWithPrefix)
    EXPECT_TRUE(isValidAsciiIdentifier(Name)) << Name;
  EXPECT_TRUE(isValidAsciiIdentifier("_$", /*AllowDollar=*/true));

  // Shape "__X": the character under test follows two underscores.
  const char *const BadWithDoublePrefix[] = {
      "__.", "__\n", "__ ", "__\x80", "__\xc2", "__\xff", "__$"};
  const char *const GoodWithDoublePrefix[] = {"__1", "___", "__a",
                                              "__z", "__A", "__Z"};
  for (const char *Name : BadWithDoublePrefix)
    EXPECT_FALSE(isValidAsciiIdentifier(Name)) << Name;
  for (const char *Name : GoodWithDoublePrefix)
    EXPECT_TRUE(isValidAsciiIdentifier(Name)) << Name;
  EXPECT_TRUE(isValidAsciiIdentifier("__$", /*AllowDollar=*/true));

  // Shape "_X_": the character under test sits between two underscores.
  const char *const BadSurrounded[] = {"_._",    "_\n_",   "_ _", "_\x80_",
                                       "_\xc2_", "_\xff_", "_$_"};
  const char *const GoodSurrounded[] = {"_1_", "___", "_a_",
                                        "_z_", "_A_", "_Z_"};
  for (const char *Name : BadSurrounded)
    EXPECT_FALSE(isValidAsciiIdentifier(Name)) << Name;
  for (const char *Name : GoodSurrounded)
    EXPECT_TRUE(isValidAsciiIdentifier(Name)) << Name;
  EXPECT_TRUE(isValidAsciiIdentifier("_$_", /*AllowDollar=*/true));
}