//==--- riscv_vector.td - RISC-V V-ext Builtin function list --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the builtins for RISC-V V-extension. See: // // https://github.com/riscv/rvv-intrinsic-doc // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // Instruction definitions //===----------------------------------------------------------------------===// // Each record of the class RVVBuiltin defines a collection of builtins (i.e. // "def vadd : RVVBuiltin" will be used to define things like "vadd_vv_i32m1", // "vadd_vv_i32m2", etc). // // The elements of this collection are defined by an instantiation process the // range of which is specified by the cross product of the LMUL attribute and // every element in the attribute TypeRange. By default builtins have LMUL = [1, // 2, 4, 8, 1/2, 1/4, 1/8] so the process is repeated 7 times. In tablegen we // use the Log2LMUL [0, 1, 2, 3, -1, -2, -3] to represent the LMUL. // // LMUL represents the fact that the types of values used by that builtin are // values generated by instructions that are executed under that LMUL. However, // this does not mean the builtin is necessarily lowered into an instruction // that executes under the specified LMUL. An example where this happens are // loads and stores of masks. A mask like `vbool8_t` can be generated, for // instance, by comparing two `__rvv_int8m1_t` (this is LMUL=1) or comparing two // `__rvv_int16m2_t` (this is LMUL=2). The actual load or store, however, will // be performed under LMUL=1 because mask registers are not grouped. // // TypeRange is a non-empty sequence of basic types: // // c: int8_t (i8) // s: int16_t (i16) // i: int32_t (i32) // l: int64_t (i64) // x: float16_t (half) // f: float32_t (float) // d: float64_t (double) // // This way, given an LMUL, a record with a TypeRange "sil" will cause the // definition of 3 builtins. Each type "t" in the TypeRange (in this example // they are int16_t, int32_t, int64_t) is used as a parameter that drives the // definition of that particular builtin (for the given LMUL). // // During the instantiation, types can be transformed or modified using type // transformers. Given a type "t" the following primitive type transformers can // be applied to it to yield another type. // // e: type of "t" as is (identity) // v: computes a vector type whose element type is "t" for the current LMUL // w: computes a vector type identical to what 'v' computes except for the // element type which is twice as wide as the element type of 'v' // q: computes a vector type identical to what 'v' computes except for the // element type which is four times as wide as the element type of 'v' // o: computes a vector type identical to what 'v' computes except for the // element type which is eight times as wide as the element type of 'v' // m: computes a vector type identical to what 'v' computes except for the // element type which is bool // 0: void type, ignores "t" // z: size_t, ignores "t" // t: ptrdiff_t, ignores "t" // u: unsigned long, ignores "t" // l: long, ignores "t" // // So for instance if t is "i", i.e. int, then "e" will yield int again. "v" // will yield an RVV vector type (assume LMUL=1), so __rvv_int32m1_t. // Accordingly "w" would yield __rvv_int64m2_t. // // A type transformer can be prefixed by other non-primitive type transformers. // // P: constructs a pointer to the current type // C: adds const to the type // K: requires the integer type to be a constant expression // U: given an integer type or vector type, computes its unsigned variant // I: given a vector type, compute the vector type with integer type // elements of the same width // F: given a vector type, compute the vector type with floating-point type // elements of the same width // S: given a vector type, computes its equivalent one for LMUL=1. This is a // no-op if the vector was already LMUL=1 // (Log2EEW:Value): Log2EEW value could be 3/4/5/6 (8/16/32/64), given a // vector type (SEW and LMUL) and EEW (8/16/32/64), computes its // equivalent integer vector type with EEW and corresponding ELMUL (elmul = // (eew/sew) * lmul). For example, vector type is __rvv_float16m4 // (SEW=16, LMUL=4) and Log2EEW is 3 (EEW=8), and then equivalent vector // type is __rvv_uint8m2_t (elmul=(8/16)*4 = 2). Ignore to define a new // builtins if its equivalent type has illegal lmul. // (FixedSEW:Value): Given a vector type (SEW and LMUL), and computes another // vector type which only changed SEW as given value. Ignore to define a new // builtin if its equivalent type has illegal lmul or the SEW does not changed. // (SFixedLog2LMUL:Value): Smaller Fixed Log2LMUL. Given a vector type (SEW // and LMUL), and computes another vector type which only changed LMUL as // given value. The new LMUL should be smaller than the old one. Ignore to // define a new builtin if its equivalent type has illegal lmul. // (LFixedLog2LMUL:Value): Larger Fixed Log2LMUL. Given a vector type (SEW // and LMUL), and computes another vector type which only changed LMUL as // given value. The new LMUL should be larger than the old one. Ignore to // define a new builtin if its equivalent type has illegal lmul. // // Following with the example above, if t is "i", then "Ue" will yield unsigned // int and "Fv" will yield __rvv_float32m1_t (again assuming LMUL=1), Fw would // yield __rvv_float64m2_t, etc. // // Each builtin is then defined by applying each type in TypeRange against the // sequence of type transformers described in Suffix and Prototype. // // The name of the builtin is defined by the Name attribute (which defaults to // the name of the class) appended (separated with an underscore) the Suffix // attribute. For instance with Name="foo", Suffix = "v" and TypeRange = "il", // the builtin generated will be __builtin_rvv_foo_i32m1 and // __builtin_rvv_foo_i64m1 (under LMUL=1). If Suffix contains more than one // type transformer (say "vv") each of the types is separated with an // underscore as in "__builtin_rvv_foo_i32m1_i32m1". // // The C/C++ prototype of the builtin is defined by the Prototype attribute. // Prototype is a non-empty sequence of type transformers, the first of which // is the return type of the builtin and the rest are the parameters of the // builtin, in order. For instance if Prototype is "wvv" and TypeRange is "si" // a first builtin will have type // __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t) and the second builtin // will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t) (again // under LMUL=1). // // There are a number of attributes that are used to constraint the number and // shape of the builtins generated. Refer to the comments below for them. class Policy<int val>{ int Value = val; } def NonePolicy : Policy<0>; def HasPassthruOperand : Policy<1>; def HasPolicyOperand : Policy<2>; class RVVBuiltin<string suffix, string prototype, string type_range, string overloaded_suffix = ""> { // Base name that will be prepended in __builtin_rvv_ and appended the // computed Suffix. string Name = NAME; // If not empty, each instantiated builtin will have this appended after an // underscore (_). It is instantiated like Prototype. string Suffix = suffix; // If empty, default OverloadedName is sub string of `Name` which end of first // '_'. For example, the default overloaded name is `vadd` for Name `vadd_vv`. // It's used for describe some special naming cases. string OverloadedName = ""; // If not empty, each OverloadedName will have this appended after an // underscore (_). It is instantiated like Prototype. string OverloadedSuffix = overloaded_suffix; // The different variants of the builtin, parameterised with a type. string TypeRange = type_range; // We use each type described in TypeRange and LMUL with prototype to // instantiate a specific element of the set of builtins being defined. // Prototype attribute defines the C/C++ prototype of the builtin. It is a // non-empty sequence of type transformers, the first of which is the return // type of the builtin and the rest are the parameters of the builtin, in // order. For instance if Prototype is "wvv", TypeRange is "si" and LMUL=1, a // first builtin will have type // __rvv_int32m2_t (__rvv_int16m1_t, __rvv_int16m1_t), and the second builtin // will have type __rvv_int64m2_t (__rvv_int32m1_t, __rvv_int32m1_t). string Prototype = prototype; // This builtin has a masked form. bit HasMasked = true; // If HasMasked, this flag states that this builtin has a maskedoff operand. It // is always the first operand in builtin and IR intrinsic. bit HasMaskedOffOperand = true; // This builtin has a granted vector length parameter. bit HasVL = true; // The policy scheme for masked intrinsic IR. // It could be NonePolicy or HasPolicyOperand. // HasPolicyOperand: Has a policy operand. 0 is tail and mask undisturbed, 1 is // tail agnostic, 2 is mask undisturbed, and 3 is tail and mask agnostic. The // policy operand is located at the last position. Policy MaskedPolicyScheme = HasPolicyOperand; // The policy scheme for unmasked intrinsic IR. // It could be NonePolicy, HasPassthruOperand or HasPolicyOperand. // HasPassthruOperand: Has a passthru operand to decide tail policy. If it is // undef, tail policy is tail agnostic, otherwise policy is tail undisturbed. // HasPolicyOperand: Has a policy operand. 1 is tail agnostic and 0 is tail // undisturbed. Policy UnMaskedPolicyScheme = NonePolicy; // This builtin supports non-masked function overloading api. // All masked operations support overloading api. bit HasUnMaskedOverloaded = true; // This builtin is valid for the given Log2LMULs. list<int> Log2LMUL = [0, 1, 2, 3, -1, -2, -3]; // Manual code in clang codegen riscv_vector_builtin_cg.inc code ManualCodegen = [{}]; code MaskedManualCodegen = [{}]; // When emit the automatic clang codegen, it describes what types we have to use // to obtain the specific LLVM intrinsic. -1 means the return type, otherwise, // k >= 0 meaning the k-th operand (counting from zero) of the codegen'd // parameter of the unmasked version. k can't be the mask operand's position. list<int> IntrinsicTypes = []; // If these names are not empty, this is the ID of the LLVM intrinsic // we want to lower to. string IRName = NAME; // If HasMasked, this is the ID of the LLVM intrinsic we want to lower to. string MaskedIRName = NAME #"_mask"; // Use clang_builtin_alias to save the number of builtins. bit HasBuiltinAlias = true; // Features required to enable for this builtin. list<string> RequiredFeatures = []; // Number of fields for Load/Store Segment instructions. int NF = 1; } // This is the code emitted in the header. class RVVHeader { code HeaderCode; } //===----------------------------------------------------------------------===// // Basic classes with automatic codegen. //===----------------------------------------------------------------------===// class RVVOutBuiltin<string suffix, string prototype, string type_range> : RVVBuiltin<suffix, prototype, type_range> { let IntrinsicTypes = [-1]; } class RVVOp0Builtin<string suffix, string prototype, string type_range> : RVVBuiltin<suffix, prototype, type_range> { let IntrinsicTypes = [0]; } class RVVOutOp1Builtin<string suffix, string prototype, string type_range> : RVVBuiltin<suffix, prototype, type_range> { let IntrinsicTypes = [-1, 1]; } class RVVOutOp0Op1Builtin<string suffix, string prototype, string type_range> : RVVBuiltin<suffix, prototype, type_range> { let IntrinsicTypes = [-1, 0, 1]; } multiclass RVVBuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes, list<int> intrinsic_types> { let IRName = intrinsic_name, MaskedIRName = intrinsic_name # "_mask", IntrinsicTypes = intrinsic_types in { foreach s_p = suffixes_prototypes in { let Name = NAME # "_" # s_p[0] in { defvar suffix = s_p[1]; defvar prototype = s_p[2]; def : RVVBuiltin<suffix, prototype, type_range>; } } } } // IntrinsicTypes is output, op0, op1 [-1, 0, 1] multiclass RVVOutOp0Op1BuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [-1, 0, 1]>; multiclass RVVOutBuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [-1]>; multiclass RVVOp0BuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [0]>; // IntrinsicTypes is output, op1 [-1, 1] multiclass RVVOutOp1BuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [-1, 1]>; multiclass RVVOp0Op1BuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [0, 1]>; multiclass RVVOutOp1Op2BuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> : RVVBuiltinSet<intrinsic_name, type_range, suffixes_prototypes, [-1, 1, 2]>; multiclass RVVSignedBinBuiltinSet : RVVOutOp1BuiltinSet<NAME, "csil", [["vv", "v", "vvv"], ["vx", "v", "vve"]]>; multiclass RVVUnsignedBinBuiltinSet : RVVOutOp1BuiltinSet<NAME, "csil", [["vv", "Uv", "UvUvUv"], ["vx", "Uv", "UvUvUe"]]>; multiclass RVVIntBinBuiltinSet : RVVSignedBinBuiltinSet, RVVUnsignedBinBuiltinSet; multiclass RVVSlideOneBuiltinSet : RVVOutOp1BuiltinSet<NAME, "csil", [["vx", "v", "vve"], ["vx", "Uv", "UvUve"]]>; multiclass RVVSignedShiftBuiltinSet : RVVOutOp1BuiltinSet<NAME, "csil", [["vv", "v", "vvUv"], ["vx", "v", "vvz"]]>; multiclass RVVUnsignedShiftBuiltinSet : RVVOutOp1BuiltinSet<NAME, "csil", [["vv", "Uv", "UvUvUv"], ["vx", "Uv", "UvUvz"]]>; multiclass RVVShiftBuiltinSet : RVVSignedShiftBuiltinSet, RVVUnsignedShiftBuiltinSet; let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { multiclass RVVSignedNShiftBuiltinSet : RVVOutOp0Op1BuiltinSet<NAME, "csil", [["wv", "v", "vwUv"], ["wx", "v", "vwz"]]>; multiclass RVVUnsignedNShiftBuiltinSet : RVVOutOp0Op1BuiltinSet<NAME, "csil", [["wv", "Uv", "UvUwUv"], ["wx", "Uv", "UvUwz"]]>; } multiclass RVVCarryinBuiltinSet : RVVOutOp1BuiltinSet<NAME, "csil", [["vvm", "v", "vvvm"], ["vxm", "v", "vvem"], ["vvm", "Uv", "UvUvUvm"], ["vxm", "Uv", "UvUvUem"]]>; multiclass RVVCarryOutInBuiltinSet<string intrinsic_name> : RVVOp0Op1BuiltinSet<intrinsic_name, "csil", [["vvm", "vm", "mvvm"], ["vxm", "vm", "mvem"], ["vvm", "Uvm", "mUvUvm"], ["vxm", "Uvm", "mUvUem"]]>; multiclass RVVSignedMaskOutBuiltinSet : RVVOp0Op1BuiltinSet<NAME, "csil", [["vv", "vm", "mvv"], ["vx", "vm", "mve"]]>; multiclass RVVUnsignedMaskOutBuiltinSet : RVVOp0Op1BuiltinSet<NAME, "csil", [["vv", "Uvm", "mUvUv"], ["vx", "Uvm", "mUvUe"]]>; multiclass RVVIntMaskOutBuiltinSet : RVVSignedMaskOutBuiltinSet, RVVUnsignedMaskOutBuiltinSet; class RVVIntExt<string intrinsic_name, string suffix, string prototype, string type_range> : RVVBuiltin<suffix, prototype, type_range> { let IRName = intrinsic_name; let MaskedIRName = intrinsic_name # "_mask"; let OverloadedName = NAME; let IntrinsicTypes = [-1, 0]; } let HasMaskedOffOperand = false in { multiclass RVVIntTerBuiltinSet { defm "" : RVVOutOp1BuiltinSet<NAME, "csil", [["vv", "v", "vvvv"], ["vx", "v", "vvev"], ["vv", "Uv", "UvUvUvUv"], ["vx", "Uv", "UvUvUeUv"]]>; } multiclass RVVFloatingTerBuiltinSet { defm "" : RVVOutOp1BuiltinSet<NAME, "xfd", [["vv", "v", "vvvv"], ["vf", "v", "vvev"]]>; } } let HasMaskedOffOperand = false, Log2LMUL = [-2, -1, 0, 1, 2] in { multiclass RVVFloatingWidenTerBuiltinSet { defm "" : RVVOutOp1Op2BuiltinSet<NAME, "xf", [["vv", "w", "wwvv"], ["vf", "w", "wwev"]]>; } } multiclass RVVFloatingBinBuiltinSet : RVVOutOp1BuiltinSet<NAME, "xfd", [["vv", "v", "vvv"], ["vf", "v", "vve"]]>; multiclass RVVFloatingBinVFBuiltinSet : RVVOutOp1BuiltinSet<NAME, "xfd", [["vf", "v", "vve"]]>; multiclass RVVFloatingMaskOutBuiltinSet : RVVOp0Op1BuiltinSet<NAME, "xfd", [["vv", "vm", "mvv"], ["vf", "vm", "mve"]]>; multiclass RVVFloatingMaskOutVFBuiltinSet : RVVOp0Op1BuiltinSet<NAME, "fd", [["vf", "vm", "mve"]]>; class RVVMaskBinBuiltin : RVVOutBuiltin<"m", "mmm", "c"> { let Name = NAME # "_mm"; let HasMasked = false; } class RVVMaskUnaryBuiltin : RVVOutBuiltin<"m", "mm", "c"> { let Name = NAME # "_m"; } class RVVMaskNullaryBuiltin : RVVOutBuiltin<"m", "m", "c"> { let Name = NAME # "_m"; let HasMasked = false; let HasUnMaskedOverloaded = false; } class RVVMaskOp0Builtin<string prototype> : RVVOp0Builtin<"m", prototype, "c"> { let Name = NAME # "_m"; let HasMaskedOffOperand = false; } let UnMaskedPolicyScheme = HasPolicyOperand, HasMaskedOffOperand = false in { multiclass RVVSlideBuiltinSet { defm "" : RVVOutBuiltinSet<NAME, "csilxfd", [["vx","v", "vvvz"]]>; defm "" : RVVOutBuiltinSet<NAME, "csil", [["vx","Uv", "UvUvUvz"]]>; } } class RVVFloatingUnaryBuiltin<string builtin_suffix, string ir_suffix, string prototype> : RVVOutBuiltin<ir_suffix, prototype, "xfd"> { let Name = NAME # "_" # builtin_suffix; } class RVVFloatingUnaryVVBuiltin : RVVFloatingUnaryBuiltin<"v", "v", "vv">; class RVVConvBuiltin<string suffix, string prototype, string type_range, string overloaded_name> : RVVBuiltin<suffix, prototype, type_range> { let IntrinsicTypes = [-1, 0]; let OverloadedName = overloaded_name; } class RVVConvToSignedBuiltin<string overloaded_name> : RVVConvBuiltin<"Iv", "Ivv", "xfd", overloaded_name>; class RVVConvToUnsignedBuiltin<string overloaded_name> : RVVConvBuiltin<"Uv", "Uvv", "xfd", overloaded_name>; class RVVConvToWidenSignedBuiltin<string overloaded_name> : RVVConvBuiltin<"Iw", "Iwv", "xf", overloaded_name>; class RVVConvToWidenUnsignedBuiltin<string overloaded_name> : RVVConvBuiltin<"Uw", "Uwv", "xf", overloaded_name>; class RVVConvToNarrowingSignedBuiltin<string overloaded_name> : RVVConvBuiltin<"Iv", "IvFw", "csi", overloaded_name>; class RVVConvToNarrowingUnsignedBuiltin<string overloaded_name> : RVVConvBuiltin<"Uv", "UvFw", "csi", overloaded_name>; let HasMaskedOffOperand = false in { multiclass RVVSignedReductionBuiltin { defm "" : RVVOutOp1BuiltinSet<NAME, "csil", [["vs", "vSv", "SvSvvSv"]]>; } multiclass RVVUnsignedReductionBuiltin { defm "" : RVVOutOp1BuiltinSet<NAME, "csil", [["vs", "UvUSv", "USvUSvUvUSv"]]>; } multiclass RVVFloatingReductionBuiltin { defm "" : RVVOutOp1BuiltinSet<NAME, "xfd", [["vs", "vSv", "SvSvvSv"]]>; } multiclass RVVFloatingWidenReductionBuiltin { defm "" : RVVOutOp1BuiltinSet<NAME, "xf", [["vs", "vSw", "SwSwvSw"]]>; } } multiclass RVVIntReductionBuiltinSet : RVVSignedReductionBuiltin, RVVUnsignedReductionBuiltin; // For widen operation which has different mangling name. multiclass RVVWidenBuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> { let Log2LMUL = [-3, -2, -1, 0, 1, 2], IRName = intrinsic_name, MaskedIRName = intrinsic_name # "_mask" in { foreach s_p = suffixes_prototypes in { let Name = NAME # "_" # s_p[0], OverloadedName = NAME # "_" # s_p[0] in { defvar suffix = s_p[1]; defvar prototype = s_p[2]; def : RVVOutOp0Op1Builtin<suffix, prototype, type_range>; } } } } // For widen operation with widen operand which has different mangling name. multiclass RVVWidenWOp0BuiltinSet<string intrinsic_name, string type_range, list<list<string>> suffixes_prototypes> { let Log2LMUL = [-3, -2, -1, 0, 1, 2], IRName = intrinsic_name, MaskedIRName = intrinsic_name # "_mask" in { foreach s_p = suffixes_prototypes in { let Name = NAME # "_" # s_p[0], OverloadedName = NAME # "_" # s_p[0] in { defvar suffix = s_p[1]; defvar prototype = s_p[2]; def : RVVOutOp1Builtin<suffix, prototype, type_range>; } } } } multiclass RVVSignedWidenBinBuiltinSet : RVVWidenBuiltinSet<NAME, "csi", [["vv", "w", "wvv"], ["vx", "w", "wve"]]>; multiclass RVVSignedWidenOp0BinBuiltinSet : RVVWidenWOp0BuiltinSet<NAME # "_w", "csi", [["wv", "w", "wwv"], ["wx", "w", "wwe"]]>; multiclass RVVUnsignedWidenBinBuiltinSet : RVVWidenBuiltinSet<NAME, "csi", [["vv", "Uw", "UwUvUv"], ["vx", "Uw", "UwUvUe"]]>; multiclass RVVUnsignedWidenOp0BinBuiltinSet : RVVWidenWOp0BuiltinSet<NAME # "_w", "csi", [["wv", "Uw", "UwUwUv"], ["wx", "Uw", "UwUwUe"]]>; multiclass RVVFloatingWidenBinBuiltinSet : RVVWidenBuiltinSet<NAME, "xf", [["vv", "w", "wvv"], ["vf", "w", "wve"]]>; multiclass RVVFloatingWidenOp0BinBuiltinSet : RVVWidenWOp0BuiltinSet<NAME # "_w", "xf", [["wv", "w", "wwv"], ["wf", "w", "wwe"]]>; defvar TypeList = ["c","s","i","l","x","f","d"]; defvar EEWList = [["8", "(Log2EEW:3)"], ["16", "(Log2EEW:4)"], ["32", "(Log2EEW:5)"], ["64", "(Log2EEW:6)"]]; class IsFloat<string type> { bit val = !or(!eq(type, "x"), !eq(type, "f"), !eq(type, "d")); } let HasUnMaskedOverloaded = false, MaskedPolicyScheme = NonePolicy in { class RVVVLEMaskBuiltin : RVVOutBuiltin<"m", "mPCUe", "c"> { let Name = "vlm_v"; let IRName = "vlm"; let HasMasked = false; } } let HasUnMaskedOverloaded = false, UnMaskedPolicyScheme = HasPassthruOperand in { multiclass RVVVLEBuiltin<list<string> types> { let Name = NAME # "_v", IRName = "vle", MaskedIRName ="vle_mask" in { foreach type = types in { def : RVVOutBuiltin<"v", "vPCe", type>; if !not(IsFloat<type>.val) then { def : RVVOutBuiltin<"Uv", "UvPCUe", type>; } } } } } multiclass RVVVLEFFBuiltin<list<string> types> { let Name = NAME # "_v", IRName = "vleff", MaskedIRName = "vleff_mask", HasUnMaskedOverloaded = false, ManualCodegen = [{ { IntrinsicTypes = {ResultType, Ops[2]->getType()}; Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo()); Value *NewVL = Ops[1]; Ops.erase(Ops.begin() + 1); Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Ops, ""); llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0}); // Store new_vl. clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(1)->getType()); llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {1}); Builder.CreateStore(Val, Address(NewVL, Val->getType(), Align)); return V; } }], MaskedManualCodegen = [{ { // Move mask to right before vl. std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); IntrinsicTypes = {ResultType, Ops[4]->getType()}; Ops[1] = Builder.CreateBitCast(Ops[1], ResultType->getPointerTo()); Value *NewVL = Ops[2]; Ops.erase(Ops.begin() + 2); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Ops, ""); llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0}); // Store new_vl. clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(3)->getType()); llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {1}); Builder.CreateStore(Val, Address(NewVL, Val->getType(), Align)); return V; } }] in { foreach type = types in { def : RVVBuiltin<"v", "vPCePz", type>; // Skip floating types for unsigned versions. if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "UvPCUePz", type>; } } } } multiclass RVVVLSEBuiltin<list<string> types> { let Name = NAME # "_v", IRName = "vlse", MaskedIRName ="vlse_mask", HasUnMaskedOverloaded = false, UnMaskedPolicyScheme = HasPassthruOperand in { foreach type = types in { def : RVVOutBuiltin<"v", "vPCet", type>; if !not(IsFloat<type>.val) then { def : RVVOutBuiltin<"Uv", "UvPCUet", type>; } } } } multiclass RVVIndexedLoad<string op> { let UnMaskedPolicyScheme = HasPassthruOperand in { foreach type = TypeList in { foreach eew_list = EEWList[0-2] in { defvar eew = eew_list[0]; defvar eew_type = eew_list[1]; let Name = op # eew # "_v", IRName = op, MaskedIRName = op # "_mask" in { def: RVVOutOp1Builtin<"v", "vPCe" # eew_type # "Uv", type>; if !not(IsFloat<type>.val) then { def: RVVOutOp1Builtin<"Uv", "UvPCUe" # eew_type # "Uv", type>; } } } defvar eew64 = "64"; defvar eew64_type = "(Log2EEW:6)"; let Name = op # eew64 # "_v", IRName = op, MaskedIRName = op # "_mask", RequiredFeatures = ["RV64"] in { def: RVVOutOp1Builtin<"v", "vPCe" # eew64_type # "Uv", type>; if !not(IsFloat<type>.val) then { def: RVVOutOp1Builtin<"Uv", "UvPCUe" # eew64_type # "Uv", type>; } } } } } let HasMaskedOffOperand = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ // Builtin: (ptr, value, vl). Intrinsic: (value, ptr, vl) std::swap(Ops[0], Ops[1]); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType()}; }], MaskedManualCodegen= [{ // Builtin: (mask, ptr, value, vl). Intrinsic: (value, ptr, mask, vl) std::swap(Ops[0], Ops[2]); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()}; }] in { class RVVVSEMaskBuiltin : RVVBuiltin<"m", "0PUem", "c"> { let Name = "vsm_v"; let IRName = "vsm"; let HasMasked = false; } multiclass RVVVSEBuiltin<list<string> types> { let Name = NAME # "_v", IRName = "vse", MaskedIRName = "vse_mask" in { foreach type = types in { def : RVVBuiltin<"v", "0Pev", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0PUeUv", type>; } } } } } multiclass RVVVSSEBuiltin<list<string> types> { let Name = NAME # "_v", IRName = "vsse", MaskedIRName = "vsse_mask", HasMaskedOffOperand = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ // Builtin: (ptr, stride, value, vl). Intrinsic: (value, ptr, stride, vl) std::rotate(Ops.begin(), Ops.begin() + 2, Ops.begin() + 3); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); IntrinsicTypes = {Ops[0]->getType(), Ops[3]->getType()}; }], MaskedManualCodegen= [{ // Builtin: (mask, ptr, stride, value, vl). Intrinsic: (value, ptr, stride, mask, vl) std::swap(Ops[0], Ops[3]); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); IntrinsicTypes = {Ops[0]->getType(), Ops[4]->getType()}; }] in { foreach type = types in { def : RVVBuiltin<"v", "0Petv", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0PUetUv", type>; } } } } multiclass RVVIndexedStore<string op> { let HasMaskedOffOperand = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ // Builtin: (ptr, index, value, vl). Intrinsic: (value, ptr, index, vl) std::rotate(Ops.begin(), Ops.begin() + 2, Ops.begin() + 3); Ops[1] = Builder.CreateBitCast(Ops[1],Ops[0]->getType()->getPointerTo()); IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType(), Ops[3]->getType()}; }], MaskedManualCodegen= [{ // Builtin: (mask, ptr, index, value, vl). Intrinsic: (value, ptr, index, mask, vl) std::swap(Ops[0], Ops[3]); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()->getPointerTo()); IntrinsicTypes = {Ops[0]->getType(), Ops[2]->getType(), Ops[4]->getType()}; }] in { foreach type = TypeList in { foreach eew_list = EEWList[0-2] in { defvar eew = eew_list[0]; defvar eew_type = eew_list[1]; let Name = op # eew # "_v", IRName = op, MaskedIRName = op # "_mask" in { def : RVVBuiltin<"v", "0Pe" # eew_type # "Uvv", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0PUe" # eew_type # "UvUv", type>; } } } defvar eew64 = "64"; defvar eew64_type = "(Log2EEW:6)"; let Name = op # eew64 # "_v", IRName = op, MaskedIRName = op # "_mask", RequiredFeatures = ["RV64"] in { def : RVVBuiltin<"v", "0Pe" # eew64_type # "Uvv", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0PUe" # eew64_type # "UvUv", type>; } } } } } defvar NFList = [2, 3, 4, 5, 6, 7, 8]; class PVString<int nf, bit signed> { string S = !cond(!eq(nf, 2): !if(signed, "PvPv", "PUvPUv"), !eq(nf, 3): !if(signed, "PvPvPv", "PUvPUvPUv"), !eq(nf, 4): !if(signed, "PvPvPvPv", "PUvPUvPUvPUv"), !eq(nf, 5): !if(signed, "PvPvPvPvPv", "PUvPUvPUvPUvPUv"), !eq(nf, 6): !if(signed, "PvPvPvPvPvPv", "PUvPUvPUvPUvPUvPUv"), !eq(nf, 7): !if(signed, "PvPvPvPvPvPvPv", "PUvPUvPUvPUvPUvPUvPUv"), !eq(nf, 8): !if(signed, "PvPvPvPvPvPvPvPv", "PUvPUvPUvPUvPUvPUvPUvPUv")); } multiclass RVVUnitStridedSegLoad<string op> { foreach type = TypeList in { defvar eew = !cond(!eq(type, "c") : "8", !eq(type, "s") : "16", !eq(type, "i") : "32", !eq(type, "l") : "64", !eq(type, "x") : "16", !eq(type, "f") : "32", !eq(type, "d") : "64"); foreach nf = NFList in { let Name = op # nf # "e" # eew # "_v", IRName = op # nf, MaskedIRName = op # nf # "_mask", NF = nf, HasUnMaskedOverloaded = false, ManualCodegen = [{ { // builtin: (val0 address, val1 address, ..., ptr, vl) ResultType = ConvertType(E->getArg(0)->getType()->getPointeeType()); IntrinsicTypes = {ResultType, Ops[NF + 1]->getType()}; // intrinsic: (passthru0, passthru1, ..., ptr, vl) SmallVector<llvm::Value*, 10> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(llvm::UndefValue::get(ResultType)); Operands.push_back(Ops[NF]); Operands.push_back(Ops[NF + 1]); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(NF)->getType()); llvm::Value *V; for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); V = Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } return V; } }], MaskedManualCodegen = [{ { // builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, vl) // intrinsic: (maskedoff0, ..., ptr, mask, vl) IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()), Ops[2 * NF + 2]->getType()}; SmallVector<llvm::Value*, 12> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(Ops[NF + I + 1]); Operands.push_back(Ops[2 * NF + 1]); Operands.push_back(Ops[NF]); Operands.push_back(Ops[2 * NF + 2]); Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); assert(Operands.size() == NF + 4); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); llvm::Value *V; for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); V = Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } return V; } }] in { defvar PV = PVString<nf, /*signed=*/true>.S; defvar PUV = PVString<nf, /*signed=*/false>.S; def : RVVBuiltin<"v", "0" # PV # "PCe", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0" # PUV # "PCUe", type>; } } } } } multiclass RVVUnitStridedSegLoadFF<string op> { foreach type = TypeList in { defvar eew = !cond(!eq(type, "c") : "8", !eq(type, "s") : "16", !eq(type, "i") : "32", !eq(type, "l") : "64", !eq(type, "x") : "16", !eq(type, "f") : "32", !eq(type, "d") : "64"); foreach nf = NFList in { let Name = op # nf # "e" # eew # "ff_v", IRName = op # nf # "ff", MaskedIRName = op # nf # "ff_mask", NF = nf, HasUnMaskedOverloaded = false, ManualCodegen = [{ { // builtin: (val0 address, val1 address, ..., ptr, new_vl, vl) ResultType = ConvertType(E->getArg(0)->getType()->getPointeeType()); IntrinsicTypes = {ResultType, Ops[NF + 2]->getType()}; // intrinsic: (passthru0, passthru1, ..., ptr, vl) SmallVector<llvm::Value*, 12> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(llvm::UndefValue::get(ResultType)); Operands.push_back(Ops[NF]); Operands.push_back(Ops[NF + 2]); Value *NewVL = Ops[NF + 1]; llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } // Store new_vl. llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {NF}); return Builder.CreateStore(Val, Address(NewVL, Val->getType(), Align)); } }], MaskedManualCodegen = [{ { // builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, new_vl, vl) // intrinsic: (maskedoff0, ..., ptr, mask, vl) IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()), Ops[2 * NF + 3]->getType()}; SmallVector<llvm::Value*, 12> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(Ops[NF + I + 1]); Operands.push_back(Ops[2 * NF + 1]); Operands.push_back(Ops[NF]); Operands.push_back(Ops[2 * NF + 3]); Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); Value *NewVL = Ops[2 * NF + 2]; assert(Operands.size() == NF + 4); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } // Store new_vl. llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {NF}); return Builder.CreateStore(Val, Address(NewVL, Val->getType(), Align)); } }] in { defvar PV = PVString<nf, /*signed=*/true>.S; defvar PUV = PVString<nf, /*signed=*/false>.S; def : RVVBuiltin<"v", "0" # PV # "PCe" # "Pz", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0" # PUV # "PCUe" # "Pz", type>; } } } } } multiclass RVVStridedSegLoad<string op> { foreach type = TypeList in { defvar eew = !cond(!eq(type, "c") : "8", !eq(type, "s") : "16", !eq(type, "i") : "32", !eq(type, "l") : "64", !eq(type, "x") : "16", !eq(type, "f") : "32", !eq(type, "d") : "64"); foreach nf = NFList in { let Name = op # nf # "e" # eew # "_v", IRName = op # nf, MaskedIRName = op # nf # "_mask", NF = nf, HasUnMaskedOverloaded = false, ManualCodegen = [{ { // builtin: (val0 address, val1 address, ..., ptr, stride, vl) ResultType = ConvertType(E->getArg(0)->getType()->getPointeeType()); IntrinsicTypes = {ResultType, Ops[NF + 2]->getType()}; // intrinsic: (passthru0, passthru1, ..., ptr, stride, vl) SmallVector<llvm::Value*, 12> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(llvm::UndefValue::get(ResultType)); Operands.push_back(Ops[NF]); Operands.push_back(Ops[NF + 1]); Operands.push_back(Ops[NF + 2]); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); llvm::Value *V; for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); V = Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } return V; } }], MaskedManualCodegen = [{ { // builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, stride, vl) // intrinsic: (maskedoff0, ..., ptr, stride, mask, vl) IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()), Ops[2 * NF + 3]->getType()}; SmallVector<llvm::Value*, 12> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(Ops[NF + I + 1]); Operands.push_back(Ops[2 * NF + 1]); Operands.push_back(Ops[2 * NF + 2]); Operands.push_back(Ops[NF]); Operands.push_back(Ops[2 * NF + 3]); Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); assert(Operands.size() == NF + 5); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); llvm::Value *V; for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); V = Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } return V; } }] in { defvar PV = PVString<nf, /*signed=*/true>.S; defvar PUV = PVString<nf, /*signed=*/false>.S; def : RVVBuiltin<"v", "0" # PV # "PCe" # "t", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0" # PUV # "PCUe" # "t", type>; } } } } } multiclass RVVIndexedSegLoad<string op> { foreach type = TypeList in { foreach eew_info = EEWList in { defvar eew = eew_info[0]; defvar eew_type = eew_info[1]; foreach nf = NFList in { let Name = op # nf # "ei" # eew # "_v", IRName = op # nf, MaskedIRName = op # nf # "_mask", NF = nf, ManualCodegen = [{ { // builtin: (val0 address, val1 address, ..., ptr, index, vl) ResultType = ConvertType(E->getArg(0)->getType()->getPointeeType()); IntrinsicTypes = {ResultType, Ops[NF + 1]->getType(), Ops[NF + 2]->getType()}; // intrinsic: (passthru0, passthru1, ..., ptr, index, vl) SmallVector<llvm::Value*, 12> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(llvm::UndefValue::get(ResultType)); Operands.push_back(Ops[NF]); Operands.push_back(Ops[NF + 1]); Operands.push_back(Ops[NF + 2]); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); llvm::Value *V; for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); V = Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } return V; } }], MaskedManualCodegen = [{ { // builtin: (val0 address, ..., mask, maskedoff0, ..., ptr, index, vl) IntrinsicTypes = {ConvertType(E->getArg(0)->getType()->getPointeeType()), Ops[2 * NF + 2]->getType(), Ops[2 * NF + 3]->getType()}; // intrinsic: (maskedoff0, ..., ptr, index, mask, vl) SmallVector<llvm::Value*, 12> Operands; for (unsigned I = 0; I < NF; ++I) Operands.push_back(Ops[NF + I + 1]); Operands.push_back(Ops[2 * NF + 1]); Operands.push_back(Ops[2 * NF + 2]); Operands.push_back(Ops[NF]); Operands.push_back(Ops[2 * NF + 3]); Operands.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); assert(Operands.size() == NF + 5); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); llvm::Value *LoadValue = Builder.CreateCall(F, Operands, ""); clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(E->getArg(0)->getType()); llvm::Value *V; for (unsigned I = 0; I < NF; ++I) { llvm::Value *Val = Builder.CreateExtractValue(LoadValue, {I}); V = Builder.CreateStore(Val, Address(Ops[I], Val->getType(), Align)); } return V; } }] in { defvar PV = PVString<nf, /*signed=*/true>.S; defvar PUV = PVString<nf, /*signed=*/false>.S; def : RVVBuiltin<"v", "0" # PV # "PCe" # eew_type # "Uv", type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0" # PUV # "PCUe" # eew_type # "Uv", type>; } } } } } } class VString<int nf, bit signed> { string S = !cond(!eq(nf, 2): !if(signed, "vv", "UvUv"), !eq(nf, 3): !if(signed, "vvv", "UvUvUv"), !eq(nf, 4): !if(signed, "vvvv", "UvUvUvUv"), !eq(nf, 5): !if(signed, "vvvvv", "UvUvUvUvUv"), !eq(nf, 6): !if(signed, "vvvvvv", "UvUvUvUvUvUv"), !eq(nf, 7): !if(signed, "vvvvvvv", "UvUvUvUvUvUvUv"), !eq(nf, 8): !if(signed, "vvvvvvvv", "UvUvUvUvUvUvUvUv")); } multiclass RVVUnitStridedSegStore<string op> { foreach type = TypeList in { defvar eew = !cond(!eq(type, "c") : "8", !eq(type, "s") : "16", !eq(type, "i") : "32", !eq(type, "l") : "64", !eq(type, "x") : "16", !eq(type, "f") : "32", !eq(type, "d") : "64"); foreach nf = NFList in { let Name = op # nf # "e" # eew # "_v", IRName = op # nf, MaskedIRName = op # nf # "_mask", NF = nf, HasMaskedOffOperand = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { // Builtin: (ptr, val0, val1, ..., vl) // Intrinsic: (val0, val1, ..., ptr, vl) std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); IntrinsicTypes = {Ops[0]->getType(), Ops[NF + 1]->getType()}; assert(Ops.size() == NF + 2); } }], MaskedManualCodegen = [{ { // Builtin: (mask, ptr, val0, val1, ..., vl) // Intrinsic: (val0, val1, ..., ptr, mask, vl) std::rotate(Ops.begin(), Ops.begin() + 2, Ops.end() - 1); std::swap(Ops[NF], Ops[NF + 1]); IntrinsicTypes = {Ops[0]->getType(), Ops[NF + 2]->getType()}; assert(Ops.size() == NF + 3); } }] in { defvar V = VString<nf, /*signed=*/true>.S; defvar UV = VString<nf, /*signed=*/false>.S; def : RVVBuiltin<"v", "0Pe" # V, type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0PUe" # UV, type>; } } } } } multiclass RVVStridedSegStore<string op> { foreach type = TypeList in { defvar eew = !cond(!eq(type, "c") : "8", !eq(type, "s") : "16", !eq(type, "i") : "32", !eq(type, "l") : "64", !eq(type, "x") : "16", !eq(type, "f") : "32", !eq(type, "d") : "64"); foreach nf = NFList in { let Name = op # nf # "e" # eew # "_v", IRName = op # nf, MaskedIRName = op # nf # "_mask", NF = nf, HasMaskedOffOperand = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { // Builtin: (ptr, stride, val0, val1, ..., vl). // Intrinsic: (val0, val1, ..., ptr, stride, vl) std::rotate(Ops.begin(), Ops.begin() + 2, Ops.end() - 1); IntrinsicTypes = {Ops[0]->getType(), Ops[NF + 1]->getType()}; assert(Ops.size() == NF + 3); } }], MaskedManualCodegen = [{ { // Builtin: (mask, ptr, stride, val0, val1, ..., vl). // Intrinsic: (val0, val1, ..., ptr, stride, mask, vl) std::rotate(Ops.begin(), Ops.begin() + 3, Ops.end() - 1); std::rotate(Ops.begin() + NF, Ops.begin() + NF + 1, Ops.begin() + NF + 3); IntrinsicTypes = {Ops[0]->getType(), Ops[NF + 1]->getType()}; assert(Ops.size() == NF + 4); } }] in { defvar V = VString<nf, /*signed=*/true>.S; defvar UV = VString<nf, /*signed=*/false>.S; def : RVVBuiltin<"v", "0Pet" # V, type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0PUet" # UV, type>; } } } } } multiclass RVVIndexedSegStore<string op> { foreach type = TypeList in { foreach eew_info = EEWList in { defvar eew = eew_info[0]; defvar eew_type = eew_info[1]; foreach nf = NFList in { let Name = op # nf # "ei" # eew # "_v", IRName = op # nf, MaskedIRName = op # nf # "_mask", NF = nf, HasMaskedOffOperand = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { // Builtin: (ptr, index, val0, val1, ..., vl) // Intrinsic: (val0, val1, ..., ptr, index, vl) std::rotate(Ops.begin(), Ops.begin() + 2, Ops.end() - 1); IntrinsicTypes = {Ops[0]->getType(), Ops[NF + 1]->getType(), Ops[NF + 2]->getType()}; assert(Ops.size() == NF + 3); } }], MaskedManualCodegen = [{ { // Builtin: (mask, ptr, index, val0, val1, ..., vl) // Intrinsic: (val0, val1, ..., ptr, index, mask, vl) std::rotate(Ops.begin(), Ops.begin() + 3, Ops.end() - 1); std::rotate(Ops.begin() + NF, Ops.begin() + NF + 1, Ops.begin() + NF + 3); IntrinsicTypes = {Ops[0]->getType(), Ops[NF + 1]->getType(), Ops[NF + 3]->getType()}; assert(Ops.size() == NF + 4); } }] in { defvar V = VString<nf, /*signed=*/true>.S; defvar UV = VString<nf, /*signed=*/false>.S; def : RVVBuiltin<"v", "0Pe" # eew_type # "Uv" # V, type>; if !not(IsFloat<type>.val) then { def : RVVBuiltin<"Uv", "0PUe" # eew_type # "Uv" # UV, type>; } } } } } } multiclass RVVPseudoUnaryBuiltin<string IR, string type_range> { let Name = NAME, IRName = IR, MaskedIRName = IR # "_mask", ManualCodegen = [{ { // op1, vl IntrinsicTypes = {ResultType, cast<llvm::VectorType>(ResultType)->getElementType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[1])); // insert undef passthru Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], MaskedManualCodegen = [{ { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, cast<llvm::VectorType>(ResultType)->getElementType(), Ops[3]->getType()}; Ops.insert(Ops.begin() + 2, llvm::Constant::getNullValue(IntrinsicTypes[1])); break; } }] in { def : RVVBuiltin<"v", "vv", type_range>; } } multiclass RVVPseudoVNotBuiltin<string IR, string type_range> { let Name = NAME, IRName = IR, MaskedIRName = IR # "_mask", ManualCodegen = [{ { // op1, vl IntrinsicTypes = {ResultType, cast<llvm::VectorType>(ResultType)->getElementType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getAllOnesValue(IntrinsicTypes[1])); // insert undef passthru Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], MaskedManualCodegen = [{ { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, cast<llvm::VectorType>(ResultType)->getElementType(), Ops[3]->getType()}; Ops.insert(Ops.begin() + 2, llvm::Constant::getAllOnesValue(IntrinsicTypes[1])); break; } }] in { def : RVVBuiltin<"v", "vv", type_range>; def : RVVBuiltin<"Uv", "UvUv", type_range>; } } multiclass RVVPseudoMaskBuiltin<string IR, string type_range> { let Name = NAME, IRName = IR, HasMasked = false, ManualCodegen = [{ { // op1, vl IntrinsicTypes = {ResultType, Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, Ops[0]); break; } }] in { def : RVVBuiltin<"m", "mm", type_range>; } } multiclass RVVPseudoVFUnaryBuiltin<string IR, string type_range> { let Name = NAME, IRName = IR, MaskedIRName = IR # "_mask", ManualCodegen = [{ { // op1, vl IntrinsicTypes = {ResultType, Ops[0]->getType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, Ops[0]); // insert undef passthru Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], MaskedManualCodegen = [{ { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()}; Ops.insert(Ops.begin() + 2, Ops[1]); break; } }] in { def : RVVBuiltin<"v", "vv", type_range>; } } multiclass RVVPseudoVWCVTBuiltin<string IR, string MName, string type_range, list<list<string>> suffixes_prototypes> { let Name = NAME, OverloadedName = MName, IRName = IR, MaskedIRName = IR # "_mask", ManualCodegen = [{ { // op1, vl IntrinsicTypes = {ResultType, Ops[0]->getType(), cast<llvm::VectorType>(Ops[0]->getType())->getElementType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[2])); // insert undef passthru Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], MaskedManualCodegen = [{ { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, Ops[1]->getType(), cast<llvm::VectorType>(Ops[1]->getType())->getElementType(), Ops[3]->getType()}; Ops.insert(Ops.begin() + 2, llvm::Constant::getNullValue(IntrinsicTypes[2])); break; } }] in { foreach s_p = suffixes_prototypes in { def : RVVBuiltin<s_p[0], s_p[1], type_range>; } } } multiclass RVVPseudoVNCVTBuiltin<string IR, string MName, string type_range, list<list<string>> suffixes_prototypes> { let Name = NAME, OverloadedName = MName, IRName = IR, MaskedIRName = IR # "_mask", ManualCodegen = [{ { // op1, vl IntrinsicTypes = {ResultType, Ops[0]->getType(), Ops[1]->getType(), Ops[1]->getType()}; Ops.insert(Ops.begin() + 1, llvm::Constant::getNullValue(IntrinsicTypes[2])); // insert undef passthru Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); break; } }], MaskedManualCodegen = [{ { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end() - 1); Ops.push_back(ConstantInt::get(Ops.back()->getType(), TAIL_UNDISTURBED)); // maskedoff, op1, mask, vl IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType(), Ops[3]->getType()}; Ops.insert(Ops.begin() + 2, llvm::Constant::getNullValue(IntrinsicTypes[2])); break; } }] in { foreach s_p = suffixes_prototypes in { def : RVVBuiltin<s_p[0], s_p[1], type_range>; } } } // Define vread_csr&vwrite_csr described in RVV intrinsics doc. let HeaderCode = [{ enum RVV_CSR { RVV_VSTART = 0, RVV_VXSAT, RVV_VXRM, RVV_VCSR, }; static __inline__ __attribute__((__always_inline__, __nodebug__)) unsigned long vread_csr(enum RVV_CSR __csr) { unsigned long __rv = 0; switch (__csr) { case RVV_VSTART: __asm__ __volatile__ ("csrr\t%0, vstart" : "=r"(__rv) : : "memory"); break; case RVV_VXSAT: __asm__ __volatile__ ("csrr\t%0, vxsat" : "=r"(__rv) : : "memory"); break; case RVV_VXRM: __asm__ __volatile__ ("csrr\t%0, vxrm" : "=r"(__rv) : : "memory"); break; case RVV_VCSR: __asm__ __volatile__ ("csrr\t%0, vcsr" : "=r"(__rv) : : "memory"); break; } return __rv; } static __inline__ __attribute__((__always_inline__, __nodebug__)) void vwrite_csr(enum RVV_CSR __csr, unsigned long __value) { switch (__csr) { case RVV_VSTART: __asm__ __volatile__ ("csrw\tvstart, %z0" : : "rJ"(__value) : "memory"); break; case RVV_VXSAT: __asm__ __volatile__ ("csrw\tvxsat, %z0" : : "rJ"(__value) : "memory"); break; case RVV_VXRM: __asm__ __volatile__ ("csrw\tvxrm, %z0" : : "rJ"(__value) : "memory"); break; case RVV_VCSR: __asm__ __volatile__ ("csrw\tvcsr, %z0" : : "rJ"(__value) : "memory"); break; } } }] in def vread_vwrite_csr: RVVHeader; // 6. Configuration-Setting Instructions // 6.1. vsetvli/vsetvl instructions // vsetvl/vsetvlmax are a macro because they require constant integers in SEW // and LMUL. let HeaderCode = [{ #define vsetvl_e8mf8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 5) #define vsetvl_e8mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 6) #define vsetvl_e8mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 7) #define vsetvl_e8m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 0) #define vsetvl_e8m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 1) #define vsetvl_e8m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 2) #define vsetvl_e8m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 0, 3) #define vsetvl_e16mf4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 6) #define vsetvl_e16mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 7) #define vsetvl_e16m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 0) #define vsetvl_e16m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 1) #define vsetvl_e16m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 2) #define vsetvl_e16m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 1, 3) #define vsetvl_e32mf2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 7) #define vsetvl_e32m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 0) #define vsetvl_e32m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 1) #define vsetvl_e32m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 2) #define vsetvl_e32m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 2, 3) #define vsetvl_e64m1(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 0) #define vsetvl_e64m2(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 1) #define vsetvl_e64m4(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 2) #define vsetvl_e64m8(avl) __builtin_rvv_vsetvli((size_t)(avl), 3, 3) #define vsetvlmax_e8mf8() __builtin_rvv_vsetvlimax(0, 5) #define vsetvlmax_e8mf4() __builtin_rvv_vsetvlimax(0, 6) #define vsetvlmax_e8mf2() __builtin_rvv_vsetvlimax(0, 7) #define vsetvlmax_e8m1() __builtin_rvv_vsetvlimax(0, 0) #define vsetvlmax_e8m2() __builtin_rvv_vsetvlimax(0, 1) #define vsetvlmax_e8m4() __builtin_rvv_vsetvlimax(0, 2) #define vsetvlmax_e8m8() __builtin_rvv_vsetvlimax(0, 3) #define vsetvlmax_e16mf4() __builtin_rvv_vsetvlimax(1, 6) #define vsetvlmax_e16mf2() __builtin_rvv_vsetvlimax(1, 7) #define vsetvlmax_e16m1() __builtin_rvv_vsetvlimax(1, 0) #define vsetvlmax_e16m2() __builtin_rvv_vsetvlimax(1, 1) #define vsetvlmax_e16m4() __builtin_rvv_vsetvlimax(1, 2) #define vsetvlmax_e16m8() __builtin_rvv_vsetvlimax(1, 3) #define vsetvlmax_e32mf2() __builtin_rvv_vsetvlimax(2, 7) #define vsetvlmax_e32m1() __builtin_rvv_vsetvlimax(2, 0) #define vsetvlmax_e32m2() __builtin_rvv_vsetvlimax(2, 1) #define vsetvlmax_e32m4() __builtin_rvv_vsetvlimax(2, 2) #define vsetvlmax_e32m8() __builtin_rvv_vsetvlimax(2, 3) #define vsetvlmax_e64m1() __builtin_rvv_vsetvlimax(3, 0) #define vsetvlmax_e64m2() __builtin_rvv_vsetvlimax(3, 1) #define vsetvlmax_e64m4() __builtin_rvv_vsetvlimax(3, 2) #define vsetvlmax_e64m8() __builtin_rvv_vsetvlimax(3, 3) }] in def vsetvl_macro: RVVHeader; let HasBuiltinAlias = false, HasVL = false, HasMasked = false, MaskedPolicyScheme = NonePolicy, Log2LMUL = [0], ManualCodegen = [{IntrinsicTypes = {ResultType};}] in // Set XLEN type { def vsetvli : RVVBuiltin<"", "zzKzKz", "i">; def vsetvlimax : RVVBuiltin<"", "zKzKz", "i">; } // 7. Vector Loads and Stores // 7.4. Vector Unit-Stride Instructions def vlm: RVVVLEMaskBuiltin; defm vle8: RVVVLEBuiltin<["c"]>; defm vle16: RVVVLEBuiltin<["s","x"]>; defm vle32: RVVVLEBuiltin<["i","f"]>; defm vle64: RVVVLEBuiltin<["l","d"]>; def vsm : RVVVSEMaskBuiltin; defm vse8 : RVVVSEBuiltin<["c"]>; defm vse16: RVVVSEBuiltin<["s","x"]>; defm vse32: RVVVSEBuiltin<["i","f"]>; defm vse64: RVVVSEBuiltin<["l","d"]>; // 7.5. Vector Strided Instructions defm vlse8: RVVVLSEBuiltin<["c"]>; defm vlse16: RVVVLSEBuiltin<["s","x"]>; defm vlse32: RVVVLSEBuiltin<["i","f"]>; defm vlse64: RVVVLSEBuiltin<["l","d"]>; defm vsse8 : RVVVSSEBuiltin<["c"]>; defm vsse16: RVVVSSEBuiltin<["s","x"]>; defm vsse32: RVVVSSEBuiltin<["i","f"]>; defm vsse64: RVVVSSEBuiltin<["l","d"]>; // 7.6. Vector Indexed Instructions defm : RVVIndexedLoad<"vluxei">; defm : RVVIndexedLoad<"vloxei">; defm : RVVIndexedStore<"vsuxei">; defm : RVVIndexedStore<"vsoxei">; // 7.7. Unit-stride Fault-Only-First Loads defm vle8ff: RVVVLEFFBuiltin<["c"]>; defm vle16ff: RVVVLEFFBuiltin<["s","x"]>; defm vle32ff: RVVVLEFFBuiltin<["i", "f"]>; defm vle64ff: RVVVLEFFBuiltin<["l", "d"]>; // 7.8 Vector Load/Store Segment Instructions defm : RVVUnitStridedSegLoad<"vlseg">; defm : RVVUnitStridedSegLoadFF<"vlseg">; defm : RVVStridedSegLoad<"vlsseg">; defm : RVVIndexedSegLoad<"vluxseg">; defm : RVVIndexedSegLoad<"vloxseg">; defm : RVVUnitStridedSegStore<"vsseg">; defm : RVVStridedSegStore<"vssseg">; defm : RVVIndexedSegStore<"vsuxseg">; defm : RVVIndexedSegStore<"vsoxseg">; // 12. Vector Integer Arithmetic Instructions // 12.1. Vector Single-Width Integer Add and Subtract let UnMaskedPolicyScheme = HasPassthruOperand in { defm vadd : RVVIntBinBuiltinSet; defm vsub : RVVIntBinBuiltinSet; defm vrsub : RVVOutOp1BuiltinSet<"vrsub", "csil", [["vx", "v", "vve"], ["vx", "Uv", "UvUvUe"]]>; } defm vneg_v : RVVPseudoUnaryBuiltin<"vrsub", "csil">; // 12.2. Vector Widening Integer Add/Subtract // Widening unsigned integer add/subtract, 2*SEW = SEW +/- SEW let UnMaskedPolicyScheme = HasPassthruOperand in { defm vwaddu : RVVUnsignedWidenBinBuiltinSet; defm vwsubu : RVVUnsignedWidenBinBuiltinSet; // Widening signed integer add/subtract, 2*SEW = SEW +/- SEW defm vwadd : RVVSignedWidenBinBuiltinSet; defm vwsub : RVVSignedWidenBinBuiltinSet; // Widening unsigned integer add/subtract, 2*SEW = 2*SEW +/- SEW defm vwaddu : RVVUnsignedWidenOp0BinBuiltinSet; defm vwsubu : RVVUnsignedWidenOp0BinBuiltinSet; // Widening signed integer add/subtract, 2*SEW = 2*SEW +/- SEW defm vwadd : RVVSignedWidenOp0BinBuiltinSet; defm vwsub : RVVSignedWidenOp0BinBuiltinSet; } defm vwcvtu_x_x_v : RVVPseudoVWCVTBuiltin<"vwaddu", "vwcvtu_x", "csi", [["Uw", "UwUv"]]>; defm vwcvt_x_x_v : RVVPseudoVWCVTBuiltin<"vwadd", "vwcvt_x", "csi", [["w", "wv"]]>; // 12.3. Vector Integer Extension let UnMaskedPolicyScheme = HasPassthruOperand in { let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { def vsext_vf2 : RVVIntExt<"vsext", "w", "wv", "csi">; def vzext_vf2 : RVVIntExt<"vzext", "Uw", "UwUv", "csi">; } let Log2LMUL = [-3, -2, -1, 0, 1] in { def vsext_vf4 : RVVIntExt<"vsext", "q", "qv", "cs">; def vzext_vf4 : RVVIntExt<"vzext", "Uq", "UqUv", "cs">; } let Log2LMUL = [-3, -2, -1, 0] in { def vsext_vf8 : RVVIntExt<"vsext", "o", "ov", "c">; def vzext_vf8 : RVVIntExt<"vzext", "Uo", "UoUv", "c">; } } // 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions let HasMasked = false, MaskedPolicyScheme = NonePolicy in { let UnMaskedPolicyScheme = HasPassthruOperand in { defm vadc : RVVCarryinBuiltinSet; defm vsbc : RVVCarryinBuiltinSet; } defm vmadc : RVVCarryOutInBuiltinSet<"vmadc_carry_in">; defm vmadc : RVVIntMaskOutBuiltinSet; defm vmsbc : RVVCarryOutInBuiltinSet<"vmsbc_borrow_in">; defm vmsbc : RVVIntMaskOutBuiltinSet; } // 12.5. Vector Bitwise Logical Instructions let UnMaskedPolicyScheme = HasPassthruOperand in { defm vand : RVVIntBinBuiltinSet; defm vxor : RVVIntBinBuiltinSet; defm vor : RVVIntBinBuiltinSet; } defm vnot_v : RVVPseudoVNotBuiltin<"vxor", "csil">; // 12.6. Vector Single-Width Bit Shift Instructions let UnMaskedPolicyScheme = HasPassthruOperand in { defm vsll : RVVShiftBuiltinSet; defm vsrl : RVVUnsignedShiftBuiltinSet; defm vsra : RVVSignedShiftBuiltinSet; // 12.7. Vector Narrowing Integer Right Shift Instructions defm vnsrl : RVVUnsignedNShiftBuiltinSet; defm vnsra : RVVSignedNShiftBuiltinSet; } defm vncvt_x_x_w : RVVPseudoVNCVTBuiltin<"vnsrl", "vncvt_x", "csi", [["v", "vw"], ["Uv", "UvUw"]]>; // 12.8. Vector Integer Comparison Instructions let MaskedPolicyScheme = NonePolicy in { defm vmseq : RVVIntMaskOutBuiltinSet; defm vmsne : RVVIntMaskOutBuiltinSet; defm vmsltu : RVVUnsignedMaskOutBuiltinSet; defm vmslt : RVVSignedMaskOutBuiltinSet; defm vmsleu : RVVUnsignedMaskOutBuiltinSet; defm vmsle : RVVSignedMaskOutBuiltinSet; defm vmsgtu : RVVUnsignedMaskOutBuiltinSet; defm vmsgt : RVVSignedMaskOutBuiltinSet; defm vmsgeu : RVVUnsignedMaskOutBuiltinSet; defm vmsge : RVVSignedMaskOutBuiltinSet; } // 12.9. Vector Integer Min/Max Instructions let UnMaskedPolicyScheme = HasPassthruOperand in { defm vminu : RVVUnsignedBinBuiltinSet; defm vmin : RVVSignedBinBuiltinSet; defm vmaxu : RVVUnsignedBinBuiltinSet; defm vmax : RVVSignedBinBuiltinSet; // 12.10. Vector Single-Width Integer Multiply Instructions defm vmul : RVVIntBinBuiltinSet; let RequiredFeatures = ["FullMultiply"] in { defm vmulh : RVVSignedBinBuiltinSet; defm vmulhu : RVVUnsignedBinBuiltinSet; defm vmulhsu : RVVOutOp1BuiltinSet<"vmulhsu", "csil", [["vv", "v", "vvUv"], ["vx", "v", "vvUe"]]>; } // 12.11. Vector Integer Divide Instructions defm vdivu : RVVUnsignedBinBuiltinSet; defm vdiv : RVVSignedBinBuiltinSet; defm vremu : RVVUnsignedBinBuiltinSet; defm vrem : RVVSignedBinBuiltinSet; } // 12.12. Vector Widening Integer Multiply Instructions let Log2LMUL = [-3, -2, -1, 0, 1, 2], UnMaskedPolicyScheme = HasPassthruOperand in { defm vwmul : RVVOutOp0Op1BuiltinSet<"vwmul", "csi", [["vv", "w", "wvv"], ["vx", "w", "wve"]]>; defm vwmulu : RVVOutOp0Op1BuiltinSet<"vwmulu", "csi", [["vv", "Uw", "UwUvUv"], ["vx", "Uw", "UwUvUe"]]>; defm vwmulsu : RVVOutOp0Op1BuiltinSet<"vwmulsu", "csi", [["vv", "w", "wvUv"], ["vx", "w", "wvUe"]]>; } // 12.13. Vector Single-Width Integer Multiply-Add Instructions let UnMaskedPolicyScheme = HasPolicyOperand in { defm vmacc : RVVIntTerBuiltinSet; defm vnmsac : RVVIntTerBuiltinSet; defm vmadd : RVVIntTerBuiltinSet; defm vnmsub : RVVIntTerBuiltinSet; // 12.14. Vector Widening Integer Multiply-Add Instructions let HasMaskedOffOperand = false, Log2LMUL = [-3, -2, -1, 0, 1, 2] in { defm vwmaccu : RVVOutOp1Op2BuiltinSet<"vwmaccu", "csi", [["vv", "Uw", "UwUwUvUv"], ["vx", "Uw", "UwUwUeUv"]]>; defm vwmacc : RVVOutOp1Op2BuiltinSet<"vwmacc", "csi", [["vv", "w", "wwvv"], ["vx", "w", "wwev"]]>; defm vwmaccsu : RVVOutOp1Op2BuiltinSet<"vwmaccsu", "csi", [["vv", "w", "wwvUv"], ["vx", "w", "wweUv"]]>; defm vwmaccus : RVVOutOp1Op2BuiltinSet<"vwmaccus", "csi", [["vx", "w", "wwUev"]]>; } } // 12.15. Vector Integer Merge Instructions // C/C++ Operand: (mask, op1, op2, vl), Intrinsic: (op1, op2, mask, vl) let HasMasked = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3); IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()}; // insert undef passthru Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); }] in { defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "csil", [["vvm", "v", "vmvv"], ["vxm", "v", "vmve"], ["vvm", "Uv", "UvmUvUv"], ["vxm", "Uv", "UvmUvUe"]]>; } // 12.16. Vector Integer Move Instructions let HasMasked = false, UnMaskedPolicyScheme = HasPassthruOperand, MaskedPolicyScheme = NonePolicy in { let OverloadedName = "vmv_v" in { defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "csil", [["v", "Uv", "UvUv"]]>; defm vmv_v : RVVOutBuiltinSet<"vmv_v_v", "csilxfd", [["v", "v", "vv"]]>; } let HasUnMaskedOverloaded = false in defm vmv_v : RVVOutBuiltinSet<"vmv_v_x", "csil", [["x", "v", "ve"], ["x", "Uv", "UvUe"]]>; } // 13. Vector Fixed-Point Arithmetic Instructions // 13.1. Vector Single-Width Saturating Add and Subtract let UnMaskedPolicyScheme = HasPassthruOperand in { defm vsaddu : RVVUnsignedBinBuiltinSet; defm vsadd : RVVSignedBinBuiltinSet; defm vssubu : RVVUnsignedBinBuiltinSet; defm vssub : RVVSignedBinBuiltinSet; // 13.2. Vector Single-Width Averaging Add and Subtract defm vaaddu : RVVUnsignedBinBuiltinSet; defm vaadd : RVVSignedBinBuiltinSet; defm vasubu : RVVUnsignedBinBuiltinSet; defm vasub : RVVSignedBinBuiltinSet; // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation let RequiredFeatures = ["FullMultiply"] in { defm vsmul : RVVSignedBinBuiltinSet; } // 13.4. Vector Single-Width Scaling Shift Instructions defm vssrl : RVVUnsignedShiftBuiltinSet; defm vssra : RVVSignedShiftBuiltinSet; // 13.5. Vector Narrowing Fixed-Point Clip Instructions defm vnclipu : RVVUnsignedNShiftBuiltinSet; defm vnclip : RVVSignedNShiftBuiltinSet; // 14. Vector Floating-Point Instructions // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions defm vfadd : RVVFloatingBinBuiltinSet; defm vfsub : RVVFloatingBinBuiltinSet; defm vfrsub : RVVFloatingBinVFBuiltinSet; // 14.3. Vector Widening Floating-Point Add/Subtract Instructions // Widening FP add/subtract, 2*SEW = SEW +/- SEW defm vfwadd : RVVFloatingWidenBinBuiltinSet; defm vfwsub : RVVFloatingWidenBinBuiltinSet; // Widening FP add/subtract, 2*SEW = 2*SEW +/- SEW defm vfwadd : RVVFloatingWidenOp0BinBuiltinSet; defm vfwsub : RVVFloatingWidenOp0BinBuiltinSet; // 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions defm vfmul : RVVFloatingBinBuiltinSet; defm vfdiv : RVVFloatingBinBuiltinSet; defm vfrdiv : RVVFloatingBinVFBuiltinSet; // 14.5. Vector Widening Floating-Point Multiply let Log2LMUL = [-2, -1, 0, 1, 2] in { defm vfwmul : RVVOutOp0Op1BuiltinSet<"vfwmul", "xf", [["vv", "w", "wvv"], ["vf", "w", "wve"]]>; } } // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions let UnMaskedPolicyScheme = HasPolicyOperand in { defm vfmacc : RVVFloatingTerBuiltinSet; defm vfnmacc : RVVFloatingTerBuiltinSet; defm vfmsac : RVVFloatingTerBuiltinSet; defm vfnmsac : RVVFloatingTerBuiltinSet; defm vfmadd : RVVFloatingTerBuiltinSet; defm vfnmadd : RVVFloatingTerBuiltinSet; defm vfmsub : RVVFloatingTerBuiltinSet; defm vfnmsub : RVVFloatingTerBuiltinSet; // 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions defm vfwmacc : RVVFloatingWidenTerBuiltinSet; defm vfwnmacc : RVVFloatingWidenTerBuiltinSet; defm vfwmsac : RVVFloatingWidenTerBuiltinSet; defm vfwnmsac : RVVFloatingWidenTerBuiltinSet; } // 14.8. Vector Floating-Point Square-Root Instruction let UnMaskedPolicyScheme = HasPassthruOperand in { def vfsqrt : RVVFloatingUnaryVVBuiltin; // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction def vfrsqrt7 : RVVFloatingUnaryVVBuiltin; // 14.10. Vector Floating-Point Reciprocal Estimate Instruction def vfrec7 : RVVFloatingUnaryVVBuiltin; // 14.11. Vector Floating-Point MIN/MAX Instructions defm vfmin : RVVFloatingBinBuiltinSet; defm vfmax : RVVFloatingBinBuiltinSet; // 14.12. Vector Floating-Point Sign-Injection Instructions defm vfsgnj : RVVFloatingBinBuiltinSet; defm vfsgnjn : RVVFloatingBinBuiltinSet; defm vfsgnjx : RVVFloatingBinBuiltinSet; } defm vfneg_v : RVVPseudoVFUnaryBuiltin<"vfsgnjn", "xfd">; defm vfabs_v : RVVPseudoVFUnaryBuiltin<"vfsgnjx", "xfd">; // 14.13. Vector Floating-Point Compare Instructions let MaskedPolicyScheme = NonePolicy in { defm vmfeq : RVVFloatingMaskOutBuiltinSet; defm vmfne : RVVFloatingMaskOutBuiltinSet; defm vmflt : RVVFloatingMaskOutBuiltinSet; defm vmfle : RVVFloatingMaskOutBuiltinSet; defm vmfgt : RVVFloatingMaskOutBuiltinSet; defm vmfge : RVVFloatingMaskOutBuiltinSet; } // 14.14. Vector Floating-Point Classify Instruction let Name = "vfclass_v", UnMaskedPolicyScheme = HasPassthruOperand in def vfclass : RVVOp0Builtin<"Uv", "Uvv", "xfd">; // 14.15. Vector Floating-Point Merge Instructio // C/C++ Operand: (mask, op1, op2, vl), Builtin: (op1, op2, mask, vl) let HasMasked = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3); IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[3]->getType()}; // insert undef passthru Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType)); }] in { defm vmerge : RVVOutOp1BuiltinSet<"vmerge", "xfd", [["vvm", "v", "vmvv"]]>; defm vfmerge : RVVOutOp1BuiltinSet<"vfmerge", "xfd", [["vfm", "v", "vmve"]]>; } // 14.16. Vector Floating-Point Move Instruction let HasMasked = false, UnMaskedPolicyScheme = HasPassthruOperand, HasUnMaskedOverloaded = false, MaskedPolicyScheme = NonePolicy in defm vfmv_v : RVVOutBuiltinSet<"vfmv_v_f", "xfd", [["f", "v", "ve"]]>; // 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions let UnMaskedPolicyScheme = HasPassthruOperand in { def vfcvt_xu_f_v : RVVConvToUnsignedBuiltin<"vfcvt_xu">; def vfcvt_x_f_v : RVVConvToSignedBuiltin<"vfcvt_x">; def vfcvt_rtz_xu_f_v : RVVConvToUnsignedBuiltin<"vfcvt_rtz_xu">; def vfcvt_rtz_x_f_v : RVVConvToSignedBuiltin<"vfcvt_rtz_x">; def vfcvt_f_xu_v : RVVConvBuiltin<"Fv", "FvUv", "sil", "vfcvt_f">; def vfcvt_f_x_v : RVVConvBuiltin<"Fv", "Fvv", "sil", "vfcvt_f">; // 14.18. Widening Floating-Point/Integer Type-Convert Instructions let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { def vfwcvt_xu_f_v : RVVConvToWidenUnsignedBuiltin<"vfwcvt_xu">; def vfwcvt_x_f_v : RVVConvToWidenSignedBuiltin<"vfwcvt_x">; def vfwcvt_rtz_xu_f_v : RVVConvToWidenUnsignedBuiltin<"vfwcvt_rtz_xu">; def vfwcvt_rtz_x_f_v : RVVConvToWidenSignedBuiltin<"vfwcvt_rtz_x">; def vfwcvt_f_xu_v : RVVConvBuiltin<"Fw", "FwUv", "csi", "vfwcvt_f">; def vfwcvt_f_x_v : RVVConvBuiltin<"Fw", "Fwv", "csi", "vfwcvt_f">; def vfwcvt_f_f_v : RVVConvBuiltin<"w", "wv", "xf", "vfwcvt_f">; } // 14.19. Narrowing Floating-Point/Integer Type-Convert Instructions let Log2LMUL = [-3, -2, -1, 0, 1, 2] in { def vfncvt_xu_f_w : RVVConvToNarrowingUnsignedBuiltin<"vfncvt_xu">; def vfncvt_x_f_w : RVVConvToNarrowingSignedBuiltin<"vfncvt_x">; def vfncvt_rtz_xu_f_w : RVVConvToNarrowingUnsignedBuiltin<"vfncvt_rtz_xu">; def vfncvt_rtz_x_f_w : RVVConvToNarrowingSignedBuiltin<"vfncvt_rtz_x">; def vfncvt_f_xu_w : RVVConvBuiltin<"Fv", "FvUw", "csi", "vfncvt_f">; def vfncvt_f_x_w : RVVConvBuiltin<"Fv", "Fvw", "csi", "vfncvt_f">; def vfncvt_f_f_w : RVVConvBuiltin<"v", "vw", "xf", "vfncvt_f">; def vfncvt_rod_f_f_w : RVVConvBuiltin<"v", "vw", "xf", "vfncvt_rod_f">; } } // 15. Vector Reduction Operations // 15.1. Vector Single-Width Integer Reduction Instructions let MaskedPolicyScheme = NonePolicy in { defm vredsum : RVVIntReductionBuiltinSet; defm vredmaxu : RVVUnsignedReductionBuiltin; defm vredmax : RVVSignedReductionBuiltin; defm vredminu : RVVUnsignedReductionBuiltin; defm vredmin : RVVSignedReductionBuiltin; defm vredand : RVVIntReductionBuiltinSet; defm vredor : RVVIntReductionBuiltinSet; defm vredxor : RVVIntReductionBuiltinSet; // 15.2. Vector Widening Integer Reduction Instructions // Vector Widening Integer Reduction Operations let HasMaskedOffOperand = false in { defm vwredsum : RVVOutOp1BuiltinSet<"vwredsum", "csi", [["vs", "vSw", "SwSwvSw"]]>; defm vwredsumu : RVVOutOp1BuiltinSet<"vwredsumu", "csi", [["vs", "UvUSw", "USwUSwUvUSw"]]>; } // 15.3. Vector Single-Width Floating-Point Reduction Instructions defm vfredmax : RVVFloatingReductionBuiltin; defm vfredmin : RVVFloatingReductionBuiltin; defm vfredusum : RVVFloatingReductionBuiltin; defm vfredosum : RVVFloatingReductionBuiltin; // 15.4. Vector Widening Floating-Point Reduction Instructions defm vfwredusum : RVVFloatingWidenReductionBuiltin; defm vfwredosum : RVVFloatingWidenReductionBuiltin; } // 16. Vector Mask Instructions // 16.1. Vector Mask-Register Logical Instructions def vmand : RVVMaskBinBuiltin; def vmnand : RVVMaskBinBuiltin; def vmandn : RVVMaskBinBuiltin; def vmxor : RVVMaskBinBuiltin; def vmor : RVVMaskBinBuiltin; def vmnor : RVVMaskBinBuiltin; def vmorn : RVVMaskBinBuiltin; def vmxnor : RVVMaskBinBuiltin; // pseudoinstructions def vmclr : RVVMaskNullaryBuiltin; def vmset : RVVMaskNullaryBuiltin; defm vmmv_m : RVVPseudoMaskBuiltin<"vmand", "c">; defm vmnot_m : RVVPseudoMaskBuiltin<"vmnand", "c">; let MaskedPolicyScheme = NonePolicy in { // 16.2. Vector count population in mask vcpop.m def vcpop : RVVMaskOp0Builtin<"um">; // 16.3. vfirst find-first-set mask bit def vfirst : RVVMaskOp0Builtin<"lm">; // 16.4. vmsbf.m set-before-first mask bit def vmsbf : RVVMaskUnaryBuiltin; // 16.5. vmsif.m set-including-first mask bit def vmsif : RVVMaskUnaryBuiltin; // 16.6. vmsof.m set-only-first mask bit def vmsof : RVVMaskUnaryBuiltin; } let UnMaskedPolicyScheme = HasPassthruOperand, HasUnMaskedOverloaded = false in { // 16.8. Vector Iota Instruction defm viota : RVVOutBuiltinSet<"viota", "csil", [["m", "Uv", "Uvm"]]>; // 16.9. Vector Element Index Instruction defm vid : RVVOutBuiltinSet<"vid", "csil", [["v", "v", "v"], ["v", "Uv", "Uv"]]>; } // 17. Vector Permutation Instructions // 17.1. Integer Scalar Move Instructions let HasMasked = false, MaskedPolicyScheme = NonePolicy in { let HasVL = false, OverloadedName = "vmv_x" in defm vmv_x : RVVOp0BuiltinSet<"vmv_x_s", "csil", [["s", "ve", "ev"], ["s", "UvUe", "UeUv"]]>; let OverloadedName = "vmv_s" in defm vmv_s : RVVOutBuiltinSet<"vmv_s_x", "csil", [["x", "v", "vve"], ["x", "Uv", "UvUvUe"]]>; } // 17.2. Floating-Point Scalar Move Instructions let HasMasked = false, MaskedPolicyScheme = NonePolicy in { let HasVL = false, OverloadedName = "vfmv_f" in defm vfmv_f : RVVOp0BuiltinSet<"vfmv_f_s", "xfd", [["s", "ve", "ev"]]>; let OverloadedName = "vfmv_s" in defm vfmv_s : RVVOutBuiltinSet<"vfmv_s_f", "xfd", [["f", "v", "vve"], ["x", "Uv", "UvUvUe"]]>; } // 17.3. Vector Slide Instructions // 17.3.1. Vector Slideup Instructions defm vslideup : RVVSlideBuiltinSet; // 17.3.2. Vector Slidedown Instructions defm vslidedown : RVVSlideBuiltinSet; // 17.3.3. Vector Slide1up Instructions let UnMaskedPolicyScheme = HasPassthruOperand in { defm vslide1up : RVVSlideOneBuiltinSet; defm vfslide1up : RVVFloatingBinVFBuiltinSet; // 17.3.4. Vector Slide1down Instruction defm vslide1down : RVVSlideOneBuiltinSet; defm vfslide1down : RVVFloatingBinVFBuiltinSet; // 17.4. Vector Register Gather Instructions // signed and floating type defm vrgather : RVVOutBuiltinSet<"vrgather_vv", "csilxfd", [["vv", "v", "vvUv"]]>; defm vrgather : RVVOutBuiltinSet<"vrgather_vx", "csilxfd", [["vx", "v", "vvz"]]>; defm vrgatherei16 : RVVOutBuiltinSet<"vrgatherei16_vv", "csilxfd", [["vv", "v", "vv(Log2EEW:4)Uv"]]>; // unsigned type defm vrgather : RVVOutBuiltinSet<"vrgather_vv", "csil", [["vv", "Uv", "UvUvUv"]]>; defm vrgather : RVVOutBuiltinSet<"vrgather_vx", "csil", [["vx", "Uv", "UvUvz"]]>; defm vrgatherei16 : RVVOutBuiltinSet<"vrgatherei16_vv", "csil", [["vv", "Uv", "UvUv(Log2EEW:4)Uv"]]>; } // 17.5. Vector Compress Instruction let HasMasked = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ std::rotate(Ops.begin(), Ops.begin() + 1, Ops.begin() + 3); IntrinsicTypes = {ResultType, Ops[3]->getType()}; }] in { // signed and floating type defm vcompress : RVVOutBuiltinSet<"vcompress", "csilxfd", [["vm", "v", "vmvv"]]>; // unsigned type defm vcompress : RVVOutBuiltinSet<"vcompress", "csil", [["vm", "Uv", "UvmUvUv"]]>; } // Miscellaneous let HasMasked = false, HasVL = false, IRName = "" in { let Name = "vreinterpret_v", MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ return Builder.CreateBitCast(Ops[0], ResultType); }] in { // Reinterpret between different type under the same SEW and LMUL def vreinterpret_i_u : RVVBuiltin<"Uvv", "vUv", "csil", "v">; def vreinterpret_i_f : RVVBuiltin<"Fvv", "vFv", "sil", "v">; def vreinterpret_u_i : RVVBuiltin<"vUv", "Uvv", "csil", "Uv">; def vreinterpret_u_f : RVVBuiltin<"FvUv", "UvFv", "sil", "Uv">; def vreinterpret_f_i : RVVBuiltin<"vFv", "Fvv", "sil", "Fv">; def vreinterpret_f_u : RVVBuiltin<"UvFv", "FvUv", "sil", "Fv">; // Reinterpret between different SEW under the same LMUL foreach dst_sew = ["(FixedSEW:8)", "(FixedSEW:16)", "(FixedSEW:32)", "(FixedSEW:64)"] in { def vreinterpret_i_ # dst_sew : RVVBuiltin<"v" # dst_sew # "v", dst_sew # "vv", "csil", dst_sew # "v">; def vreinterpret_u_ # dst_sew : RVVBuiltin<"Uv" # dst_sew # "Uv", dst_sew # "UvUv", "csil", dst_sew # "Uv">; } } let Name = "vundefined", HasUnMaskedOverloaded = false, MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ return llvm::UndefValue::get(ResultType); }] in { def vundefined : RVVBuiltin<"v", "v", "csilxfd">; def vundefined_u : RVVBuiltin<"Uv", "Uv", "csil">; } // LMUL truncation // C/C++ Operand: VecTy, IR Operand: VecTy, Index let Name = "vlmul_trunc_v", OverloadedName = "vlmul_trunc", MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { ID = Intrinsic::vector_extract; IntrinsicTypes = {ResultType, Ops[0]->getType()}; Ops.push_back(ConstantInt::get(Int64Ty, 0)); return Builder.CreateCall(CGM.getIntrinsic(ID, IntrinsicTypes), Ops, ""); } }] in { foreach dst_lmul = ["(SFixedLog2LMUL:-3)", "(SFixedLog2LMUL:-2)", "(SFixedLog2LMUL:-1)", "(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { def vlmul_trunc # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "csilxfd", dst_lmul # "v">; def vlmul_trunc_u # dst_lmul : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUv", "csil", dst_lmul # "Uv">; } } // LMUL extension // C/C++ Operand: SubVecTy, IR Operand: VecTy, SubVecTy, Index let Name = "vlmul_ext_v", OverloadedName = "vlmul_ext", MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ ID = Intrinsic::vector_insert; IntrinsicTypes = {ResultType, Ops[0]->getType()}; Ops.push_back(llvm::UndefValue::get(ResultType)); std::swap(Ops[0], Ops[1]); Ops.push_back(ConstantInt::get(Int64Ty, 0)); return Builder.CreateCall(CGM.getIntrinsic(ID, IntrinsicTypes), Ops, ""); }] in { foreach dst_lmul = ["(LFixedLog2LMUL:-2)", "(LFixedLog2LMUL:-1)", "(LFixedLog2LMUL:-0)", "(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { def vlmul_ext # dst_lmul : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vv", "csilxfd", dst_lmul # "v">; def vlmul_ext_u # dst_lmul : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUv", "csil", dst_lmul # "Uv">; } } let Name = "vget_v", MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { ID = Intrinsic::vector_extract; auto *VecTy = cast<ScalableVectorType>(ResultType); auto *OpVecTy = cast<ScalableVectorType>(Ops[0]->getType()); // Mask to only valid indices. unsigned MaxIndex = OpVecTy->getMinNumElements() / VecTy->getMinNumElements(); assert(isPowerOf2_32(MaxIndex)); Ops[1] = Builder.CreateZExt(Ops[1], Builder.getInt64Ty()); Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1); Ops[1] = Builder.CreateMul(Ops[1], ConstantInt::get(Ops[1]->getType(), VecTy->getMinNumElements())); IntrinsicTypes = {ResultType, Ops[0]->getType()}; return Builder.CreateCall(CGM.getIntrinsic(ID, IntrinsicTypes), Ops, ""); } }] in { foreach dst_lmul = ["(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilxfd", dst_lmul # "v">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUvKz", "csil", dst_lmul # "Uv">; } } let Name = "vset_v", Log2LMUL = [0, 1, 2], MaskedPolicyScheme = NonePolicy, ManualCodegen = [{ { ID = Intrinsic::vector_insert; IntrinsicTypes = {ResultType, Ops[2]->getType()}; auto *ResVecTy = cast<ScalableVectorType>(ResultType); auto *VecTy = cast<ScalableVectorType>(Ops[2]->getType()); // Mask to only valid indices. unsigned MaxIndex = ResVecTy->getMinNumElements() / VecTy->getMinNumElements(); assert(isPowerOf2_32(MaxIndex)); Ops[1] = Builder.CreateZExt(Ops[1], Builder.getInt64Ty()); Ops[1] = Builder.CreateAnd(Ops[1], MaxIndex - 1); Ops[1] = Builder.CreateMul(Ops[1], ConstantInt::get(Ops[1]->getType(), VecTy->getMinNumElements())); std::swap(Ops[1], Ops[2]); return Builder.CreateCall(CGM.getIntrinsic(ID, IntrinsicTypes), Ops, ""); } }] in { foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilxfd">; def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">; } } }