Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; Try to eliminate binops and shuffles when the shuffle is a select in disguise:
; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806

define <4 x i32> @add(<4 x i32> %v) {
; CHECK-LABEL: @add(
; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Propagate flags when possible.

define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw(
; CHECK-NEXT:    [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 undef, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %s
}

; Poison flags must be dropped or undef must be replaced with safe constant.

define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 undef, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

; Constant operand 0 (LHS) could work for some non-commutative binops?

define <4 x i32> @sub(<4 x i32> %v) {
; CHECK-LABEL: @sub(
; CHECK-NEXT:    [[B:%.*]] = sub <4 x i32> <i32 poison, i32 poison, i32 poison, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.

define <4 x i32> @mul(<4 x i32> %v) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:    [[S:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 12, i32 1, i32 14>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = mul nsw nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @shl(<4 x i32> %v) {
; CHECK-LABEL: @shl(
; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @shl_nsw(<4 x i32> %v) {
; CHECK-LABEL: @shl_nsw(
; CHECK-NEXT:    [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_nuw_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op0(
; CHECK-NEXT:    [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_constant_op0(
; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr exact  <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op1(
; CHECK-NEXT:    [[B:%.*]] = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}

; Try weird types.

define <3 x i32> @ashr(<3 x i32> %v) {
; CHECK-LABEL: @ashr(
; CHECK-NEXT:    [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
; CHECK-NEXT:    ret <3 x i32> [[S]]
;
  %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
  %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %s
}

define <3 x i42> @and(<3 x i42> %v) {
; CHECK-LABEL: @and(
; CHECK-NEXT:    [[S:%.*]] = and <3 x i42> [[V:%.*]], <i42 -1, i42 12, i42 undef>
; CHECK-NEXT:    ret <3 x i42> [[S]]
;
  %b = and <3 x i42> %v, <i42 11, i42 12, i42 13>
  %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %s
}

; It doesn't matter if the intermediate op has extra uses.

declare void @use_v4i32(<4 x i32>)

define <4 x i32> @or(<4 x i32> %v) {
; CHECK-LABEL: @or(
; CHECK-NEXT:    [[B:%.*]] = or <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
; CHECK-NEXT:    [[S:%.*]] = or <4 x i32> [[V]], <i32 0, i32 0, i32 13, i32 14>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[B]])
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = or <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %b)
  ret <4 x i32> %s
}

define <4 x i32> @xor(<4 x i32> %v) {
; CHECK-LABEL: @xor(
; CHECK-NEXT:    [[S:%.*]] = xor <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 0, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = xor <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %s
}

define <4 x i32> @udiv(<4 x i32> %v) {
; CHECK-LABEL: @udiv(
; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact(
; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_undef_mask_elt(
; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact_undef_mask_elt(
; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @sdiv(<4 x i32> %v) {
; CHECK-LABEL: @sdiv(
; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

define <4 x i32> @sdiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact(
; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

; Div/rem need special handling if the shuffle has undef elements.

define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @urem(<4 x i32> %v) {
; CHECK-LABEL: @urem(
; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s
}

define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @urem_undef_mask_elt(
; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %s
}

define <4 x i32> @srem(<4 x i32> %v) {
; CHECK-LABEL: @srem(
; CHECK-NEXT:    [[B:%.*]] = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %b = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %s
}

; Try FP ops/types.

define <4 x float> @fadd(<4 x float> %v) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT:    [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %s
}

define <4 x double> @fsub(<4 x double> %v) {
; CHECK-LABEL: @fsub(
; CHECK-NEXT:    [[B:%.*]] = fsub <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = fsub <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Propagate any FMF.

define <4 x float> @fmul(<4 x float> %v) {
; CHECK-LABEL: @fmul(
; CHECK-NEXT:    [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %b = fmul nnan ninf <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %s
}

define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
; CHECK-LABEL: @fdiv_constant_op0(
; CHECK-NEXT:    [[B:%.*]] = fdiv fast <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = fdiv fast <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT:    [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

define <4 x double> @frem(<4 x double> %v) {
; CHECK-LABEL: @frem(
; CHECK-NEXT:    [[B:%.*]] = frem <4 x double> <double 4.100000e+01, double 4.200000e+01, double poison, double poison>, [[V:%.*]]
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[S]]
;
  %b = frem <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
  %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %s
}

; Tests where both operands of the shuffle are binops with the same opcode.

define <4 x i32> @add_add(<4 x i32> %v0) {
; CHECK-LABEL: @add_add(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw(
; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.

define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.

define <4 x i32> @sub_sub(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub(
; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw(
; CHECK-NEXT:    [[TMP1:%.*]] = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.

define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.

define <4 x i32> @mul_mul(<4 x i32> %v0) {
; CHECK-LABEL: @mul_mul(
; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Preserve flags when possible.

define <4 x i32> @shl_shl(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw(
; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.

define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.

define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.

define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
; CHECK-LABEL: @lshr_lshr(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Try weird types.

define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
; CHECK-LABEL: @ashr_ashr(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 4, i32 2, i32 3>
; CHECK-NEXT:    ret <3 x i32> [[TMP1]]
;
  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
  %t2 = ashr <3 x i32> %v0, <i32 4, i32 5, i32 6>
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %t3
}

define <3 x i42> @and_and(<3 x i42> %v0) {
; CHECK-LABEL: @and_and(
; CHECK-NEXT:    [[TMP1:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 5, i42 undef>
; CHECK-NEXT:    ret <3 x i42> [[TMP1]]
;
  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
  %t2 = and <3 x i42> %v0, <i42 4, i42 5, i42 6>
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %t3
}

; It doesn't matter if the intermediate ops have extra uses.

define <4 x i32> @or_or(<4 x i32> %v0) {
; CHECK-LABEL: @or_or(
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> [[V0]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = or <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t1)
  ret <4 x i32> %t3
}

define <4 x i32> @xor_xor(<4 x i32> %v0) {
; CHECK-LABEL: @xor_xor(
; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[V0]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = xor <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
; CHECK-LABEL: @udiv_udiv(
; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
; CHECK-NEXT:    [[TMP1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0]]
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  call void @use_v4i32(<4 x i32> %t1)
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.

define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv(
; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

define <4 x i32> @urem_urem(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem(
; CHECK-NEXT:    [[TMP1:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.

define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

define <4 x i32> @srem_srem(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem(
; CHECK-NEXT:    [[TMP1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 7, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.

define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Try FP ops/types.

define <4 x float> @fadd_fadd(<4 x float> %v0) {
; CHECK-LABEL: @fadd_fadd(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}

define <4 x double> @fsub_fsub(<4 x double> %v0) {
; CHECK-LABEL: @fsub_fsub(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x double> [[TMP1]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Intersect any FMF.

define <4 x float> @fmul_fmul(<4 x float> %v0) {
; CHECK-LABEL: @fmul_fmul(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %t1 = fmul nnan ninf <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul nnan ninf <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}

define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
; CHECK-LABEL: @fdiv_fdiv(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv arcp <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
; CHECK-NEXT:    ret <4 x double> [[TMP1]]
;
  %t1 = fdiv fast <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.

define <4 x double> @frem_frem(<4 x double> %v0) {
; CHECK-LABEL: @frem_frem(
; CHECK-NEXT:    [[T1:%.*]] = frem <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = frem <4 x double> [[V0]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = frem <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem <4 x double> %v0, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @add_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.

define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.

define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.

define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.

define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Preserve flags when possible.

define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.

define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.

define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.

define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.

define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.

define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
  ret <4 x i32> %t3
}

; Try weird types.

define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
; CHECK-LABEL: @ashr_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32> <i32 0, i32 4, i32 5>
; CHECK-NEXT:    [[TMP2:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>
; CHECK-NEXT:    ret <3 x i32> [[TMP2]]
;
  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
  %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
  ret <3 x i32> %t3
}

define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
; CHECK-LABEL: @and_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>
; CHECK-NEXT:    ret <3 x i42> [[TMP2]]
;
  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
  %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
  ret <3 x i42> %t3
}

; It doesn't matter if only one intermediate op has extra uses.

define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @or_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  call void @use_v4i32(<4 x i32> %t1)
  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

define <4 x i32> @or_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @or_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 undef>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  call void @use_v4i32(<4 x i32> %t1)
  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
  ret <4 x i32> %t3
}

; But we don't transform if both intermediate values have extra uses.

define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @xor_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  call void @use_v4i32(<4 x i32> %t1)
  %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  call void @use_v4i32(<4 x i32> %t2)
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.

define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = udiv exact <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.

define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.

define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[T1:%.*]] = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.

define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.

define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.

define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.

define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @urem_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[TMP1]]
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %t3
}

define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @srem_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
  ret <4 x i32> %t3
}

; Try FP ops/types.

define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fadd_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[TMP2]]
;
  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x float> %t3
}

define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fsub_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT:    ret <4 x double> [[TMP2]]
;
  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Intersect any FMF.

define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fmul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
; CHECK-NEXT:    ret <4 x float> [[TMP2]]
;
  %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
  %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %t3
}

define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @frem_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = frem <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
; CHECK-NEXT:    ret <4 x double> [[TMP2]]
;
  %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.

define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fdiv_2_vars(
; CHECK-NEXT:    [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x double> [[T3]]
;
  %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
  %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %t3
}

; Shift-left with constant shift amount can be converted to mul to enable the fold.

define <4 x i32> @mul_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_shl(
; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.

define <4 x i32> @shl_mul(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul(
; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Demanded elements + simplification can remove the mul alone, but that's not the best case.

define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_is_nop_shl(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i32> %t3
}

; Negative test: shift amount (operand 1) must be constant.

define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
; CHECK-NEXT:    [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
; CHECK-NEXT:    [[T2:%.*]] = mul <4 x i32> [[V0]], <i32 5, i32 6, i32 poison, i32 poison>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with 2 variable inputs.

define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_shl_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_mul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 undef, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
  %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Negate can be converted to mul to enable the fold.

define <4 x i32> @mul_neg(<4 x i32> %x) {
; CHECK-LABEL: @mul_neg(
; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> [[X:%.*]], <i32 257, i32 -3, i32 -1, i32 -9>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %m = mul <4 x i32> %x, <i32 257, i32 -3, i32 poison, i32 -9>
  %n = sub <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, %x
  %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %r
}

define <3 x i79> @neg_mul(<3 x i79> %x) {
; CHECK-LABEL: @neg_mul(
; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <3 x i79> [[X:%.*]], <i79 -1, i79 -3, i79 -1>
; CHECK-NEXT:    ret <3 x i79> [[TMP1]]
;
  %n = sub nsw <3 x i79> <i79 0, i79 poison, i79 0>, %x
  %m = mul nsw <3 x i79> %x, <i79 poison, i79 -3, i79 poison>
  %r = shufflevector <3 x i79> %n, <3 x i79> %m, <3 x i32> <i32 0, i32 4, i32 2>
  ret <3 x i79> %r
}

define <4 x i32> @mul_neg_2_vars(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @mul_neg_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[TMP1]], <i32 42, i32 -1, i32 -1, i32 6>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %m = mul nuw <4 x i32> %x, <i32 42, i32 poison, i32 poison, i32 6>
  %n = sub nsw <4 x i32> <i32 poison, i32 0, i32 0, i32 poison>, %y
  %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ret <4 x i32> %r
}

define <4 x i32> @neg_mul_2_vars(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @neg_mul_2_vars(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul nsw <4 x i32> [[TMP1]], <i32 -1, i32 42, i32 -1, i32 6>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %n = sub nsw <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>, %y
  %m = mul nuw nsw <4 x i32> %x, <i32 poison, i32 42, i32 poison, i32 6>
  %r = shufflevector <4 x i32> %n, <4 x i32> %m, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %r
}

; Or with constant can be converted to add to enable the fold.
; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
; TODO: The 'or' constant is limited to a splat.

define <4 x i32> @add_or(<4 x i32> %v) {
; CHECK-LABEL: @add_or(
; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
  %t1 = add <4 x i32> %v0, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

; Try with 'or' as operand 0 of the shuffle.

define <4 x i8> @or_add(<4 x i8> %v) {
; CHECK-LABEL: @or_add(
; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
; CHECK-NEXT:    ret <4 x i8> [[TMP1]]
;
  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i8> %t3
}

; Negative test: not all 'or' insts can be converted to 'add'.

define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
; CHECK-LABEL: @or_add_not_enough_masking(
; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1>
; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 poison, i8 poison, i8 -64, i8 -64>
; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 poison, i8 poison>
; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1>          ; clear not enough top bits
  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i8> %t3
}

; Try with 2 variable inputs.

define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
; CHECK-LABEL: @add_or_2_vars(
; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
  %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i32> %t3
}

define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
; CHECK-LABEL: @or_add_2_vars(
; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
; CHECK-NEXT:    ret <4 x i8> [[TMP2]]
;
  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i8> %t3
}

; The undef operand is used to simplify the shuffle mask, but don't assert that too soon.

define <4 x i32> @PR41419(<4 x i32> %v) {
; CHECK-LABEL: @PR41419(
; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 2, i32 undef>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  ret <4 x i32> %s
}