# Checks X86 GlobalISel instruction selection (-run-pass=instruction-select) of
# G_LOAD and G_STORE for x86_64-linux-gnu under four feature sets: the default
# one, +avx, +avx512f, and +avx512f together with +avx512vl. Each invocation
# enables a different combination of FileCheck prefixes so that expectations
# can be shared between subtargets or specialised for one of them.
# NOTE(review): the NO_AVX512VL prefix is enabled by the first three
# invocations, but no check line in the portion reviewed uses it - confirm
# whether it can be dropped.
# RUN: llc -mtriple=x86_64-linux-gnu                                  -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx                      -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f                  -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F
# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL

# Embedded LLVM IR module. The MIR functions further down refer back to it
# through their %ir.p1 memory operands and %ir-block.0 block references, and
# every IR function here has a same-named MIR function.
#
# Loads cover i8, i16, i32, i64, float, double and <4 x i32>; stores cover
# i32, i64, float, double and <4 x i32>. The <4 x i32> accesses exist in an
# "align 1" and an "align 16" flavour. The float and double functions come in
# pairs (plain and _vecreg / _vec) whose IR is identical; the pairs only
# differ in the register banks used by the corresponding MIR.
--- |
  define i8 @test_load_i8(i8* %p1) {
    %r = load i8, i8* %p1
    ret i8 %r
  }

  define i16 @test_load_i16(i16* %p1) {
    %r = load i16, i16* %p1
    ret i16 %r
  }

  define i32 @test_load_i32(i32* %p1) {
    %r = load i32, i32* %p1
    ret i32 %r
  }

  define i64 @test_load_i64(i64* %p1) {
    %r = load i64, i64* %p1
    ret i64 %r
  }

  define float @test_load_float(float* %p1) {
    %r = load float, float* %p1
    ret float %r
  }

  define float @test_load_float_vecreg(float* %p1) {
    %r = load float, float* %p1
    ret float %r
  }


  define double @test_load_double(double* %p1) {
    %r = load double, double* %p1
    ret double %r
  }

  define double @test_load_double_vecreg(double* %p1) {
    %r = load double, double* %p1
    ret double %r
  }

  define <4 x i32> @test_load_v4i32_noalign(<4 x i32>* %p1) {
    %r = load <4 x i32>, <4 x i32>* %p1, align 1
    ret <4 x i32> %r
  }

  define <4 x i32> @test_load_v4i32_align(<4 x i32>* %p1) {
    %r = load <4 x i32>, <4 x i32>* %p1, align 16
    ret <4 x i32> %r
  }

  define i32* @test_store_i32(i32 %val, i32* %p1) {
    store i32 %val, i32* %p1
    ret i32* %p1
  }

  define i64* @test_store_i64(i64 %val, i64* %p1) {
    store i64 %val, i64* %p1
    ret i64* %p1
  }

  define float* @test_store_float(float %val, float* %p1) {
    store float %val, float* %p1
    ret float* %p1
  }

  define float* @test_store_float_vec(float %val, float* %p1) {
    store float %val, float* %p1
    ret float* %p1
  }

  define double* @test_store_double(double %val, double* %p1) {
    store double %val, double* %p1
    ret double* %p1
  }

  define double* @test_store_double_vec(double %val, double* %p1) {
    store double %val, double* %p1
    ret double* %p1
  }

  define <4 x i32>* @test_store_v4i32_align(<4 x i32> %val, <4 x i32>* %p1) {
    store <4 x i32> %val, <4 x i32>* %p1, align 16
    ret <4 x i32>* %p1
  }

  define <4 x i32>* @test_store_v4i32_noalign(<4 x i32> %val, <4 x i32>* %p1) {
    store <4 x i32> %val, <4 x i32>* %p1, align 1
    ret <4 x i32>* %p1
  }

...
---
# 1-byte integer load. The pointer (%0, p0) and the loaded value (%1, s8) are
# both on the gpr bank. Every run configuration expects the same result: the
# G_LOAD becomes MOV8rm, %0 is constrained to gr64 and %1 to gr8, and the
# value is returned in %al.
# ALL-LABEL: name:            test_load_i8
name:            test_load_i8
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# ALL:   - { id: 1, class: gr8 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %rdi
# ALL:     %1 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.p1)
# ALL:     %al = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1)
    %al = COPY %1(s8)
    RET 0, implicit %al

...
---
# 2-byte integer load with pointer and result on the gpr bank. Every run
# configuration expects MOV16rm, with the pointer constrained to gr64 and the
# loaded s16 value constrained to gr16 before it is copied to %ax.
# ALL-LABEL: name:            test_load_i16
name:            test_load_i16
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# ALL:   - { id: 1, class: gr16 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %rdi
# ALL:     %1 = MOV16rm %0, 1, _, 0, _ :: (load 2 from %ir.p1)
# ALL:     %ax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1)
    %ax = COPY %1(s16)
    RET 0, implicit %ax

...
---
# 4-byte integer load with pointer and result on the gpr bank. Every run
# configuration expects MOV32rm, with the pointer constrained to gr64 and the
# loaded s32 value constrained to gr32 before it is copied to %eax.
# ALL-LABEL: name:            test_load_i32
name:            test_load_i32
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# ALL:   - { id: 1, class: gr32 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %rdi
# ALL:     %1 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
# ALL:     %eax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
    %eax = COPY %1(s32)
    RET 0, implicit %eax

...
---
# 8-byte integer load with pointer and result on the gpr bank. Every run
# configuration expects MOV64rm, with both the pointer and the loaded s64
# value constrained to gr64; the result is copied to %rax.
# ALL-LABEL: name:            test_load_i64
name:            test_load_i64
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %rdi
# ALL:     %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
# ALL:     %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
    %rax = COPY %1(s64)
    RET 0, implicit %rax

...
---
# Float load whose result (%1, s32) sits on the gpr bank and is only copied to
# %xmm0 afterwards. Because the destination is a gpr, every run configuration
# expects the integer load MOV32rm with %1 constrained to gr32; compare with
# test_load_float_vecreg, where the result is on the vecr bank.
# ALL-LABEL: name:            test_load_float
name:            test_load_float
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# ALL:   - { id: 1, class: gr32 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %rdi
# ALL:     %1 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
# ALL:     %xmm0 = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
    %xmm0 = COPY %1(s32)
    RET 0, implicit %xmm0

...
---
# Same 4-byte load as test_load_float, but the result (%1) is on the vecr
# bank. The expected opcode and register class depend on the feature set:
#   default          -> MOVSSrm,   fr32
#   +avx             -> VMOVSSrm,  fr32
#   +avx512f (+/-vl) -> VMOVSSZrm, fr32x
# The pointer is constrained to gr64 in every configuration.
# ALL-LABEL: name:            test_load_float_vecreg
name:            test_load_float_vecreg
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:          - { id: 0, class: gr64 }
# NO_AVX512F:   - { id: 1, class: fr32 }
# AVX512ALL:    - { id: 1, class: fr32x }
  - { id: 0, class: gpr }
  - { id: 1, class: vecr }
# ALL:       %0 = COPY %rdi
# SSE:       %1 = MOVSSrm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
# AVX:       %1 = VMOVSSrm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
# AVX512ALL: %1 = VMOVSSZrm %0, 1, _, 0, _ :: (load 4 from %ir.p1)
# ALL: %xmm0 = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1)
    %xmm0 = COPY %1(s32)
    RET 0, implicit %xmm0

...
---
# Double load whose result (%1, s64) sits on the gpr bank and is only copied
# to %xmm0 afterwards. Every run configuration expects the integer load
# MOV64rm with %1 constrained to gr64; compare with test_load_double_vecreg,
# where the result is on the vecr bank.
# ALL-LABEL: name:            test_load_double
name:            test_load_double
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %rdi
# ALL:     %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
# ALL:     %xmm0 = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
    %xmm0 = COPY %1(s64)
    RET 0, implicit %xmm0

...
---
# Same 8-byte load as test_load_double, but the result (%1) is on the vecr
# bank. The expected opcode and register class depend on the feature set:
#   default          -> MOVSDrm,   fr64
#   +avx             -> VMOVSDrm,  fr64
#   +avx512f (+/-vl) -> VMOVSDZrm, fr64x
# The pointer is constrained to gr64 in every configuration.
# ALL-LABEL: name:            test_load_double_vecreg
name:            test_load_double_vecreg
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:          - { id: 0, class: gr64 }
# NO_AVX512F:   - { id: 1, class: fr64 }
# AVX512ALL:    - { id: 1, class: fr64x }
  - { id: 0, class: gpr }
  - { id: 1, class: vecr }
# ALL:       %0 = COPY %rdi
# SSE:       %1 = MOVSDrm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
# AVX:       %1 = VMOVSDrm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
# AVX512ALL: %1 = VMOVSDZrm %0, 1, _, 0, _ :: (load 8 from %ir.p1)
# ALL: %xmm0 = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1)
    %xmm0 = COPY %1(s64)
    RET 0, implicit %xmm0

...
---
# 16-byte <4 x s32> load with "align 1" on its memory operand; the result is
# on the vecr bank. The unaligned move is expected for each feature set:
#   default            -> MOVUPSrm,             vr128
#   +avx               -> VMOVUPSrm,            vr128
#   +avx512f only      -> VMOVUPSZ128rm_NOVLX,  vr128x
#   +avx512f +avx512vl -> VMOVUPSZ128rm,        vr128x
# ALL-LABEL: name:            test_load_v4i32_noalign
name:            test_load_v4i32_noalign
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:          - { id: 0, class: gr64 }
# NO_AVX512F:   - { id: 1, class: vr128 }
# AVX512ALL:    - { id: 1, class: vr128x }
  - { id: 0, class: gpr }
  - { id: 1, class: vecr }
# ALL:      %0 = COPY %rdi
# SSE:      %1 = MOVUPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
# AVX:      %1 = VMOVUPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
# AVX512F:  %1 = VMOVUPSZ128rm_NOVLX %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
# AVX512VL: %1 = VMOVUPSZ128rm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1)
# ALL: %xmm0 = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1, align 1)
    %xmm0 = COPY %1(<4 x s32>)
    RET 0, implicit %xmm0

...
---
# 16-byte <4 x s32> load whose memory operand carries no explicit alignment
# (the IR load is "align 16"); the result is on the vecr bank. The aligned
# move is expected for each feature set:
#   default            -> MOVAPSrm,             vr128
#   +avx               -> VMOVAPSrm,            vr128
#   +avx512f only      -> VMOVAPSZ128rm_NOVLX,  vr128x
#   +avx512f +avx512vl -> VMOVAPSZ128rm,        vr128x
# ALL-LABEL: name:            test_load_v4i32_align
name:            test_load_v4i32_align
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# NO_AVX512F:   - { id: 1, class: vr128 }
# AVX512ALL:    - { id: 1, class: vr128x }
  - { id: 0, class: gpr }
  - { id: 1, class: vecr }
# ALL:      %0 = COPY %rdi
# SSE:      %1 = MOVAPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1)
# AVX:      %1 = VMOVAPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1)
# AVX512F:  %1 = VMOVAPSZ128rm_NOVLX %0, 1, _, 0, _ :: (load 16 from %ir.p1)
# AVX512VL: %1 = VMOVAPSZ128rm %0, 1, _, 0, _ :: (load 16 from %ir.p1)
# ALL: %xmm0 = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi

    %0(p0) = COPY %rdi
    %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1)
    %xmm0 = COPY %1(<4 x s32>)
    RET 0, implicit %xmm0

...
---
# 4-byte integer store. The value (%0, s32, from %edi) and the pointer
# (%1, p0, from %rsi) are both on the gpr bank. Every run configuration
# expects MOV32mr with %0 constrained to gr32 and %1 to gr64; the pointer is
# then returned in %rax.
# ALL-LABEL: name:            test_store_i32
name:            test_store_i32
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr32 }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %edi
# ALL:     %1 = COPY %rsi
# ALL:     MOV32mr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
# ALL:     %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %edi, %rsi

    %0(s32) = COPY %edi
    %1(p0) = COPY %rsi
    G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...
---
# 8-byte integer store. The value (%0, s64, from %rdi) and the pointer
# (%1, p0, from %rsi) are both on the gpr bank. Every run configuration
# expects MOV64mr with both registers constrained to gr64; the pointer is
# then returned in %rax.
# ALL-LABEL: name:            test_store_i64
name:            test_store_i64
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: gr64 }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: gpr }
  - { id: 1, class: gpr }
# ALL:     %0 = COPY %rdi
# ALL:     %1 = COPY %rsi
# ALL:     MOV64mr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
# ALL:     %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi, %rsi

    %0(s64) = COPY %rdi
    %1(p0) = COPY %rsi
    G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...
---
# Float store where the value arrives in %xmm0 on the vecr bank (%0) but is
# copied to a gpr-bank register (%2) before being stored. The G_STORE operand
# is therefore a gpr, and every run configuration expects the integer store
# MOV32mr, with %0 constrained to fr32x, %1 (pointer) to gr64 and %2 to gr32.
# The cross-bank COPY from %0 to %2 is expected to remain a plain COPY.
# ALL-LABEL: name:            test_store_float
name:            test_store_float
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: fr32x }
# ALL:   - { id: 1, class: gr64 }
# ALL:   - { id: 2, class: gr32 }
  - { id: 0, class: vecr }
  - { id: 1, class: gpr }
  - { id: 2, class: gpr }
# ALL:     %0 = COPY %xmm0
# ALL:     %1 = COPY %rdi
# ALL:     %2 = COPY %0
# ALL:     MOV32mr %1, 1, _, 0, _, %2 :: (store 4 into %ir.p1)
# ALL:     %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi, %xmm0

    %0(s32) = COPY %xmm0
    %1(p0) = COPY %rdi
    %2(s32) = COPY %0(s32)
    G_STORE %2(s32), %1(p0) :: (store 4 into %ir.p1)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...
---
# Float store where the stored value (%0) stays on the vecr bank, unlike
# test_store_float. The expected opcode and class of %0 depend on the feature
# set:
#   default          -> MOVSSmr,   fr32
#   +avx             -> VMOVSSmr,  fr32
#   +avx512f (+/-vl) -> VMOVSSZmr, fr32x
# The pointer (%1) is constrained to gr64 in every configuration.
# ALL-LABEL: name:            test_store_float_vec
name:            test_store_float_vec
alignment:       4
legalized:       true
regBankSelected: true
registers:
# NO_AVX512F:   - { id: 0, class: fr32 }
# AVX512ALL:    - { id: 0, class: fr32x }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: vecr }
  - { id: 1, class: gpr }
# ALL:       %0 = COPY %xmm0
# ALL:       %1 = COPY %rdi
# SSE:       MOVSSmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
# AVX:       VMOVSSmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
# AVX512ALL: VMOVSSZmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1)
# ALL:       %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi, %xmm0

    %0(s32) = COPY %xmm0
    %1(p0) = COPY %rdi
    G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...
---
# Double store where the value arrives in %xmm0 on the vecr bank (%0) but is
# copied to a gpr-bank register (%2) before being stored. Every run
# configuration expects the integer store MOV64mr, with %0 constrained to
# fr64x and both %1 (pointer) and %2 constrained to gr64. The cross-bank COPY
# from %0 to %2 is expected to remain a plain COPY.
# ALL-LABEL: name:            test_store_double
name:            test_store_double
alignment:       4
legalized:       true
regBankSelected: true
registers:
# ALL:   - { id: 0, class: fr64x }
# ALL:   - { id: 1, class: gr64 }
# ALL:   - { id: 2, class: gr64 }
  - { id: 0, class: vecr }
  - { id: 1, class: gpr }
  - { id: 2, class: gpr }
# ALL:     %0 = COPY %xmm0
# ALL:     %1 = COPY %rdi
# ALL:     %2 = COPY %0
# ALL:     MOV64mr %1, 1, _, 0, _, %2 :: (store 8 into %ir.p1)
# ALL:     %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi, %xmm0

    %0(s64) = COPY %xmm0
    %1(p0) = COPY %rdi
    %2(s64) = COPY %0(s64)
    G_STORE %2(s64), %1(p0) :: (store 8 into %ir.p1)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...
---
# Double store where the stored value (%0) stays on the vecr bank, unlike
# test_store_double. The expected opcode and class of %0 depend on the
# feature set:
#   default          -> MOVSDmr,   fr64
#   +avx             -> VMOVSDmr,  fr64
#   +avx512f (+/-vl) -> VMOVSDZmr, fr64x
# The pointer (%1) is constrained to gr64 in every configuration.
# ALL-LABEL: name:            test_store_double_vec
name:            test_store_double_vec
alignment:       4
legalized:       true
regBankSelected: true
registers:
# NO_AVX512F:   - { id: 0, class: fr64 }
# AVX512ALL:    - { id: 0, class: fr64x }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: vecr }
  - { id: 1, class: gpr }
# ALL:       %0 = COPY %xmm0
# ALL:       %1 = COPY %rdi
# SSE:       MOVSDmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
# AVX:       VMOVSDmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
# AVX512ALL: VMOVSDZmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1)
# ALL:       %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi, %xmm0

    %0(s64) = COPY %xmm0
    %1(p0) = COPY %rdi
    G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...
---
# 16-byte <4 x s32> store with "align 16" on its memory operand; the value
# (%0) is on the vecr bank. The aligned move is expected for each feature set:
#   default            -> MOVAPSmr,             vr128
#   +avx               -> VMOVAPSmr,            vr128
#   +avx512f only      -> VMOVAPSZ128mr_NOVLX,  vr128x
#   +avx512f +avx512vl -> VMOVAPSZ128mr,        vr128x
# The input memory operand spells out "align 16" while the expected output
# lines do not - presumably the printer omits an alignment equal to the
# access size; confirm against the MIR printer.
# ALL-LABEL: name:            test_store_v4i32_align
name:            test_store_v4i32_align
alignment:       4
legalized:       true
regBankSelected: true
registers:
# NO_AVX512F:   - { id: 0, class: vr128 }
# AVX512ALL:    - { id: 0, class: vr128x }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: vecr }
  - { id: 1, class: gpr }
# ALL:       %0 = COPY %xmm0
# ALL:       %1 = COPY %rdi
# SSE:       MOVAPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
# AVX:       VMOVAPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
# AVX512F:   VMOVAPSZ128mr_NOVLX %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
# AVX512VL:  VMOVAPSZ128mr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1)
# ALL:       %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi, %xmm0

    %0(<4 x s32>) = COPY %xmm0
    %1(p0) = COPY %rdi
    G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 16)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...
---
# 16-byte <4 x s32> store with "align 1" on its memory operand; the value
# (%0) is on the vecr bank. The unaligned move is expected for each feature
# set:
#   default            -> MOVUPSmr,             vr128
#   +avx               -> VMOVUPSmr,            vr128
#   +avx512f only      -> VMOVUPSZ128mr_NOVLX,  vr128x
#   +avx512f +avx512vl -> VMOVUPSZ128mr,        vr128x
# The pointer (%1) is constrained to gr64 in every configuration.
# ALL-LABEL: name:            test_store_v4i32_noalign
name:            test_store_v4i32_noalign
alignment:       4
legalized:       true
regBankSelected: true
registers:
# NO_AVX512F:   - { id: 0, class: vr128 }
# AVX512ALL:    - { id: 0, class: vr128x }
# ALL:   - { id: 1, class: gr64 }
  - { id: 0, class: vecr }
  - { id: 1, class: gpr }
# ALL:       %0 = COPY %xmm0
# ALL:       %1 = COPY %rdi
# SSE:       MOVUPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
# AVX:       VMOVUPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
# AVX512F:   VMOVUPSZ128mr_NOVLX %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
# AVX512VL:  VMOVUPSZ128mr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1)
# ALL:       %rax = COPY %1
body:             |
  bb.1 (%ir-block.0):
    liveins: %rdi, %xmm0

    %0(<4 x s32>) = COPY %xmm0
    %1(p0) = COPY %rdi
    G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 1)
    %rax = COPY %1(p0)
    RET 0, implicit %rax

...