diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs deleted file mode 100644 index 7c26143622..0000000000 --- a/crates/intrinsic-test/src/arm/config.rs +++ /dev/null @@ -1,25 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from a JSON specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = ""; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] -#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] -#![feature(stdarch_neon_f16)] - -#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] -use core_arch::arch::aarch64::*; - -#[cfg(target_arch = "arm")] -use core_arch::arch::arm::*; -"#; diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 6cd578fe43..a60e0ff155 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -1,4 +1,3 @@ -mod config; mod intrinsic; mod json_parser; mod types; @@ -20,14 +19,20 @@ impl SupportedArchitecture for Arm { &self.0 } - const NOTICE: &str = config::NOTICE; + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from a JSON specification, published under the same license as the +// `intrinsic-test` crate. +"#; - const PLATFORM_C_HEADERS: &[&str] = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; + const C_PRELUDE: &str = r#" +#include +#include +#include +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { // GCC uses an extra `-` in the arch name match cli_options.cc_arg_style { CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], @@ -125,3 +130,22 @@ impl SupportedArchitecture for Arm { Self(intrinsics) } } + +const RUST_PRELUDE: &str = r#" +#![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))] +#![cfg_attr(target_arch = "arm", feature(stdarch_aarch32_crc32))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))] +#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))] +#![feature(stdarch_neon_f16)] + +#[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] +use core_arch::arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use core_arch::arch::arm::*; +"#; diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index e4b6c9815c..21cc5cfa2e 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -16,18 +16,13 @@ use super::intrinsic_helpers::TypeDefinition; /// ``` pub fn write_wrapper_c( w: &mut impl std::io::Write, - notice: &str, - platform_headers: &[&str], intrinsics: &[Intrinsic], ) -> std::io::Result<()> { - write!(w, "{notice}")?; + write!(w, "{}", A::NOTICE)?; writeln!(w, "#include ")?; writeln!(w, "#include ")?; - - for header in platform_headers { - writeln!(w, "#include <{header}>")?; - } + writeln!(w, "{}", A::C_PRELUDE)?; for intrinsic in intrinsics { intrinsic.iter_specializations(|imm_values| { diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index a2100e839a..432cdde3d4 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -116,14 +116,11 @@ pub fn write_lib_rs( #![allow(non_camel_case_types)] #![allow(non_snake_case)] -{cfg} +{prelude} {COMMON_RUST_DEFINITIONS} - -{definitions} "#, notice = A::NOTICE, - cfg = A::PLATFORM_RUST_CFGS, - definitions = A::PLATFORM_RUST_DEFINITIONS, + prelude = A::RUST_PRELUDE, )?; let mut seen = std::collections::HashSet::new(); diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index ff0b6cbbaf..73daabbd66 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -38,12 +38,10 @@ pub trait SupportedArchitecture: Sized { const NOTICE: &str; - const PLATFORM_C_HEADERS: &[&str]; + const C_PRELUDE: &str; + const RUST_PRELUDE: &str; - const PLATFORM_RUST_CFGS: &str; - const PLATFORM_RUST_DEFINITIONS: &str; - - fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; + fn c_compiler_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; fn generate_c_file(&self) { let (max_chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); @@ -55,14 +53,14 @@ pub trait SupportedArchitecture: Sized { .map(|(i, chunk)| { let c_filename = format!("c_programs/wrapper_{i}.c"); let mut file = File::create(&c_filename).unwrap(); - write_wrapper_c(&mut file, Self::NOTICE, Self::PLATFORM_C_HEADERS, chunk) + write_wrapper_c(&mut file, chunk) }) .collect::>() .unwrap(); } fn generate_rust_file(&self, cli_options: &ProcessedCli) { - let arch_flags = self.arch_flags(cli_options); + let arch_flags = self.c_compiler_flags(cli_options); std::fs::create_dir_all("rust_programs").unwrap(); diff --git a/crates/intrinsic-test/src/x86/config.rs b/crates/intrinsic-test/src/x86/config.rs deleted file mode 100644 index 68737ab5ac..0000000000 --- a/crates/intrinsic-test/src/x86/config.rs +++ /dev/null @@ -1,138 +0,0 @@ -pub const NOTICE: &str = "\ -// This is a transient test file, not intended for distribution. Some aspects of the -// test are derived from an XML specification, published under the same license as the -// `intrinsic-test` crate.\n"; - -pub const PLATFORM_RUST_DEFINITIONS: &str = r#" -use core_arch::arch::x86_64::*; - -#[inline] -unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { - _mm_castph_si128(_mm_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { - _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { - _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) -} - - -#[inline] -unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { - _mm_castps_ph(_mm_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { - _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { - _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { - _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { - _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { - _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) -} - -// === -#[inline] -unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) -} - -#[inline] -unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { - _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { - _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) -} - -#[inline] -unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { - _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) -} - -"#; - -pub const PLATFORM_RUST_CFGS: &str = r#" -#![feature(stdarch_x86_avx512_bf16)] -#![feature(stdarch_x86_avx512_f16)] -#![feature(stdarch_x86_rtm)] -#![feature(x86_amx_intrinsics)] -"#; diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs index ec198d9218..ce49550329 100644 --- a/crates/intrinsic-test/src/x86/mod.rs +++ b/crates/intrinsic-test/src/x86/mod.rs @@ -1,4 +1,3 @@ -mod config; mod constraint; mod intrinsic; mod types; @@ -22,14 +21,18 @@ impl SupportedArchitecture for X86 { &self.intrinsics } - const NOTICE: &str = config::NOTICE; + const NOTICE: &str = r#" +// This is a transient test file, not intended for distribution. Some aspects of the +// test are derived from an XML specification, published under the same license as the +// `intrinsic-test` crate. +"#; - const PLATFORM_C_HEADERS: &[&str] = &["immintrin.h"]; + const C_PRELUDE: &str = r#" +#include +"#; + const RUST_PRELUDE: &str = RUST_PRELUDE; - const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; - const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - - fn arch_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { + fn c_compiler_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { vec![ "-maes", "-mf16c", @@ -97,3 +100,134 @@ impl SupportedArchitecture for X86 { Self { intrinsics } } } + +const RUST_PRELUDE: &str = r#" +#![feature(stdarch_x86_avx512_bf16)] +#![feature(stdarch_x86_avx512_f16)] +#![feature(stdarch_x86_rtm)] +#![feature(x86_amx_intrinsics)] + +use core_arch::arch::x86_64::*; + +#[inline] +unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i { + _mm_castph_si128(_mm_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ph_to___m256i(mem_addr: *const f16) -> __m256i { + _mm256_castph_si256(_mm256_loadu_ph(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ph_to___mm512i(mem_addr: *const f16) -> __m512i { + _mm512_castph_si512(_mm512_loadu_ph(mem_addr)) +} + + +#[inline] +unsafe fn _mm_loadu_ps_to___m128h(mem_addr: *const f32) -> __m128h { + _mm_castps_ph(_mm_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_ps_to___m256h(mem_addr: *const f32) -> __m256h { + _mm256_castps_ph(_mm256_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_ps_to___m512h(mem_addr: *const f32) -> __m512h { + _mm512_castps_ph(_mm512_loadu_ps(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi16_to___m128d(mem_addr: *const i16) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256d(mem_addr: *const i16) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512d(mem_addr: *const i16) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128d(mem_addr: *const i32) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256d(mem_addr: *const i32) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512d(mem_addr: *const i32) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128d(mem_addr: *const i64) -> __m128d { + _mm_castsi128_pd(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256d(mem_addr: *const i64) -> __m256d { + _mm256_castsi256_pd(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512d(mem_addr: *const i64) -> __m512d { + _mm512_castsi512_pd(_mm512_loadu_epi64(mem_addr)) +} + +// === +#[inline] +unsafe fn _mm_loadu_epi16_to___m128(mem_addr: *const i16) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi16_to___m256(mem_addr: *const i16) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi16_to___m512(mem_addr: *const i16) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi16(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi32_to___m128(mem_addr: *const i32) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi32_to___m256(mem_addr: *const i32) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi32_to___m512(mem_addr: *const i32) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi32(mem_addr)) +} + +#[inline] +unsafe fn _mm_loadu_epi64_to___m128(mem_addr: *const i64) -> __m128 { + _mm_castsi128_ps(_mm_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm256_loadu_epi64_to___m256(mem_addr: *const i64) -> __m256 { + _mm256_castsi256_ps(_mm256_loadu_epi64(mem_addr)) +} + +#[inline] +unsafe fn _mm512_loadu_epi64_to___m512(mem_addr: *const i64) -> __m512 { + _mm512_castsi512_ps(_mm512_loadu_epi64(mem_addr)) +} +"#;