From 5ee8474fecbe2f909575dffc6dea0fd75cd6cd0e Mon Sep 17 00:00:00 2001 From: jc_gargma Date: Thu, 28 Mar 2019 19:01:30 -0700 Subject: Updated to 66.0.2 --- 0001-bz-1468911.patch | 26961 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 26961 insertions(+) create mode 100644 0001-bz-1468911.patch diff --git a/0001-bz-1468911.patch b/0001-bz-1468911.patch new file mode 100644 index 0000000..6e5293f --- /dev/null +++ b/0001-bz-1468911.patch @@ -0,0 +1,26961 @@ +From b68f15b7a57f7df52d955f7f75f9df3b78041764 Mon Sep 17 00:00:00 2001 +Message-Id: +From: "Jan Alexander Steffens (heftig)" +Date: Tue, 19 Mar 2019 20:45:22 +0100 +Subject: [PATCH] bz 1468911 + +https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 +--- + .cargo/config.in | 5 + + Cargo.lock | 53 +- + Cargo.toml | 1 + + third_party/rust/cfg-if/.cargo-checksum.json | 2 +- + third_party/rust/cfg-if/Cargo.toml | 28 +- + third_party/rust/cfg-if/README.md | 18 +- + third_party/rust/cfg-if/src/lib.rs | 53 +- + .../rust/encoding_rs/.cargo-checksum.json | 2 +- + third_party/rust/encoding_rs/Cargo.toml | 12 +- + third_party/rust/encoding_rs/README.md | 52 +- + third_party/rust/encoding_rs/build.rs | 8 + + third_party/rust/encoding_rs/src/handles.rs | 2 +- + third_party/rust/encoding_rs/src/lib.rs | 7 +- + third_party/rust/encoding_rs/src/mem.rs | 24 +- + .../rust/encoding_rs/src/simd_funcs.rs | 93 +- + .../rust/encoding_rs/src/x_user_defined.rs | 7 +- + third_party/rust/packed_simd/.appveyor.yml | 59 + + .../rust/packed_simd/.cargo-checksum.json | 1 + + third_party/rust/packed_simd/.travis.yml | 308 ++++ + third_party/rust/packed_simd/Cargo.toml | 42 + + .../rust/{simd => packed_simd}/LICENSE-APACHE | 0 + .../rust/{simd => packed_simd}/LICENSE-MIT | 4 +- + third_party/rust/packed_simd/bors.toml | 3 + + third_party/rust/packed_simd/build.rs | 8 + + third_party/rust/packed_simd/ci/all.sh | 71 + + .../packed_simd/ci/android-install-ndk.sh | 37 + + .../packed_simd/ci/android-install-sdk.sh | 60 + + .../rust/packed_simd/ci/android-sysimage.sh | 56 + + third_party/rust/packed_simd/ci/benchmark.sh | 32 + + .../ci/deploy_and_run_on_ios_simulator.rs | 176 +++ + .../docker/aarch64-linux-android/Dockerfile | 47 + + .../aarch64-unknown-linux-gnu/Dockerfile | 14 + + .../docker/arm-linux-androideabi/Dockerfile | 47 + + .../arm-unknown-linux-gnueabi/Dockerfile | 15 + + .../arm-unknown-linux-gnueabihf/Dockerfile | 13 + + .../armv7-unknown-linux-gnueabihf/Dockerfile | 13 + + .../docker/i586-unknown-linux-gnu/Dockerfile | 7 + + .../docker/i686-unknown-linux-gnu/Dockerfile | 7 + + .../docker/mips-unknown-linux-gnu/Dockerfile | 13 + + .../mips64-unknown-linux-gnuabi64/Dockerfile | 10 + + .../Dockerfile | 10 + + .../mipsel-unknown-linux-musl/Dockerfile | 25 + + .../powerpc-unknown-linux-gnu/Dockerfile | 12 + + .../powerpc64-unknown-linux-gnu/Dockerfile | 17 + + .../powerpc64le-unknown-linux-gnu/Dockerfile | 11 + + .../docker/s390x-unknown-linux-gnu/Dockerfile | 20 + + .../sparc64-unknown-linux-gnu/Dockerfile | 18 + + .../thumbv7neon-linux-androideabi/Dockerfile | 47 + + .../Dockerfile | 13 + + .../docker/wasm32-unknown-unknown/Dockerfile | 37 + + .../ci/docker/x86_64-linux-android/Dockerfile | 29 + + .../Dockerfile | 16 + + .../x86_64-unknown-linux-gnu/Dockerfile | 10 + + third_party/rust/packed_simd/ci/dox.sh | 24 + + .../rust/packed_simd/ci/linux-s390x.sh | 18 + + .../rust/packed_simd/ci/linux-sparc64.sh | 17 + + third_party/rust/packed_simd/ci/lld-shim.rs | 11 + + .../rust/packed_simd/ci/max_line_width.sh | 17 +
+ third_party/rust/packed_simd/ci/run-docker.sh | 38 + + third_party/rust/packed_simd/ci/run.sh | 96 ++ + .../rust/packed_simd/ci/run_examples.sh | 51 + + .../rust/packed_simd/ci/runtest-android.rs | 45 + + .../rust/packed_simd/ci/setup_benchmarks.sh | 10 + + .../rust/packed_simd/ci/test-runner-linux | 24 + + third_party/rust/packed_simd/contributing.md | 67 + + .../rust/packed_simd/perf-guide/.gitignore | 1 + + .../rust/packed_simd/perf-guide/book.toml | 12 + + .../packed_simd/perf-guide/src/SUMMARY.md | 21 + + .../rust/packed_simd/perf-guide/src/ascii.css | 4 + + .../perf-guide/src/bound_checks.md | 22 + + .../perf-guide/src/float-math/approx.md | 8 + + .../perf-guide/src/float-math/fma.md | 6 + + .../perf-guide/src/float-math/fp.md | 3 + + .../perf-guide/src/float-math/svml.md | 7 + + .../perf-guide/src/introduction.md | 26 + + .../packed_simd/perf-guide/src/prof/linux.md | 107 ++ + .../packed_simd/perf-guide/src/prof/mca.md | 100 ++ + .../perf-guide/src/prof/profiling.md | 14 + + .../src/target-feature/attribute.md | 5 + + .../perf-guide/src/target-feature/features.md | 13 + + .../perf-guide/src/target-feature/inlining.md | 5 + + .../perf-guide/src/target-feature/practice.md | 31 + + .../perf-guide/src/target-feature/runtime.md | 5 + + .../src/target-feature/rustflags.md | 77 + + .../perf-guide/src/vert-hor-ops.md | 76 + + third_party/rust/packed_simd/readme.md | 182 +++ + third_party/rust/packed_simd/rustfmt.toml | 7 + + third_party/rust/packed_simd/src/api.rs | 301 ++++ + .../rust/packed_simd/src/api/bit_manip.rs | 128 ++ + third_party/rust/packed_simd/src/api/cast.rs | 108 ++ + .../rust/packed_simd/src/api/cast/macros.rs | 82 + + .../rust/packed_simd/src/api/cast/v128.rs | 79 + + .../rust/packed_simd/src/api/cast/v16.rs | 17 + + .../rust/packed_simd/src/api/cast/v256.rs | 81 + + .../rust/packed_simd/src/api/cast/v32.rs | 30 + + .../rust/packed_simd/src/api/cast/v512.rs | 68 + + .../rust/packed_simd/src/api/cast/v64.rs | 47 + + third_party/rust/packed_simd/src/api/cmp.rs | 16 + + .../rust/packed_simd/src/api/cmp/eq.rs | 27 + + .../rust/packed_simd/src/api/cmp/ord.rs | 43 + + .../packed_simd/src/api/cmp/partial_eq.rs | 67 + + .../packed_simd/src/api/cmp/partial_ord.rs | 234 +++ + .../rust/packed_simd/src/api/cmp/vertical.rs | 114 ++ + .../rust/packed_simd/src/api/default.rs | 28 + + third_party/rust/packed_simd/src/api/fmt.rs | 12 + + .../rust/packed_simd/src/api/fmt/binary.rs | 56 + + .../rust/packed_simd/src/api/fmt/debug.rs | 62 + + .../rust/packed_simd/src/api/fmt/lower_hex.rs | 56 + + .../rust/packed_simd/src/api/fmt/octal.rs | 56 + + .../rust/packed_simd/src/api/fmt/upper_hex.rs | 56 + + third_party/rust/packed_simd/src/api/from.rs | 7 + + .../packed_simd/src/api/from/from_array.rs | 121 ++ + .../packed_simd/src/api/from/from_vector.rs | 67 + + third_party/rust/packed_simd/src/api/hash.rs | 47 + + .../rust/packed_simd/src/api/into_bits.rs | 59 + + .../src/api/into_bits/arch_specific.rs | 190 +++ + .../packed_simd/src/api/into_bits/macros.rs | 74 + + .../packed_simd/src/api/into_bits/v128.rs | 28 + + .../rust/packed_simd/src/api/into_bits/v16.rs | 9 + + .../packed_simd/src/api/into_bits/v256.rs | 27 + + .../rust/packed_simd/src/api/into_bits/v32.rs | 13 + + .../packed_simd/src/api/into_bits/v512.rs | 27 + + .../rust/packed_simd/src/api/into_bits/v64.rs | 18 + + third_party/rust/packed_simd/src/api/math.rs | 4 + + .../rust/packed_simd/src/api/math/float.rs | 61 + + .../packed_simd/src/api/math/float/abs.rs | 31 + + .../packed_simd/src/api/math/float/consts.rs | 86 + + 
.../packed_simd/src/api/math/float/cos.rs | 44 + + .../packed_simd/src/api/math/float/exp.rs | 33 + + .../rust/packed_simd/src/api/math/float/ln.rs | 33 + + .../packed_simd/src/api/math/float/mul_add.rs | 44 + + .../src/api/math/float/mul_adde.rs | 48 + + .../packed_simd/src/api/math/float/powf.rs | 36 + + .../packed_simd/src/api/math/float/recpre.rs | 36 + + .../packed_simd/src/api/math/float/rsqrte.rs | 40 + + .../packed_simd/src/api/math/float/sin.rs | 50 + + .../packed_simd/src/api/math/float/sqrt.rs | 35 + + .../packed_simd/src/api/math/float/sqrte.rs | 44 + + .../rust/packed_simd/src/api/minimal.rs | 6 + + .../rust/packed_simd/src/api/minimal/iuf.rs | 167 ++ + .../rust/packed_simd/src/api/minimal/mask.rs | 174 +++ + .../rust/packed_simd/src/api/minimal/ptr.rs | 1385 +++++++++++++++++ + third_party/rust/packed_simd/src/api/ops.rs | 32 + + .../src/api/ops/scalar_arithmetic.rs | 203 +++ + .../packed_simd/src/api/ops/scalar_bitwise.rs | 162 ++ + .../src/api/ops/scalar_mask_bitwise.rs | 140 ++ + .../packed_simd/src/api/ops/scalar_shifts.rs | 107 ++ + .../src/api/ops/vector_arithmetic.rs | 148 ++ + .../packed_simd/src/api/ops/vector_bitwise.rs | 129 ++ + .../src/api/ops/vector_float_min_max.rs | 69 + + .../src/api/ops/vector_int_min_max.rs | 57 + + .../src/api/ops/vector_mask_bitwise.rs | 116 ++ + .../packed_simd/src/api/ops/vector_neg.rs | 43 + + .../packed_simd/src/api/ops/vector_rotates.rs | 90 ++ + .../packed_simd/src/api/ops/vector_shifts.rs | 107 ++ + third_party/rust/packed_simd/src/api/ptr.rs | 4 + + .../packed_simd/src/api/ptr/gather_scatter.rs | 241 +++ + .../rust/packed_simd/src/api/reductions.rs | 12 + + .../packed_simd/src/api/reductions/bitwise.rs | 151 ++ + .../src/api/reductions/float_arithmetic.rs | 312 ++++ + .../src/api/reductions/integer_arithmetic.rs | 197 +++ + .../packed_simd/src/api/reductions/mask.rs | 89 ++ + .../packed_simd/src/api/reductions/min_max.rs | 377 +++++ + .../rust/packed_simd/src/api/select.rs | 75 + + .../rust/packed_simd/src/api/shuffle.rs | 190 +++ + .../rust/packed_simd/src/api/shuffle1_dyn.rs | 159 ++ + third_party/rust/packed_simd/src/api/slice.rs | 7 + + .../packed_simd/src/api/slice/from_slice.rs | 216 +++ + .../src/api/slice/write_to_slice.rs | 211 +++ + .../rust/packed_simd/src/api/swap_bytes.rs | 192 +++ + third_party/rust/packed_simd/src/codegen.rs | 59 + + .../rust/packed_simd/src/codegen/bit_manip.rs | 354 +++++ + .../rust/packed_simd/src/codegen/llvm.rs | 99 ++ + .../rust/packed_simd/src/codegen/math.rs | 3 + + .../packed_simd/src/codegen/math/float.rs | 18 + + .../packed_simd/src/codegen/math/float/abs.rs | 103 ++ + .../packed_simd/src/codegen/math/float/cos.rs | 103 ++ + .../src/codegen/math/float/cos_pi.rs | 87 ++ + .../packed_simd/src/codegen/math/float/exp.rs | 112 ++ + .../packed_simd/src/codegen/math/float/ln.rs | 112 ++ + .../src/codegen/math/float/macros.rs | 559 +++++++ + .../src/codegen/math/float/mul_add.rs | 109 ++ + .../src/codegen/math/float/mul_adde.rs | 66 + + .../src/codegen/math/float/powf.rs | 112 ++ + .../packed_simd/src/codegen/math/float/sin.rs | 103 ++ + .../src/codegen/math/float/sin_cos_pi.rs | 195 +++ + .../src/codegen/math/float/sin_pi.rs | 87 ++ + .../src/codegen/math/float/sqrt.rs | 103 ++ + .../src/codegen/math/float/sqrte.rs | 67 + + .../src/codegen/pointer_sized_int.rs | 28 + + .../packed_simd/src/codegen/reductions.rs | 1 + + .../src/codegen/reductions/mask.rs | 69 + + .../src/codegen/reductions/mask/aarch64.rs | 71 + + .../src/codegen/reductions/mask/arm.rs | 54 + + 
.../src/codegen/reductions/mask/fallback.rs | 6 + + .../codegen/reductions/mask/fallback_impl.rs | 237 +++ + .../src/codegen/reductions/mask/x86.rs | 194 +++ + .../src/codegen/reductions/mask/x86/avx.rs | 101 ++ + .../src/codegen/reductions/mask/x86/avx2.rs | 35 + + .../src/codegen/reductions/mask/x86/sse.rs | 68 + + .../src/codegen/reductions/mask/x86/sse2.rs | 70 + + .../rust/packed_simd/src/codegen/shuffle.rs | 302 ++++ + .../packed_simd/src/codegen/shuffle1_dyn.rs | 432 +++++ + .../packed_simd/src/codegen/swap_bytes.rs | 189 +++ + .../rust/packed_simd/src/codegen/v128.rs | 46 + + .../rust/packed_simd/src/codegen/v16.rs | 7 + + .../rust/packed_simd/src/codegen/v256.rs | 78 + + .../rust/packed_simd/src/codegen/v32.rs | 11 + + .../rust/packed_simd/src/codegen/v512.rs | 145 ++ + .../rust/packed_simd/src/codegen/v64.rs | 21 + + .../rust/packed_simd/src/codegen/vPtr.rs | 33 + + .../rust/packed_simd/src/codegen/vSize.rs | 43 + + third_party/rust/packed_simd/src/lib.rs | 327 ++++ + third_party/rust/packed_simd/src/masks.rs | 128 ++ + third_party/rust/packed_simd/src/sealed.rs | 41 + + third_party/rust/packed_simd/src/testing.rs | 8 + + .../rust/packed_simd/src/testing/macros.rs | 44 + + .../rust/packed_simd/src/testing/utils.rs | 135 ++ + third_party/rust/packed_simd/src/v128.rs | 80 + + third_party/rust/packed_simd/src/v16.rs | 16 + + third_party/rust/packed_simd/src/v256.rs | 86 + + third_party/rust/packed_simd/src/v32.rs | 29 + + third_party/rust/packed_simd/src/v512.rs | 99 ++ + third_party/rust/packed_simd/src/v64.rs | 66 + + third_party/rust/packed_simd/src/vPtr.rs | 34 + + third_party/rust/packed_simd/src/vSize.rs | 53 + + .../rust/packed_simd/tests/endianness.rs | 262 ++++ + third_party/rust/simd/.cargo-checksum.json | 1 - + third_party/rust/simd/Cargo.toml | 37 - + third_party/rust/simd/README.md | 11 - + third_party/rust/simd/benches/mandelbrot.rs | 117 -- + third_party/rust/simd/benches/matrix.rs | 485 ------ + third_party/rust/simd/build.rs | 3 - + third_party/rust/simd/examples/axpy.rs | 65 - + third_party/rust/simd/examples/convert.rs | 38 - + third_party/rust/simd/examples/dot-product.rs | 60 - + .../simd/examples/fannkuch-redux-nosimd.rs | 156 -- + .../rust/simd/examples/fannkuch-redux.rs | 233 --- + third_party/rust/simd/examples/mandelbrot.rs | 125 -- + .../rust/simd/examples/matrix-inverse.rs | 281 ---- + .../rust/simd/examples/nbody-nosimd.rs | 156 -- + third_party/rust/simd/examples/nbody.rs | 170 -- + third_party/rust/simd/examples/ops.rs | 10 - + .../simd/examples/spectral-norm-nosimd.rs | 106 -- + .../rust/simd/examples/spectral-norm.rs | 74 - + third_party/rust/simd/src/aarch64/mod.rs | 3 - + third_party/rust/simd/src/aarch64/neon.rs | 681 -------- + third_party/rust/simd/src/arm/mod.rs | 4 - + third_party/rust/simd/src/arm/neon.rs | 622 -------- + third_party/rust/simd/src/common.rs | 520 ------- + third_party/rust/simd/src/lib.rs | 804 ---------- + third_party/rust/simd/src/sixty_four.rs | 228 --- + third_party/rust/simd/src/v256.rs | 436 ------ + third_party/rust/simd/src/x86/avx.rs | 290 ---- + third_party/rust/simd/src/x86/avx2.rs | 65 - + third_party/rust/simd/src/x86/mod.rs | 16 - + third_party/rust/simd/src/x86/sse2.rs | 359 ----- + third_party/rust/simd/src/x86/sse3.rs | 57 - + third_party/rust/simd/src/x86/sse4_1.rs | 155 -- + third_party/rust/simd/src/x86/sse4_2.rs | 19 - + third_party/rust/simd/src/x86/ssse3.rs | 172 -- + toolkit/moz.configure | 11 +- + 262 files changed, 17410 insertions(+), 6733 deletions(-) + create mode 100644 
third_party/rust/packed_simd/.appveyor.yml + create mode 100644 third_party/rust/packed_simd/.cargo-checksum.json + create mode 100644 third_party/rust/packed_simd/.travis.yml + create mode 100644 third_party/rust/packed_simd/Cargo.toml + rename third_party/rust/{simd => packed_simd}/LICENSE-APACHE (100%) + rename third_party/rust/{simd => packed_simd}/LICENSE-MIT (93%) + create mode 100644 third_party/rust/packed_simd/bors.toml + create mode 100644 third_party/rust/packed_simd/build.rs + create mode 100644 third_party/rust/packed_simd/ci/all.sh + create mode 100644 third_party/rust/packed_simd/ci/android-install-ndk.sh + create mode 100644 third_party/rust/packed_simd/ci/android-install-sdk.sh + create mode 100644 third_party/rust/packed_simd/ci/android-sysimage.sh + create mode 100644 third_party/rust/packed_simd/ci/benchmark.sh + create mode 100644 third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs + create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile + create mode 100644 third_party/rust/packed_simd/ci/dox.sh + create mode 100644 third_party/rust/packed_simd/ci/linux-s390x.sh + create mode 100644 third_party/rust/packed_simd/ci/linux-sparc64.sh + create mode 100644 third_party/rust/packed_simd/ci/lld-shim.rs + create mode 100644 third_party/rust/packed_simd/ci/max_line_width.sh + create mode 100644 
third_party/rust/packed_simd/ci/run-docker.sh + create mode 100644 third_party/rust/packed_simd/ci/run.sh + create mode 100644 third_party/rust/packed_simd/ci/run_examples.sh + create mode 100644 third_party/rust/packed_simd/ci/runtest-android.rs + create mode 100644 third_party/rust/packed_simd/ci/setup_benchmarks.sh + create mode 100644 third_party/rust/packed_simd/ci/test-runner-linux + create mode 100644 third_party/rust/packed_simd/contributing.md + create mode 100644 third_party/rust/packed_simd/perf-guide/.gitignore + create mode 100644 third_party/rust/packed_simd/perf-guide/book.toml + create mode 100644 third_party/rust/packed_simd/perf-guide/src/SUMMARY.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/ascii.css + create mode 100644 third_party/rust/packed_simd/perf-guide/src/bound_checks.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/approx.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fma.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/fp.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/float-math/svml.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/introduction.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/linux.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/mca.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/prof/profiling.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/features.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md + create mode 100644 third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md + create mode 100644 third_party/rust/packed_simd/readme.md + create mode 100644 third_party/rust/packed_simd/rustfmt.toml + create mode 100644 third_party/rust/packed_simd/src/api.rs + create mode 100644 third_party/rust/packed_simd/src/api/bit_manip.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/macros.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v128.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v16.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v256.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v32.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v512.rs + create mode 100644 third_party/rust/packed_simd/src/api/cast/v64.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/eq.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/ord.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_eq.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/partial_ord.rs + create mode 100644 third_party/rust/packed_simd/src/api/cmp/vertical.rs + create mode 100644 third_party/rust/packed_simd/src/api/default.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt.rs + create mode 100644 
third_party/rust/packed_simd/src/api/fmt/binary.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/debug.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/lower_hex.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/octal.rs + create mode 100644 third_party/rust/packed_simd/src/api/fmt/upper_hex.rs + create mode 100644 third_party/rust/packed_simd/src/api/from.rs + create mode 100644 third_party/rust/packed_simd/src/api/from/from_array.rs + create mode 100644 third_party/rust/packed_simd/src/api/from/from_vector.rs + create mode 100644 third_party/rust/packed_simd/src/api/hash.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/macros.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v128.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v16.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v256.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v32.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v512.rs + create mode 100644 third_party/rust/packed_simd/src/api/into_bits/v64.rs + create mode 100644 third_party/rust/packed_simd/src/api/math.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/abs.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/consts.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/cos.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/exp.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/ln.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_add.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/mul_adde.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/powf.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/recpre.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/rsqrte.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/sin.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrt.rs + create mode 100644 third_party/rust/packed_simd/src/api/math/float/sqrte.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal/iuf.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal/mask.rs + create mode 100644 third_party/rust/packed_simd/src/api/minimal/ptr.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs + create mode 100644 
third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_neg.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_rotates.rs + create mode 100644 third_party/rust/packed_simd/src/api/ops/vector_shifts.rs + create mode 100644 third_party/rust/packed_simd/src/api/ptr.rs + create mode 100644 third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/bitwise.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/mask.rs + create mode 100644 third_party/rust/packed_simd/src/api/reductions/min_max.rs + create mode 100644 third_party/rust/packed_simd/src/api/select.rs + create mode 100644 third_party/rust/packed_simd/src/api/shuffle.rs + create mode 100644 third_party/rust/packed_simd/src/api/shuffle1_dyn.rs + create mode 100644 third_party/rust/packed_simd/src/api/slice.rs + create mode 100644 third_party/rust/packed_simd/src/api/slice/from_slice.rs + create mode 100644 third_party/rust/packed_simd/src/api/slice/write_to_slice.rs + create mode 100644 third_party/rust/packed_simd/src/api/swap_bytes.rs + create mode 100644 third_party/rust/packed_simd/src/codegen.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/bit_manip.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/llvm.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/abs.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/exp.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/ln.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/macros.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/powf.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs + create 
mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/swap_bytes.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v128.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v16.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v256.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v32.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v512.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/v64.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/vPtr.rs + create mode 100644 third_party/rust/packed_simd/src/codegen/vSize.rs + create mode 100644 third_party/rust/packed_simd/src/lib.rs + create mode 100644 third_party/rust/packed_simd/src/masks.rs + create mode 100644 third_party/rust/packed_simd/src/sealed.rs + create mode 100644 third_party/rust/packed_simd/src/testing.rs + create mode 100644 third_party/rust/packed_simd/src/testing/macros.rs + create mode 100644 third_party/rust/packed_simd/src/testing/utils.rs + create mode 100644 third_party/rust/packed_simd/src/v128.rs + create mode 100644 third_party/rust/packed_simd/src/v16.rs + create mode 100644 third_party/rust/packed_simd/src/v256.rs + create mode 100644 third_party/rust/packed_simd/src/v32.rs + create mode 100644 third_party/rust/packed_simd/src/v512.rs + create mode 100644 third_party/rust/packed_simd/src/v64.rs + create mode 100644 third_party/rust/packed_simd/src/vPtr.rs + create mode 100644 third_party/rust/packed_simd/src/vSize.rs + create mode 100644 third_party/rust/packed_simd/tests/endianness.rs + delete mode 100644 third_party/rust/simd/.cargo-checksum.json + delete mode 100644 third_party/rust/simd/Cargo.toml + delete mode 100644 third_party/rust/simd/README.md + delete mode 100755 third_party/rust/simd/benches/mandelbrot.rs + delete mode 100755 third_party/rust/simd/benches/matrix.rs + delete mode 100644 third_party/rust/simd/build.rs + delete mode 100755 third_party/rust/simd/examples/axpy.rs + delete mode 100644 third_party/rust/simd/examples/convert.rs + delete mode 100755 third_party/rust/simd/examples/dot-product.rs + delete mode 100644 third_party/rust/simd/examples/fannkuch-redux-nosimd.rs + delete mode 100755 third_party/rust/simd/examples/fannkuch-redux.rs + delete mode 100755 third_party/rust/simd/examples/mandelbrot.rs + delete mode 100644 third_party/rust/simd/examples/matrix-inverse.rs + delete mode 100644 third_party/rust/simd/examples/nbody-nosimd.rs + delete mode 100755 third_party/rust/simd/examples/nbody.rs + delete mode 100644 third_party/rust/simd/examples/ops.rs + delete mode 100644 third_party/rust/simd/examples/spectral-norm-nosimd.rs + delete mode 100755 third_party/rust/simd/examples/spectral-norm.rs + delete mode 100644 third_party/rust/simd/src/aarch64/mod.rs + delete mode 100644 third_party/rust/simd/src/aarch64/neon.rs + delete mode 100644 third_party/rust/simd/src/arm/mod.rs + delete mode 100644 
third_party/rust/simd/src/arm/neon.rs + delete mode 100644 third_party/rust/simd/src/common.rs + delete mode 100644 third_party/rust/simd/src/lib.rs + delete mode 100644 third_party/rust/simd/src/sixty_four.rs + delete mode 100644 third_party/rust/simd/src/v256.rs + delete mode 100644 third_party/rust/simd/src/x86/avx.rs + delete mode 100644 third_party/rust/simd/src/x86/avx2.rs + delete mode 100644 third_party/rust/simd/src/x86/mod.rs + delete mode 100644 third_party/rust/simd/src/x86/sse2.rs + delete mode 100644 third_party/rust/simd/src/x86/sse3.rs + delete mode 100644 third_party/rust/simd/src/x86/sse4_1.rs + delete mode 100644 third_party/rust/simd/src/x86/sse4_2.rs + delete mode 100644 third_party/rust/simd/src/x86/ssse3.rs + +diff --git a/.cargo/config.in b/.cargo/config.in +index 94f5732891fb..57ae36311e52 100644 +--- a/.cargo/config.in ++++ b/.cargo/config.in +@@ -17,6 +17,11 @@ git = "https://github.com/froydnj/winapi-rs" + branch = "aarch64" + replace-with = "vendored-sources" + ++[source."https://github.com/rust-lang-nursery/packed_simd"] ++git = "https://github.com/hsivonen/packed_simd" ++branch = "rust_1_32" ++replace-with = "vendored-sources" ++ + [source.vendored-sources] + directory = '@top_srcdir@/third_party/rust' + +diff --git a/Cargo.lock b/Cargo.lock +index 8896cfeddb28..a048a5522ab0 100644 +--- a/Cargo.lock ++++ b/Cargo.lock +@@ -141,7 +141,7 @@ version = "0.3.9" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "backtrace-sys 0.1.24 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", +@@ -218,7 +218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +@@ -372,7 +372,7 @@ dependencies = [ + + [[package]] + name = "cfg-if" +-version = "0.1.2" ++version = "0.1.6" + source = "registry+https://github.com/rust-lang/crates.io-index" + + [[package]] +@@ -575,41 +575,41 @@ version = "0.3.1" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + 
name = "crossbeam-epoch" + version = "0.4.3" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ + "arrayvec 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "scopeguard 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "crossbeam-utils" + version = "0.2.2" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "crossbeam-utils" + version = "0.3.2" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] +@@ -848,25 +848,25 @@ name = "encoding_c" + version = "0.9.0" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", ++ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] + name = "encoding_glue" + version = "0.1.0" + dependencies = [ +- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", ++ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + "nserror 0.1.0", + "nsstring 0.1.0", + ] + + [[package]] + name = "encoding_rs" +-version = "0.8.14" ++version = "0.8.16" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +- "simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ++ "packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)", + ] + + [[package]] +@@ -1477,7 +1477,7 @@ name = "log" + version = "0.4.6" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] +@@ -1719,7 +1719,7 @@ name = "net2" + version = "0.2.32" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (git+https://github.com/froydnj/winapi-rs?branch=aarch64)", + ] +@@ -1773,7 +1773,7 @@ name = "nsstring" + version = "0.1.0" + dependencies = [ + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +- "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", ++ "encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)", + ] + + [[package]] +@@ -1859,6 +1859,14 @@ dependencies = [ + 
"stable_deref_trait 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + ] + ++[[package]] ++name = "packed_simd" ++version = "0.3.3" ++source = "git+https://github.com/hsivonen/packed_simd?branch=rust_1_32#3541e3818fdc7c2a24f87e3459151a4ce955a67a" ++dependencies = [ ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ++] ++ + [[package]] + name = "parking_lot" + version = "0.6.3" +@@ -2354,11 +2362,6 @@ dependencies = [ + "opaque-debug 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + ] + +-[[package]] +-name = "simd" +-version = "0.2.3" +-source = "registry+https://github.com/rust-lang/crates.io-index" +- + [[package]] + name = "siphasher" + version = "0.2.1" +@@ -2936,7 +2939,7 @@ name = "uuid" + version = "0.6.5" + source = "registry+https://github.com/rust-lang/crates.io-index" + dependencies = [ +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + ] + +@@ -3017,7 +3020,7 @@ dependencies = [ + "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)", +- "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", ++ "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "core-foundation 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", + "core-graphics 0.17.1 (registry+https://github.com/rust-lang/crates.io-index)", + "core-text 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +@@ -3253,7 +3256,7 @@ dependencies = [ + "checksum cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" + "checksum cc 1.0.23 (registry+https://github.com/rust-lang/crates.io-index)" = "c37f0efaa4b9b001fa6f02d4b644dee4af97d3414df07c51e3e4f015f3a3e131" + "checksum cexpr 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8fc0086be9ca82f7fc89fc873435531cb898b86e850005850de1f820e2db6e9b" +-"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" ++"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4" + "checksum chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "45912881121cb26fad7c38c17ba7daa18764771836b34fab7d3fbd93ed633878" + "checksum clang-sys 0.26.1 (registry+https://github.com/rust-lang/crates.io-index)" = "481e42017c1416b1c0856ece45658ecbb7c93d8a93455f7e5fa77f3b35455557" + "checksum clap 2.31.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f0f16b89cbb9ee36d87483dc939fe9f1e13c05898d56d7b230a0d4dff033a536" +@@ -3303,7 +3306,7 @@ dependencies = [ + "checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" + "checksum ena 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "88dc8393b3c7352f94092497f6b52019643e493b6b890eb417cdb7c46117e621" + "checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee" +-"checksum encoding_rs 0.8.14 
(registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7" ++"checksum encoding_rs 0.8.16 (registry+https://github.com/rust-lang/crates.io-index)" = "0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73" + "checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad" + "checksum error-chain 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff511d5dc435d703f4971bc399647c9bc38e20cb41452e3b9feb4765419ed3f3" + "checksum euclid 0.19.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d1a7698bdda3d7444a79d33bdc96e8b518d44ea3ff101d8492a6ca1207b886ea" +@@ -3388,6 +3391,7 @@ dependencies = [ + "checksum ordered-float 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2f0015e9e8e28ee20c581cfbfe47c650cedeb9ed0721090e0b7ebb10b9cdbcc2" + "checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063" + "checksum owning_ref 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49a4b8ea2179e6a2e27411d3bca09ca6dd630821cf6894c6c7c8467a8ee7ef13" ++"checksum packed_simd 0.3.3 (git+https://github.com/hsivonen/packed_simd?branch=rust_1_32)" = "" + "checksum parking_lot 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "69376b761943787ebd5cc85a5bc95958651a22609c5c1c2b65de21786baec72b" + "checksum parking_lot_core 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa" + "checksum peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +@@ -3441,7 +3445,6 @@ dependencies = [ + "checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae" + "checksum sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0" + "checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d" +-"checksum simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84" + "checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84" + "checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" + "checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d" +diff --git a/Cargo.toml b/Cargo.toml +index d64cbc77b53d..25859a20ecc3 100644 +--- a/Cargo.toml ++++ b/Cargo.toml +@@ -59,3 +59,4 @@ codegen-units = 1 + libudev-sys = { path = "dom/webauthn/libudev-sys" } + serde_derive = { git = "https://github.com/servo/serde", branch = "deserialize_from_enums9" } + winapi = { git = "https://github.com/froydnj/winapi-rs", branch = "aarch64" } ++packed_simd = { git = "https://github.com/hsivonen/packed_simd", branch = "rust_1_32" } +diff --git a/third_party/rust/cfg-if/.cargo-checksum.json b/third_party/rust/cfg-if/.cargo-checksum.json +index 89b14a227216..b744a21d9fd1 100644 +--- 
a/third_party/rust/cfg-if/.cargo-checksum.json ++++ b/third_party/rust/cfg-if/.cargo-checksum.json +@@ -1 +1 @@ +-{"files":{"Cargo.toml":"764b9ce160653e841430da3919ff968b957ff811f7da42c8483c8bfc2f06be25","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"3fa9368c60bc701dea294fbacae0469188c4be1de79f82e972bb9b321776cd52","src/lib.rs":"6915169e3ca05f28e1cb0e052379d74f2496400de1240b74c56e55c2674a6560","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de"} +\ No newline at end of file ++{"files":{"Cargo.toml":"090d983ec20ad09e59f6b7679b48b9b54e9c0841cf2922b81cba485edcd40876","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"1cd0ebc3b30a9c9eddb0fda5515b5a52ec2b85a087328f0ee9f4d68cbb28afc2","src/lib.rs":"f02d6e295109365cf54884e5282a3e7d1e1f62857c700f23cd013e94a56bd803","tests/xcrate.rs":"30dcb70fbb9c96fda2b7825592558279f534776f72e2a8a0a3e26df4dedb3caa"},"package":"082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"} +\ No newline at end of file +diff --git a/third_party/rust/cfg-if/Cargo.toml b/third_party/rust/cfg-if/Cargo.toml +index 7afa063d1ef5..84c4fc7835ab 100644 +--- a/third_party/rust/cfg-if/Cargo.toml ++++ b/third_party/rust/cfg-if/Cargo.toml +@@ -1,14 +1,24 @@ ++# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO ++# ++# When uploading crates to the registry Cargo will automatically ++# "normalize" Cargo.toml files for maximal compatibility ++# with all versions of Cargo and also rewrite `path` dependencies ++# to registry (e.g. crates.io) dependencies ++# ++# If you believe there's an error in this file please file an ++# issue against the rust-lang/cargo repository. If you're ++# editing this file be aware that the upstream Cargo.toml ++# will likely look very different (and much more reasonable) ++ + [package] + name = "cfg-if" +-version = "0.1.2" ++version = "0.1.6" + authors = ["Alex Crichton "] +-license = "MIT/Apache-2.0" ++description = "A macro to ergonomically define an item depending on a large number of #[cfg]\nparameters. Structured like an if-else chain, the first matching branch is the\nitem that gets emitted.\n" ++homepage = "https://github.com/alexcrichton/cfg-if" ++documentation = "https://docs.rs/cfg-if" + readme = "README.md" ++license = "MIT/Apache-2.0" + repository = "https://github.com/alexcrichton/cfg-if" +-homepage = "https://github.com/alexcrichton/cfg-if" +-documentation = "http://alexcrichton.com/cfg-if" +-description = """ +-A macro to ergonomically define an item depending on a large number of #[cfg] +-parameters. Structured like an if-else chain, the first matching branch is the +-item that gets emitted. 
+-""" ++[badges.travis-ci] ++repository = "alexcrichton/cfg-if" +diff --git a/third_party/rust/cfg-if/README.md b/third_party/rust/cfg-if/README.md +index e9859dadb609..344a946c0487 100644 +--- a/third_party/rust/cfg-if/README.md ++++ b/third_party/rust/cfg-if/README.md +@@ -2,7 +2,7 @@ + + [![Build Status](https://travis-ci.org/alexcrichton/cfg-if.svg?branch=master)](https://travis-ci.org/alexcrichton/cfg-if) + +-[Documentation](http://alexcrichton.com/cfg-if) ++[Documentation](https://docs.rs/cfg-if) + + A macro to ergonomically define an item depending on a large number of #[cfg] + parameters. Structured like an if-else chain, the first matching branch is the +@@ -36,9 +36,17 @@ fn main() { + + # License + +-`cfg-if` is primarily distributed under the terms of both the MIT license and +-the Apache License (Version 2.0), with portions covered by various BSD-like +-licenses. ++This project is licensed under either of + +-See LICENSE-APACHE, and LICENSE-MIT for details. ++ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or ++ http://www.apache.org/licenses/LICENSE-2.0) ++ * MIT license ([LICENSE-MIT](LICENSE-MIT) or ++ http://opensource.org/licenses/MIT) + ++at your option. ++ ++### Contribution ++ ++Unless you explicitly state otherwise, any contribution intentionally submitted ++for inclusion in Serde by you, as defined in the Apache-2.0 license, shall be ++dual licensed as above, without any additional terms or conditions. +diff --git a/third_party/rust/cfg-if/src/lib.rs b/third_party/rust/cfg-if/src/lib.rs +index 563cda81f42d..ff144f69f862 100644 +--- a/third_party/rust/cfg-if/src/lib.rs ++++ b/third_party/rust/cfg-if/src/lib.rs +@@ -1,81 +1,90 @@ +-//! A macro for defining #[cfg] if-else statements. ++//! A macro for defining `#[cfg]` if-else statements. + //! + //! The macro provided by this crate, `cfg_if`, is similar to the `if/elif` C + //! preprocessor macro by allowing definition of a cascade of `#[cfg]` cases, + //! emitting the implementation which matches first. + //! +-//! This allows you to conveniently provide a long list #[cfg]'d blocks of code ++//! This allows you to conveniently provide a long list `#[cfg]`'d blocks of code + //! without having to rewrite each clause multiple times. + //! + //! # Example + //! + //! ``` + //! #[macro_use] + //! extern crate cfg_if; + //! + //! cfg_if! { + //! if #[cfg(unix)] { + //! fn foo() { /* unix specific functionality */ } + //! } else if #[cfg(target_pointer_width = "32")] { + //! fn foo() { /* non-unix, 32-bit functionality */ } + //! } else { + //! fn foo() { /* fallback implementation */ } + //! } + //! } + //! + //! # fn main() {} + //! ``` + + #![no_std] + +-#![doc(html_root_url = "http://alexcrichton.com/cfg-if")] ++#![doc(html_root_url = "https://docs.rs/cfg-if")] + #![deny(missing_docs)] + #![cfg_attr(test, deny(warnings))] + +-#[macro_export] ++#[macro_export(local_inner_macros)] + macro_rules! cfg_if { ++ // match if/else chains with a final `else` + ($( + if #[cfg($($meta:meta),*)] { $($it:item)* } + ) else * else { + $($it2:item)* + }) => { +- __cfg_if_items! { ++ cfg_if! { ++ @__items + () ; + $( ( ($($meta),*) ($($it)*) ), )* + ( () ($($it2)*) ), + } + }; ++ ++ // match if/else chains lacking a final `else` + ( + if #[cfg($($i_met:meta),*)] { $($i_it:item)* } + $( + else if #[cfg($($e_met:meta),*)] { $($e_it:item)* } + )* + ) => { +- __cfg_if_items! { ++ cfg_if! 
{ ++ @__items + () ; + ( ($($i_met),*) ($($i_it)*) ), + $( ( ($($e_met),*) ($($e_it)*) ), )* + ( () () ), + } +- } +-} ++ }; + +-#[macro_export] +-#[doc(hidden)] +-macro_rules! __cfg_if_items { +- (($($not:meta,)*) ; ) => {}; +- (($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { +- __cfg_if_apply! { cfg(all($($m,)* not(any($($not),*)))), $($it)* } +- __cfg_if_items! { ($($not,)* $($m,)*) ; $($rest)* } +- } +-} ++ // Internal and recursive macro to emit all the items ++ // ++ // Collects all the negated cfgs in a list at the beginning and after the ++ // semicolon is all the remaining items ++ (@__items ($($not:meta,)*) ; ) => {}; ++ (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($it:item)*) ), $($rest:tt)*) => { ++ // Emit all items within one block, applying an approprate #[cfg]. The ++ // #[cfg] will require all `$m` matchers specified and must also negate ++ // all previous matchers. ++ cfg_if! { @__apply cfg(all($($m,)* not(any($($not),*)))), $($it)* } + +-#[macro_export] +-#[doc(hidden)] +-macro_rules! __cfg_if_apply { +- ($m:meta, $($it:item)*) => { ++ // Recurse to emit all other items in `$rest`, and when we do so add all ++ // our `$m` matchers to the list of `$not` matchers as future emissions ++ // will have to negate everything we just matched as well. ++ cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* } ++ }; ++ ++ // Internal macro to Apply a cfg attribute to a list of items ++ (@__apply $m:meta, $($it:item)*) => { + $(#[$m] $it)* +- } ++ }; + } + + #[cfg(test)] +diff --git a/third_party/rust/encoding_rs/.cargo-checksum.json b/third_party/rust/encoding_rs/.cargo-checksum.json +index c063d4c27534..7c1901dce515 100644 +--- a/third_party/rust/encoding_rs/.cargo-checksum.json ++++ b/third_party/rust/encoding_rs/.cargo-checksum.json +@@ -1 +1 @@ 
+-{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"f4c9b33981fe222ef322d640f5ef680828d75dcd534b8aa2bfdd576598deea64","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"ad140c9178067c8bdba8ae43ddffd0506d70d49474731247a050ff99a3ff7832","build.rs":"f5defca2c68b73e8723f489a9279af4fbe9724abc6e9abf58d32542e8a459e26","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbcc19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":
"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"71aa7de1c5236a34ea0a8bb85332987751d2466b756fca6b3f6ac0da765cf91e","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e786de9e92e5652bc200266cf318753eea869e8971857cc0caa65a3cfe687545","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"f412f60f2d4afb7e32ffba94dc5f93716e6ae9f065799ca17bb1f1b2145f6ee4","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fcf720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"565ceeffe81173b85700c55c396ab72068751ef809bea8e1cb1e6c7919f5a905","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test
_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"ab26ea900c8f7b7a4d1172872b7ca4bc573bc60b7b1979c93aafdfb86b2c2235"},"package":"a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"} +\ No newline at end of file ++{"files":{"CONTRIBUTING.md":"06c26277e8dbd3f57be2eb51b5e3285dc1cbbf8c11326df413868ae702e6a61c","COPYRIGHT":"8b98376eb373dcf81950474efe34b5576a8171460dff500cc58a1ed8d160cd57","Cargo.toml":"fd56e8d662553f0cc559f8ef7097effefbc815ac3485799b37dee9df08ec803c","Ideas.md":"b7452893f500163868d8de52c09addaf91e1632454ed02e892c467ed7ec39dbd","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"f2ad48641d9c997d9ae3b95d93d1cd6e1ab12ab4c44de89937c7bfabbd076a4a","README.md":"8ae2a3548dee23c19e20564a90e2fd0dfa600cf4c2dfcc538f3455f4462d7133","build.rs":"82747097b0bb8999cdaf689a9e46195f6df5d691ee90bcde8a7b79f16bd976f0","doc/Big5.txt":"f73a2edc5cb6c2d140ba6e07f4542e1c4a234950378acde1df93480f0ca0be0b","doc/EUC-JP.txt":"ee2818b907d0137f40a9ab9fd525fc700a44dbdddb6cf0c157a656566bae4bf1","doc/EUC-KR.txt":"71d9e2ccf3b124e8bdfb433c8cf2773fd878077038d0cec3c7237a50f4a78a30","doc/GBK.txt":"c1b522b5a799884e5001da661f42c5a8f4d0acb9ef1d74b206f22b5f65365606","doc/IBM866.txt":"a5a433e804d0f83af785015179fbc1d9b0eaf1f7960efcd04093e136b51fbd0e","doc/ISO-2022-JP.txt":"af86684f5a8f0e2868d7b2c292860140c3d2e5527530ca091f1b28198e8e2fe6","doc/ISO-8859-10.txt":"6d3949ad7c81ca176895101ed81a1db7df1060d64e262880b94bd31bb344ab4d","doc/ISO-8859-13.txt":"3951dd89cf93f7729148091683cf8511f4529388b7dc8dcd0d62eaed55be93fa","doc/ISO-8859-14.txt":"3d330784a0374fd255a38b47949675cc7168c800530534b0a01cac6edc623adc","doc/ISO-8859-15.txt":"24b1084aab5127a85aab99153f86e24694d0a3615f53b5ce23683f97cf66c47a","doc/ISO-8859-16.txt":"ce0272559b92ba76d7a7e476f6424ae4a5cc72e75b183611b08392e44add4d25","doc/ISO-8859-2.txt":"18ceff88c13d1b5ba455a3919b1e3de489045c4c3d2dd7e8527c125c75d54aad","doc/ISO-8859-3.txt":"21798404c68f4f5db59223362f24999da96968c0628427321fccce7d2849a130","doc/ISO-8859-4.txt":"d27f6520c6c5bfbc
c19176b71d081cdb3bccde1622bb3e420d5680e812632d53","doc/ISO-8859-5.txt":"a10ec8d6ea7a78ad15da7275f6cb1a3365118527e28f9af6d0d5830501303f3a","doc/ISO-8859-6.txt":"ccda8a2efc96115336bdd77776637b9712425e44fbcf745353b9057fbef144e7","doc/ISO-8859-7.txt":"17900fa1f27a445958f0a77d7d9056be375a6bd7ee4492aa680c7c1500bab85e","doc/ISO-8859-8-I.txt":"8357555646d54265a9b9ffa3e68b08d132312f1561c60108ff9b8b1167b6ecf2","doc/ISO-8859-8.txt":"72cd6f3afb7b4a9c16a66a362473315770b7755d72c86c870e52fc3eba86c8af","doc/KOI8-R.txt":"839cf19a38da994488004ed7814b1f6151640156a9a2af02bf2efca745fb5966","doc/KOI8-U.txt":"0cc76624ed1f024183e2298b7e019957da2c70c8ca06e0fc4e6f353f50a5054f","doc/Shift_JIS.txt":"34c49141818cb9ddbcf59cc858f78a79be8ad148d563f26415108ae1f148443f","doc/UTF-16BE.txt":"e2e280d8acbaa6d2a6b3569d60e17500a285f2baa0df3363dd85537cd5a1ef8f","doc/UTF-16LE.txt":"70bdc170e3fc5298ba68f10125fb5eeb8b077036cc96bb4416c4de396f6d76c1","doc/UTF-8.txt":"ea7bae742e613010ced002cf4b601a737d2203fad65e115611451bc4428f548a","doc/gb18030.txt":"dc71378a8f07a2d8659f69ee81fb8791fef56ba86f124b429978285237bb4a7b","doc/macintosh.txt":"57491e53866711b4672d9b9ff35380b9dac9e0d8e3d6c20bdd6140603687c023","doc/replacement.txt":"4b6c3bbd7999d9d4108a281594bd02d13607e334a95465afff8c2c08d395f0e4","doc/windows-1250.txt":"61296bb6a21cdab602300d32ecfba434cb82de5ac3bc88d58710d2f125e28d39","doc/windows-1251.txt":"7deea1c61dea1485c8ff02db2c7d578db7a9aab63ab1cfd02ec04b515864689e","doc/windows-1252.txt":"933ef3bdddfce5ee132b9f1a1aa8b47423d2587bbe475b19028d0a6d38e180b6","doc/windows-1253.txt":"1a38748b88e99071a5c7b3d5456ead4caedeabab50d50d658be105bc113714de","doc/windows-1254.txt":"f8372f86c6f8d642563cd6ddc025260553292a39423df1683a98670bd7bf2b47","doc/windows-1255.txt":"4e5852494730054e2da258a74e1b9d780abbcdd8ce22ebc218ca2efe9e90493d","doc/windows-1256.txt":"c0879c5172abedead302a406e8f60d9cd9598694a0ffa4fd288ffe4fef7b8ea1","doc/windows-1257.txt":"c28a0c9f964fcb2b46d21f537c402446501a2800670481d6abf9fd9e9018d523","doc/windows-1258.txt":"5019ae4d61805c79aacbf17c93793342dbb098d65a1837783bc3e2c6d6a23602","doc/windows-874.txt":"4ef0e4501c5feba8b17aee1818602ed44b36ca8475db771ce2fc16d392cabecc","doc/x-mac-cyrillic.txt":"58be154d8a888ca3d484b83b44f749823ef339ab27f14d90ca9a856f5050a8bd","doc/x-user-defined.txt":"f9cd07c4321bf5cfb0be4bdddd251072999b04a6cf7a6f5bc63709a84e2c1ffc","generate-encoding-data.py":"92ddec35a834b6bc815fffffe6d07d9938a90d3c4526298637d8624410d83078","rustfmt.toml":"85c1a3b4382fd89e991cbb81b70fb52780472edc064c963943cdaaa56e0a2030","src/ascii.rs":"800cfbe3036d0c97ce27e07a4fd05edbcb7354ebec20903d81c76136d734931c","src/big5.rs":"1c94b35813314775c3fa1b10923cf8e8f8eba8c465d9833ad4293594e16c17f2","src/data.rs":"9544c019c7360a669bd3adaa90b70331124abd1df59841db66e74912bcdb96a5","src/euc_jp.rs":"0842e4f564a36051c6b85c47bbb652efae2f2926e91491daf77e4ceeecb18163","src/euc_kr.rs":"8e68590efa65485583bf57cae44ebf6de535bac1d37232e7f0307a38425fb992","src/gb18030.rs":"d269efb5e5d175f9d2ecf01d5606955a284b6f00749bb0ee23d3412c83aa3d59","src/handles.rs":"0646bd091892ff7a76f34efccda4e5ddabe1e624e890baa9fdc9d48011d2d38b","src/iso_2022_jp.rs":"3adc380736f24a5de36bc1cf81049bbe64473de10e6f12774195e6213c27c322","src/lib.rs":"e2917fb9f605662ec4705d8c0b3c179f2264697a761191c3ec8101748cf717dc","src/macros.rs":"c7a019fd81d31de77569036ac36fd4e404b3f20144bbf79747faf4ea21538d09","src/mem.rs":"5498de31e816f51348b8d298d4fc9568da6b0b9363146f87ca5503131d33397f","src/replacement.rs":"182c2093a6edb162183ca5990554fd7b199d3011924a8d80d894ba98ee7c479e","src/shift_jis.rs":"1c0c69ba6c123fc
f720276646074660193bf9e6fa4327fe0d739a3e67874e081","src/simd_funcs.rs":"857e61c1bda9d65286c23a6c3910d6814680bbc3064bf0ff92de5bc4f3edb6f3","src/single_byte.rs":"383d325dedbf3295acd50d880db1cecc29b69efe332ae2a37367cf40bf138ac4","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"b299d27055f3b068de66cc10a75c024b881c48bc093627c01e0b1f8bd7d94666","src/utf_16.rs":"1ec4e1c8ed7e42e4de401c6d0f64c2835bd80c2a306f358959957d30e6ff1501","src/utf_8.rs":"f639fc5dccd5dcc2458936baa942237d0fd58ac398c83ea3f48e51dceb5b6a81","src/variant.rs":"619a8e604d2febe6a874e3ad73cddf3ef9e6011480aecf86f23708b313415251","src/x_user_defined.rs":"da51def859b870ced29cb87987f02d27b220eac0f222876cb72a1dc616f9d8ec"},"package":"0535f350c60aac0b87ccf28319abc749391e912192255b0c00a2c12c6917bd73"} +\ No newline at end of file +diff --git a/third_party/rust/encoding_rs/Cargo.toml 
b/third_party/rust/encoding_rs/Cargo.toml +index 65fc8e8dffcd..e29f19fb9afe 100644 +--- a/third_party/rust/encoding_rs/Cargo.toml ++++ b/third_party/rust/encoding_rs/Cargo.toml +@@ -12,47 +12,47 @@ + + [package] + name = "encoding_rs" +-version = "0.8.14" ++version = "0.8.16" + authors = ["Henri Sivonen "] + description = "A Gecko-oriented implementation of the Encoding Standard" + homepage = "https://docs.rs/encoding_rs/" + documentation = "https://docs.rs/encoding_rs/" + readme = "README.md" + keywords = ["encoding", "web", "unicode", "charset"] + categories = ["text-processing", "encoding", "web-programming", "internationalization"] + license = "MIT/Apache-2.0" + repository = "https://github.com/hsivonen/encoding_rs" + [profile.release] + lto = true + [dependencies.cfg-if] + version = "0.1.0" + ++[dependencies.packed_simd] ++version = "0.3.3" ++optional = true ++ + [dependencies.serde] + version = "1.0" + optional = true +- +-[dependencies.simd] +-version = "0.2.3" +-optional = true + [dev-dependencies.bincode] + version = "0.8" + + [dev-dependencies.serde_derive] + version = "1.0" + + [dev-dependencies.serde_json] + version = "1.0" + + [features] + fast-big5-hanzi-encode = [] + fast-gb-hanzi-encode = [] + fast-hangul-encode = [] + fast-hanja-encode = [] + fast-kanji-encode = [] + fast-legacy-encode = ["fast-hangul-encode", "fast-hanja-encode", "fast-kanji-encode", "fast-gb-hanzi-encode", "fast-big5-hanzi-encode"] + less-slow-big5-hanzi-encode = [] + less-slow-gb-hanzi-encode = [] + less-slow-kanji-encode = [] +-simd-accel = ["simd"] ++simd-accel = ["packed_simd", "packed_simd/into_bits"] + [badges.travis-ci] + repository = "hsivonen/encoding_rs" +diff --git a/third_party/rust/encoding_rs/README.md b/third_party/rust/encoding_rs/README.md +index 3446efd0bb43..8a72b515450e 100644 +--- a/third_party/rust/encoding_rs/README.md ++++ b/third_party/rust/encoding_rs/README.md +@@ -126,17 +126,39 @@ There are currently these optional cargo features: + + ### `simd-accel` + +-Enables SSE2 acceleration on x86 and x86_64 and NEON acceleration on Aarch64 +-and ARMv7. _Enabling this cargo feature is recommended when building for x86, +-x86_64, ARMv7 or Aarch64._ The intention is for the functionality enabled by +-this feature to become the normal on-by-default behavior once +-[portable SIMD](https://github.com/rust-lang/rfcs/pull/2366) becames part of +-stable Rust. +- +-Enabling this feature breaks the build unless the target is x86 with SSE2 +-(Rust's default 32-bit x86 target, `i686`, has SSE2, but Linux distros may +-use an x86 target without SSE2, i.e. `i586` in `rustup` terms), ARMv7 or +-thumbv7 with NEON (`-C target_feature=+neon`), x86_64 or Aarch64. ++Enables SIMD acceleration using the nightly-dependent `packed_simd` crate. ++ ++This is an opt-in feature, because enabling this feature _opts out_ of Rust's ++guarantees of future compilers compiling old code (a.k.a. the "stability story"). ++ ++Currently, this has not been verified to be an improvement except on these ++targets: ++ ++* x86_64 ++* i686 ++* aarch64 ++* thumbv7neon ++ ++If you use nightly Rust, use targets whose first component is one of the ++above, and are prepared _to revise your configuration when updating ++Rust_, then you should enable this feature. Otherwise, please _do not_ enable this ++feature.
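As a rough illustration of what the paragraphs above ask consumers to opt into, here is a minimal sketch of the kind of feature-gated fast path that `simd-accel` implies. This is editorial, not encoding_rs's actual internals: the function name and chunking strategy are illustrative only, and it assumes nightly Rust with packed_simd 0.3.

    // Illustrative sketch only -- not encoding_rs's real code path.
    // With `simd-accel` on, scan sixteen bytes per step via packed_simd;
    // without it, fall back to a scalar loop under the same name.
    #[cfg(feature = "simd-accel")]
    fn is_ascii_fast(bytes: &[u8]) -> bool {
        use packed_simd::u8x16;
        let mut chunks = bytes.chunks_exact(16);
        for chunk in chunks.by_ref() {
            // All sixteen lanes must be below 0x80 for the chunk to be ASCII.
            if !u8x16::from_slice_unaligned(chunk).lt(u8x16::splat(0x80)).all() {
                return false;
            }
        }
        // Handle the trailing partial chunk with plain scalar code.
        chunks.remainder().iter().all(|&b| b < 0x80)
    }

    #[cfg(not(feature = "simd-accel"))]
    fn is_ascii_fast(bytes: &[u8]) -> bool {
        bytes.iter().all(|&b| b < 0x80)
    }

Either variant compiles depending on the Cargo feature set, which is why the README can recommend leaving the feature off without changing any call sites.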
++ ++_Note!_ If you are compiling for a target that does not have 128-bit SIMD ++enabled as part of the target definition and you are enabling 128-bit SIMD ++using `-C target_feature`, you need to enable the `core_arch` Cargo feature ++for `packed_simd` to compile a crates.io snapshot of `core_arch` instead of ++using the standard-library copy of `core::arch`, because the `core::arch` ++module of the pre-compiled standard library has been compiled with the ++assumption that the CPU doesn't have 128-bit SIMD. At present this applies ++mainly to 32-bit ARM targets whose first component does not include the ++substring `neon`. ++ ++The encoding_rs side of things has not yet been properly set up for SIMD on ++POWER, PowerPC, MIPS, and other less mainstream architectures, so even if ++you were to follow the advice from the previous paragraph, you probably ++shouldn't use the `simd-accel` option on those architectures at this ++time. + + Used by Firefox. + +@@ -382,6 +404,14 @@ To regenerate the generated code: + + ## Release Notes + ++### 0.8.16 ++ ++* Switch from the `simd` crate to `packed_simd`. ++ ++### 0.8.15 ++ ++* Adjust documentation for `simd-accel` (README-only release). ++ + ### 0.8.14 + + * Made UTF-16 to UTF-8 encode conversion fill the output buffer as +diff --git a/third_party/rust/encoding_rs/build.rs b/third_party/rust/encoding_rs/build.rs +index 1b7adf780010..e687878081f7 100644 +--- a/third_party/rust/encoding_rs/build.rs ++++ b/third_party/rust/encoding_rs/build.rs +@@ -1,4 +1,12 @@ + fn main() { ++ // This does not enable `RUSTC_BOOTSTRAP=1` for `packed_simd`. ++ // You still need to knowingly have a setup that makes ++ // `packed_simd` compile. Therefore, having this file on ++ // crates.io is harmless in terms of users of `encoding_rs` ++ // accidentally depending on nightly features. Having this ++ // here means that if you knowingly want this, you only ++ // need to maintain a fork of `packed_simd` without _also_ ++ // having to maintain a fork of `encoding_rs`. + #[cfg(feature = "simd-accel")] + println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); + } +diff --git a/third_party/rust/encoding_rs/src/handles.rs b/third_party/rust/encoding_rs/src/handles.rs +index d75b65d75ce3..08da62d20051 100644 +--- a/third_party/rust/encoding_rs/src/handles.rs ++++ b/third_party/rust/encoding_rs/src/handles.rs +@@ -34,7 +34,7 @@ use simd_funcs::*; + all(target_endian = "little", target_feature = "neon") + ) + ))] +-use simd::u16x8; ++use packed_simd::u16x8; + + use super::DecoderResult; + use super::EncoderResult; +diff --git a/third_party/rust/encoding_rs/src/lib.rs b/third_party/rust/encoding_rs/src/lib.rs +index 912c349a0e25..23069375d6f8 100644 +--- a/third_party/rust/encoding_rs/src/lib.rs ++++ b/third_party/rust/encoding_rs/src/lib.rs +@@ -11,7 +11,7 @@ + feature = "cargo-clippy", + allow(doc_markdown, inline_always, new_ret_no_self) + )] +-#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.14")] ++#![doc(html_root_url = "https://docs.rs/encoding_rs/0.8.16")] + + //! encoding_rs is a Gecko-oriented Free Software / Open Source implementation + //! of the [Encoding Standard](https://encoding.spec.whatwg.org/) in Rust. +@@ -665,20 +665,21 @@ + //! See the section [_UTF-16LE, UTF-16BE and Unicode Encoding Schemes_](#utf-16le-utf-16be-and-unicode-encoding-schemes) + //! for discussion about the UTF-16 family.
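The handles.rs hunk above and the lib.rs hunk that follows swap the abandoned `simd` crate for `packed_simd` wholesale; the vector types keep the same splat/compare/reduce shape, so most call sites change only their imports. A minimal sketch of that shape (assumes packed_simd 0.3 on nightly; `all_basic_latin` is an illustrative name, not an encoding_rs function):

    // Illustrative only: packed_simd's u16x8 offers the same lanewise
    // compare-then-reduce pattern that the old `simd` crate's u16x8 did.
    use packed_simd::u16x8;

    fn all_basic_latin(lanes: u16x8) -> bool {
        // True when every UTF-16 code unit in the vector is below U+0080.
        lanes.lt(u16x8::splat(0x80)).all()
    }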
+ +-#![cfg_attr(feature = "simd-accel", feature(platform_intrinsics, core_intrinsics))] ++#![cfg_attr(feature = "simd-accel", feature(stdsimd, core_intrinsics))] + + #[macro_use] + extern crate cfg_if; + + #[cfg(all( + feature = "simd-accel", + any( + target_feature = "sse2", + all(target_endian = "little", target_arch = "aarch64"), + all(target_endian = "little", target_feature = "neon") + ) + ))] +-extern crate simd; ++#[macro_use(shuffle)] ++extern crate packed_simd; + + #[cfg(feature = "serde")] + extern crate serde; +diff --git a/third_party/rust/encoding_rs/src/mem.rs b/third_party/rust/encoding_rs/src/mem.rs +index 6cd1a4448056..c5ee605c1b13 100644 +--- a/third_party/rust/encoding_rs/src/mem.rs ++++ b/third_party/rust/encoding_rs/src/mem.rs +@@ -228,8 +228,8 @@ macro_rules! by_unit_check_simd { + cfg_if! { + if #[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"), all(target_endian = "little", target_feature = "neon"))))] { + use simd_funcs::*; +- use simd::u8x16; +- use simd::u16x8; ++ use packed_simd::u8x16; ++ use packed_simd::u16x8; + + const SIMD_ALIGNMENT: usize = 16; + +@@ -631,47 +631,42 @@ cfg_if! { + /// + /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function + /// is not guaranteed to fail fast.) +-#[inline] + pub fn is_ascii(buffer: &[u8]) -> bool { + is_ascii_impl(buffer) + } + + /// Checks whether the buffer is all-Basic Latin (i.e. UTF-16 representing + /// only ASCII characters). + /// + /// May read the entire buffer even if it isn't all-ASCII. (I.e. the function + /// is not guaranteed to fail fast.) +-#[inline] + pub fn is_basic_latin(buffer: &[u16]) -> bool { + is_basic_latin_impl(buffer) + } + + /// Checks whether the buffer is valid UTF-8 representing only code points + /// less than or equal to U+00FF. + /// + /// Fails fast. (I.e. returns before having read the whole buffer if UTF-8 + /// invalidity or code points above U+00FF are discovered. +-#[inline] + pub fn is_utf8_latin1(buffer: &[u8]) -> bool { + is_utf8_latin1_impl(buffer).is_none() + } + + /// Checks whether the buffer represents only code point less than or equal + /// to U+00FF. + /// + /// Fails fast. (I.e. returns before having read the whole buffer if code + /// points above U+00FF are discovered. +-#[inline] + pub fn is_str_latin1(buffer: &str) -> bool { + is_str_latin1_impl(buffer).is_none() + } + + /// Checks whether the buffer represents only code point less than or equal + /// to U+00FF. + /// + /// May read the entire buffer even if it isn't all-Latin1. (I.e. the function + /// is not guaranteed to fail fast.) +-#[inline] + pub fn is_utf16_latin1(buffer: &[u16]) -> bool { + is_utf16_latin1_impl(buffer) + } +@@ -1283,7 +1278,6 @@ pub fn is_str_bidi(buffer: &str) -> bool { + /// high surrogate that could be the high half of an RTL character. + /// Returns `false` if the input contains neither RTL characters nor + /// unpaired high surrogates that could be higher halves of RTL characters. +-#[inline] + pub fn is_utf16_bidi(buffer: &[u16]) -> bool { + is_utf16_bidi_impl(buffer) + } +@@ -1416,67 +1410,63 @@ pub fn is_utf16_code_unit_bidi(u: u16) -> bool { + /// Returns `Latin1Bidi::Latin1` if `is_utf8_latin1()` would return `true`. + /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf8_bidi()` would return + /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. 
+-#[inline] + pub fn check_utf8_for_latin1_and_bidi(buffer: &[u8]) -> Latin1Bidi { + if let Some(offset) = is_utf8_latin1_impl(buffer) { + if is_utf8_bidi(&buffer[offset..]) { + Latin1Bidi::Bidi + } else { + Latin1Bidi::LeftToRight + } + } else { + Latin1Bidi::Latin1 + } + } + + /// Checks whether a valid UTF-8 buffer contains code points + /// that trigger right-to-left processing or is all-Latin1. + /// + /// Possibly more efficient than performing the checks separately. + /// + /// Returns `Latin1Bidi::Latin1` if `is_str_latin1()` would return `true`. + /// Otherwise, returns `Latin1Bidi::Bidi` if `is_str_bidi()` would return + /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. +-#[inline] + pub fn check_str_for_latin1_and_bidi(buffer: &str) -> Latin1Bidi { + // The transition from the latin1 check to the bidi check isn't + // optimal but not tweaking it to perfection today. + if let Some(offset) = is_str_latin1_impl(buffer) { + if is_str_bidi(&buffer[offset..]) { + Latin1Bidi::Bidi + } else { + Latin1Bidi::LeftToRight + } + } else { + Latin1Bidi::Latin1 + } + } + + /// Checks whether a potentially invalid UTF-16 buffer contains code points + /// that trigger right-to-left processing or is all-Latin1. + /// + /// Possibly more efficient than performing the checks separately. + /// + /// Returns `Latin1Bidi::Latin1` if `is_utf16_latin1()` would return `true`. + /// Otherwise, returns `Latin1Bidi::Bidi` if `is_utf16_bidi()` would return + /// `true`. Otherwise, returns `Latin1Bidi::LeftToRight`. +-#[inline] + pub fn check_utf16_for_latin1_and_bidi(buffer: &[u16]) -> Latin1Bidi { + check_utf16_for_latin1_and_bidi_impl(buffer) + } + + /// Converts potentially-invalid UTF-8 to valid UTF-16 with errors replaced + /// with the REPLACEMENT CHARACTER. + /// + /// The length of the destination buffer must be at least the length of the + /// source buffer _plus one_. + /// + /// Returns the number of `u16`s written. + /// + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { + // TODO: Can the requirement for dst to be at least one unit longer + // be eliminated? +@@ -1516,7 +1506,6 @@ pub fn convert_utf8_to_utf16(src: &[u8], dst: &mut [u16]) -> usize { + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn convert_str_to_utf16(src: &str, dst: &mut [u16]) -> usize { + assert!( + dst.len() >= src.len(), +@@ -1683,7 +1672,6 @@ pub fn convert_utf16_to_utf8(src: &[u16], dst: &mut [u8]) -> usize { + /// not allocating memory for the worst case up front. Specifically, + /// if the input starts with or ends with an unpaired surrogate, those are + /// replaced with the REPLACEMENT CHARACTER. +-#[inline] + pub fn convert_utf16_to_str_partial(src: &[u16], dst: &mut str) -> (usize, usize) { + let bytes: &mut [u8] = unsafe { dst.as_bytes_mut() }; + let (read, written) = convert_utf16_to_utf8_partial(src, bytes); +@@ -1727,7 +1715,6 @@ pub fn convert_utf16_to_str(src: &[u16], dst: &mut str) -> usize { + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { + assert!( + dst.len() >= src.len(), +@@ -1755,7 +1742,6 @@ pub fn convert_latin1_to_utf16(src: &[u8], dst: &mut [u16]) { + /// indicated by the return value, so using a `&mut str` interpreted as + /// `&mut [u8]` as the destination is not safe. 
If you want to convert into + /// a `&mut str`, use `convert_utf16_to_str()` instead of this function. +-#[inline] + pub fn convert_latin1_to_utf8_partial(src: &[u8], dst: &mut [u8]) -> (usize, usize) { + let src_len = src.len(); + let src_ptr = src.as_ptr(); +@@ -1894,7 +1880,6 @@ pub fn convert_latin1_to_str(src: &[u8], dst: &mut str) -> usize { + /// + /// If debug assertions are enabled (and not fuzzing) and the input is + /// not in the range U+0000 to U+00FF, inclusive. +-#[inline] + pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { + assert!( + dst.len() >= src.len(), +@@ -1957,7 +1942,6 @@ pub fn convert_utf8_to_latin1_lossy(src: &[u8], dst: &mut [u8]) -> usize { + /// + /// (Probably in future versions if debug assertions are enabled (and not + /// fuzzing) and the input is not in the range U+0000 to U+00FF, inclusive.) +-#[inline] + pub fn convert_utf16_to_latin1_lossy(src: &[u16], dst: &mut [u8]) { + assert!( + dst.len() >= src.len(), +@@ -2030,7 +2014,6 @@ pub fn encode_latin1_lossy<'a>(string: &'a str) -> Cow<'a, [u8]> { + + /// Returns the index of the first unpaired surrogate or, if the input is + /// valid UTF-16 in its entirety, the length of the input. +-#[inline] + pub fn utf16_valid_up_to(buffer: &[u16]) -> usize { + utf16_valid_up_to_impl(buffer) + } +@@ -2060,61 +2043,58 @@ pub fn ensure_utf16_validity(buffer: &mut [u16]) { + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn copy_ascii_to_ascii(src: &[u8], dst: &mut [u8]) -> usize { + assert!( + dst.len() >= src.len(), + "Destination must not be shorter than the source." + ); + if let Some((_, consumed)) = + unsafe { ascii_to_ascii(src.as_ptr(), dst.as_mut_ptr(), src.len()) } + { + consumed + } else { + src.len() + } + } + + /// Copies ASCII from source to destination zero-extending it to UTF-16 up to + /// the first non-ASCII byte (or the end of the input if it is ASCII in its + /// entirety). + /// + /// The length of the destination buffer must be at least the length of the + /// source buffer. + /// + /// Returns the number of `u16`s written. + /// + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn copy_ascii_to_basic_latin(src: &[u8], dst: &mut [u16]) -> usize { + assert!( + dst.len() >= src.len(), + "Destination must not be shorter than the source." + ); + if let Some((_, consumed)) = + unsafe { ascii_to_basic_latin(src.as_ptr(), dst.as_mut_ptr(), src.len()) } + { + consumed + } else { + src.len() + } + } + + /// Copies Basic Latin from source to destination narrowing it to ASCII up to + /// the first non-Basic Latin code unit (or the end of the input if it is + /// Basic Latin in its entirety). + /// + /// The length of the destination buffer must be at least the length of the + /// source buffer. + /// + /// Returns the number of bytes written. + /// + /// # Panics + /// + /// Panics if the destination buffer is shorter than stated above. +-#[inline] + pub fn copy_basic_latin_to_ascii(src: &[u16], dst: &mut [u8]) -> usize { + assert!( + dst.len() >= src.len(), +diff --git a/third_party/rust/encoding_rs/src/simd_funcs.rs b/third_party/rust/encoding_rs/src/simd_funcs.rs +index 0cc05baf784d..4e19b0e8a07e 100644 +--- a/third_party/rust/encoding_rs/src/simd_funcs.rs ++++ b/third_party/rust/encoding_rs/src/simd_funcs.rs +@@ -7,9 +7,9 @@ + // option. This file may not be copied, modified, or distributed + // except according to those terms. 
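The simd_funcs.rs changes that follow replace the old `mem::transmute` reinterpretations with `packed_simd`'s `FromBits` trait, which is why Cargo.toml earlier enables the `packed_simd/into_bits` feature. A minimal sketch of that conversion (assumes packed_simd 0.3 with `into_bits`; `view_as_words` is an illustrative name):

    // Illustrative only: FromBits reinterprets the same 128 bits under a
    // different lane layout, with the trait bound checking that the bit
    // widths match -- unlike the unchecked transmutes it replaces.
    use packed_simd::{u8x16, u16x8, FromBits};

    fn view_as_words(bytes: u8x16) -> u16x8 {
        // Sixteen u8 lanes viewed as eight u16 lanes; no data movement.
        u16x8::from_bits(bytes)
    }

This is the same `u16x8::from_bits(...)` call that `to_u16_lanes` uses in the hunk below.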
+ +-use simd::u16x8; +-use simd::u8x16; +-use simd::Simd; ++use packed_simd::u16x8; ++use packed_simd::u8x16; ++use packed_simd::FromBits; + + // TODO: Migrate unaligned access to stdlib code if/when the RFC + // https://github.com/rust-lang/rfcs/pull/1725 is implemented. +@@ -62,81 +62,79 @@ pub unsafe fn store8_aligned(ptr: *mut u16, s: u16x8) { + *(ptr as *mut u16x8) = s; + } + +-extern "platform-intrinsic" { +- fn simd_shuffle16>(x: T, y: T, idx: [u32; 16]) -> U; ++cfg_if! { ++ if #[cfg(all(target_feature = "sse2", target_arch = "x86_64"))] { ++ use std::arch::x86_64::__m128i; ++ use std::arch::x86_64::_mm_movemask_epi8; ++ use std::arch::x86_64::_mm_packus_epi16; ++ } else if #[cfg(all(target_feature = "sse2", target_arch = "x86"))] { ++ use std::arch::x86::__m128i; ++ use std::arch::x86::_mm_movemask_epi8; ++ use std::arch::x86::_mm_packus_epi16; ++ } else if #[cfg(target_arch = "aarch64")]{ ++ use std::arch::aarch64::uint8x16_t; ++ use std::arch::aarch64::uint16x8_t; ++ use std::arch::aarch64::vmaxvq_u8; ++ use std::arch::aarch64::vmaxvq_u16; ++ } else { ++ ++ } + } + + // #[inline(always)] + // fn simd_byte_swap_u8(s: u8x16) -> u8x16 { + // unsafe { +-// simd_shuffle16(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) ++// shuffle!(s, s, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) + // } + // } + + // #[inline(always)] + // pub fn simd_byte_swap(s: u16x8) -> u16x8 { + // to_u16_lanes(simd_byte_swap_u8(to_u8_lanes(s))) + // } + + #[inline(always)] + pub fn simd_byte_swap(s: u16x8) -> u16x8 { + let left = s << 8; + let right = s >> 8; + left | right + } + + #[inline(always)] + pub fn to_u16_lanes(s: u8x16) -> u16x8 { +- unsafe { ::std::mem::transmute(s) } ++ u16x8::from_bits(s) + } + +-// #[inline(always)] +-// pub fn to_u8_lanes(s: u16x8) -> u8x16 { +-// unsafe { ::std::mem::transmute(s) } +-// } +- + cfg_if! { + if #[cfg(target_feature = "sse2")] { + +- use simd::i16x8; +- use simd::i8x16; +- extern "platform-intrinsic" { +- fn x86_mm_movemask_epi8(x: i8x16) -> i32; +- } +- + // Expose low-level mask instead of higher-level conclusion, + // because the non-ASCII case would perform less well otherwise. + #[inline(always)] + pub fn mask_ascii(s: u8x16) -> i32 { + unsafe { +- let signed: i8x16 = ::std::mem::transmute_copy(&s); +- x86_mm_movemask_epi8(signed) ++ _mm_movemask_epi8(__m128i::from_bits(s)) + } + } + + } else { + + } + } + + cfg_if! { + if #[cfg(target_feature = "sse2")] { + #[inline(always)] + pub fn simd_is_ascii(s: u8x16) -> bool { + unsafe { +- let signed: i8x16 = ::std::mem::transmute_copy(&s); +- x86_mm_movemask_epi8(signed) == 0 ++ _mm_movemask_epi8(__m128i::from_bits(s)) == 0 + } + } + } else if #[cfg(target_arch = "aarch64")]{ +- extern "platform-intrinsic" { +- fn aarch64_vmaxvq_u8(x: u8x16) -> u8; +- } +- + #[inline(always)] + pub fn simd_is_ascii(s: u8x16) -> bool { + unsafe { +- aarch64_vmaxvq_u8(s) < 0x80 ++ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0x80 + } + } + } else { +@@ -164,35 +162,31 @@ cfg_if! { + #[inline(always)] + pub fn simd_is_str_latin1(s: u8x16) -> bool { + unsafe { +- aarch64_vmaxvq_u8(s) < 0xC4 ++ vmaxvq_u8(uint8x16_t::from_bits(s)) < 0xC4 + } + } + } else { + #[inline(always)] + pub fn simd_is_str_latin1(s: u8x16) -> bool { + let above_str_latin1 = u8x16::splat(0xC4); + s.lt(above_str_latin1).all() + } + } + } + + cfg_if! 
{ + if #[cfg(target_arch = "aarch64")]{ +- extern "platform-intrinsic" { +- fn aarch64_vmaxvq_u16(x: u16x8) -> u16; +- } +- + #[inline(always)] + pub fn simd_is_basic_latin(s: u16x8) -> bool { + unsafe { +- aarch64_vmaxvq_u16(s) < 0x80 ++ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x80 + } + } + + #[inline(always)] + pub fn simd_is_latin1(s: u16x8) -> bool { + unsafe { +- aarch64_vmaxvq_u16(s) < 0x100 ++ vmaxvq_u16(uint16x8_t::from_bits(s)) < 0x100 + } + } + } else { +@@ -225,7 +219,7 @@ cfg_if! { + macro_rules! aarch64_return_false_if_below_hebrew { + ($s:ident) => ({ + unsafe { +- if aarch64_vmaxvq_u16($s) < 0x0590 { ++ if vmaxvq_u16(uint16x8_t::from_bits($s)) < 0x0590 { + return false; + } + } +@@ -292,47 +286,38 @@ pub fn is_u16x8_bidi(s: u16x8) -> bool { + #[inline(always)] + pub fn simd_unpack(s: u8x16) -> (u16x8, u16x8) { + unsafe { +- let first: u8x16 = simd_shuffle16( ++ let first: u8x16 = shuffle!( + s, + u8x16::splat(0), +- [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], ++ [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23] + ); +- let second: u8x16 = simd_shuffle16( ++ let second: u8x16 = shuffle!( + s, + u8x16::splat(0), +- [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], ++ [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31] + ); +- ( +- ::std::mem::transmute_copy(&first), +- ::std::mem::transmute_copy(&second), +- ) ++ (u16x8::from_bits(first), u16x8::from_bits(second)) + } + } + + cfg_if! { + if #[cfg(target_feature = "sse2")] { +- extern "platform-intrinsic" { +- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; +- } +- + #[inline(always)] + pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { + unsafe { +- let first: i16x8 = ::std::mem::transmute_copy(&a); +- let second: i16x8 = ::std::mem::transmute_copy(&b); +- x86_mm_packus_epi16(first, second) ++ u8x16::from_bits(_mm_packus_epi16(__m128i::from_bits(a), __m128i::from_bits(b))) + } + } + } else { + #[inline(always)] + pub fn simd_pack(a: u16x8, b: u16x8) -> u8x16 { + unsafe { +- let first: u8x16 = ::std::mem::transmute_copy(&a); +- let second: u8x16 = ::std::mem::transmute_copy(&b); +- simd_shuffle16( ++ let first = u8x16::from_bits(a); ++ let second = u8x16::from_bits(b); ++ shuffle!( + first, + second, +- [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30], ++ [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ) + } + } +diff --git a/third_party/rust/encoding_rs/src/x_user_defined.rs b/third_party/rust/encoding_rs/src/x_user_defined.rs +index 6d0d613fa093..2d2076987984 100644 +--- a/third_party/rust/encoding_rs/src/x_user_defined.rs ++++ b/third_party/rust/encoding_rs/src/x_user_defined.rs +@@ -14,15 +14,12 @@ use variant::*; + cfg_if! 
{ + if #[cfg(feature = "simd-accel")] { + use simd_funcs::*; +- use simd::u16x8; ++ use packed_simd::u16x8; + + #[inline(always)] + fn shift_upper(unpacked: u16x8) -> u16x8 { + let highest_ascii = u16x8::splat(0x7F); +- let offset = u16x8::splat(0xF700); +- let mask = unpacked.gt(highest_ascii).to_repr().to_u16(); +- unpacked + (offset & mask) +- } ++ unpacked + unpacked.gt(highest_ascii).select(u16x8::splat(0xF700), u16x8::splat(0)) } + } else { + } + } +diff --git a/third_party/rust/packed_simd/.appveyor.yml b/third_party/rust/packed_simd/.appveyor.yml +new file mode 100644 +index 000000000000..0388cee0a07b +--- /dev/null ++++ b/third_party/rust/packed_simd/.appveyor.yml +@@ -0,0 +1,59 @@ ++matrix: ++ allow_failures: ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72 ++ - TARGET: i686-pc-windows-msvc ++ - TARGET: i686-pc-windows-gnu ++ - TARGET: x86_64-pc-windows-gnu ++ fast_finish: true ++ ++environment: ++ matrix: ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ NOVERIFY: "1" ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ RUSTFLAGS: "-C target-feature=+sse4.2" ++ NOVERIFY: "1" ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ RUSTFLAGS: "-C target-feature=+avx" ++ NOVERIFY: "1" ++ - TARGET: x86_64-pc-windows-msvc ++ MSYSTEM: MINGW64 ++ RUSTFLAGS: "-C target-feature=+avx2" ++ NOVERIFY: "1" ++ ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ NOVERIFY: "1" ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ RUSTFLAGS: "-C target-feature=+sse4.2" ++ NOVERIFY: "1" ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ RUSTFLAGS: "-C target-feature=+avx" ++ NOVERIFY: "1" ++ - TARGET: i686-pc-windows-msvc ++ MSYSTEM: MINGW32 ++ RUSTFLAGS: "-C target-feature=+avx2" ++ NOVERIFY: "1" ++ ++ - TARGET: x86_64-pc-windows-gnu ++ MSYSTEM: MINGW64 ++ ++ - TARGET: i686-pc-windows-gnu ++ MSYSTEM: MINGW32 ++ - TARGET: x86_64-pc-windows-gnu ++ MSYSTEM: MINGW64 ++install: ++ - ps: if (ls -r . 
-fi "*.rs" | sls "`t") { throw "Found tab character" } ++ - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe" ++ - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null ++ - ps: $env:PATH="$env:PATH;C:\rust\bin" ++ - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% ++ - rustc -vV ++ - cargo -vV ++build: false ++test_script: bash -c "ci/run.sh" +diff --git a/third_party/rust/packed_simd/.cargo-checksum.json b/third_party/rust/packed_simd/.cargo-checksum.json +new file mode 100644 +index 000000000000..01afcc1efdac +--- /dev/null ++++ b/third_party/rust/packed_simd/.cargo-checksum.json +@@ -0,0 +1 @@ ++{"files":{".appveyor.yml":"f1ed01850e0d725f9498f52a1a63ddf40702ad6e0bf5b2d7c4c04d76e96794a3",".travis.yml":"e9258d9a54fdaf4cbc12405fe5993ac4497eb2b29021691dbc91b19cb9b52227","Cargo.toml":"089941ba3c89ea111cbea3cc3abdcdcf2b9d0ae0db268d7269ee38226db950e5","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","bors.toml":"dee881dc69b9b7834e4eba5d95c3ed5a416d4628815a167d6a22d4cb4fb064b8","build.rs":"f3baefc5e5bb9b250e762a1466371b922fd7ee4243c217b2d014307603c2f57a","ci/all.sh":"a23d14e10cb26a0eb719e389c30eb955fa53cddcd436890646df09af640bd2eb","ci/android-install-ndk.sh":"0f1746108cc30bf9b9ba45bcde7b19fc1a8bdf5b0258035b4eb8dc69b75efac4","ci/android-install-sdk.sh":"3490432022c5c8f5a115c084f7a9aca1626f96c0c87ffb62019228c4346b47e4","ci/android-sysimage.sh":"ebf4e5daa1f0fe1b2092b79f0f3f161c4c4275cb744e52352c4d81ab451e4c5a","ci/benchmark.sh":"b61d19ef6b90deba8fb79dee74c8b062d94844676293da346da87bb78a9a49a4","ci/deploy_and_run_on_ios_simulator.rs":"ec8ecf82d92072676aa47f0d1a3d021b60a7ae3531153ef12d2ff4541fc294dc","ci/docker/aarch64-linux-android/Dockerfile":"ace2e7d33c87bc0f6d3962a4a3408c04557646f7f51ab99cfbf574906796b016","ci/docker/aarch64-unknown-linux-gnu/Dockerfile":"1ecdac757101d951794fb2ab0deaa278199cf25f2e08a15c7d40ff31a8556184","ci/docker/arm-linux-androideabi/Dockerfile":"370e55d3330a413a3ccf677b3afb3e0ef9018a5fab263faa97ae8ac017fc2286","ci/docker/arm-unknown-linux-gnueabi/Dockerfile":"e25d88f6c0c94aada3d2e3f08243f755feb7e869dc5dc505b3799719cb1af591","ci/docker/arm-unknown-linux-gnueabihf/Dockerfile":"f126f4c7bae8c11ab8b16df06ad997863f0838825a9c08c9899a3eedb6d570bd","ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile":"b647545c158ee480a4c581dbdc1f57833aef056c8d498acc04b573e842bf803c","ci/docker/i586-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/i686-unknown-linux-gnu/Dockerfile":"0d492759017307ccf74dc2aa4a8cf6623daf3dc728c708dc2b18fa7940800cba","ci/docker/mips-unknown-linux-gnu/Dockerfile":"323776469bb7b160385f3621d66e3ee14c75242f8180f916e65af048a29d4ea0","ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile":"c647f6948a9a43b0be695cbed4eac752120d0faf28e5e69c718cb10406921dab","ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile":"77bfd00cc8639509be381b394f077e39b45a00158ad61b4e1656714c714665d1","ci/docker/mipsel-unknown-linux-musl/Dockerfile":"ec5bea6c98a3b626731fdb95f9ff2d1182639c76e8fb16d3271d0fc884901524","ci/docker/powerpc-unknown-linux-gnu/Dockerfile":"4f2b662de66e83d1354f650b7077692309637f786c2ea5516c31b5c2ee10af2d","ci/docker/powerpc64-unknown-linux-gnu/Dockerfile":"a9595402b772bc365982e22a0096a8988825d90b09b5faa97ab192e76072f71d","ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile":"df3c381c157439695ae8cd10ab71664702c061e3b4ab
22906a5ad6c2680acfed","ci/docker/s390x-unknown-linux-gnu/Dockerfile":"93fb44df3d7fd31ead158570667c97b5076a05c3d968af4a84bc13819a8f2db8","ci/docker/sparc64-unknown-linux-gnu/Dockerfile":"da1c39a3ff1fe22e41395fa7c8934e90b4c1788e551b9aec6e38bfd94effc437","ci/docker/thumbv7neon-linux-androideabi/Dockerfile":"c2decd5591bd7a09378901bef629cd944acf052eb55e4f35b79eb9cb4d62246a","ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile":"75c0c56161c7382b439de74c00de1c0e3dc9d59560cd6720976a751034b78714","ci/docker/wasm32-unknown-unknown/Dockerfile":"3e5f294bc1e004aa599086c2af49d6f3e7459fa250f5fbdd60cf67d53db78758","ci/docker/x86_64-linux-android/Dockerfile":"685040273cf350d5509e580ac451555efa19790c8723ca2af066adadc6880ad2","ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile":"44b6203d9290bfdc53d81219f0937e1110847a23dd982ec8c4de388354f01536","ci/docker/x86_64-unknown-linux-gnu/Dockerfile":"d253c86803b22da428fa9cc671a05f18d3318eca7733b8dccb4f7be1ddf524c5","ci/dox.sh":"5b61711be47a4e3dde0ddd15ba73d256ea95fd75af3897732c24db1dc7e66366","ci/linux-s390x.sh":"d6b732d7795b4ba131326aff893bca6228a7d2eb0e9402f135705413dbbe0dce","ci/linux-sparc64.sh":"c92966838b1ab7ad3b7a344833ee726aba6b647cf5952e56f0ad1ba420b13325","ci/lld-shim.rs":"3d7f71ec23a49e2b67f694a0168786f9a954dda15f5a138815d966643fd3fcc3","ci/max_line_width.sh":"0a1518bba4c9ecaa55694cb2e9930d0e19c265baabf73143f17f9cf285aaa5bb","ci/run-docker.sh":"92e036390ad9b0d16f109579df1b5ced2e72e9afea40c7d011400ebd3a2a90de","ci/run.sh":"63259e22a96ba539f53c06b1b39f53e3a78a71171652e7afc170836110ccd913","ci/run_examples.sh":"d1a23c6c35374a0678ba5114b9b8fefd8be0a79e774872a8bf0898d1baca18d0","ci/runtest-android.rs":"145a8e9799a5223975061fe7e586ade5669ee4877a7d7a4cf6b4ab48e8e36c7c","ci/setup_benchmarks.sh":"73fb981a8fdb1dcd54409d3c0fbbfb8f77a3ceabf8626a6b9bf9d21d6bc8ce72","ci/test-runner-linux":"c8aa6025cff5306f4f31d0c61dc5f9d4dd5a1d189ab613ef8d4c367c694d9ccd","contributing.md":"2cc8c9c560ae17867e69b06d09b758dbf7bc39eb774ada50a743724b10acc0a2","perf-guide/.gitignore":"fe82c7da551079d832cf74200b0b359b4df9828cb4a0416fa7384f07a2ae6a13","perf-guide/book.toml":"115a98284126c6b180178b44713314cc494f08a71662ee2ce15cf67f17a51064","perf-guide/src/SUMMARY.md":"3e03bffc991fdc2050f3d51842d72d9d21ea6abab56a3baf3b2d5973a78b89e1","perf-guide/src/ascii.css":"29afb08833b2fe2250f0412e1fa1161a2432a0820a14953c87124407417c741a","perf-guide/src/bound_checks.md":"5e4991ff58a183ef0cd9fdc1feb4cd12d083b44bdf87393bbb0927808ef3ce7d","perf-guide/src/float-math/approx.md":"8c09032fa2d795a0c5db1775826c850d28eb2627846d0965c60ee72de63735ad","perf-guide/src/float-math/fma.md":"311076ba4b741d604a82e74b83a8d7e8c318fcbd7f64c4392d1cf5af95c60243","perf-guide/src/float-math/fp.md":"04153e775ab6e4f0d7837bcc515230d327b04edfa34c84ce9c9e10ebaeef2be8","perf-guide/src/float-math/svml.md":"0798873b8eedaeda5fed62dc91645b57c20775a02d3cd74d8bd06958f1516506","perf-guide/src/introduction.md":"9f5a19e9e6751f25d2daad39891a0cc600974527ec4c8305843f9618910671bd","perf-guide/src/prof/linux.md":"447731eb5de7d69166728fdbc5ecb0c0c9db678ea493b45a592d67dd002184c0","perf-guide/src/prof/mca.md":"f56d54f3d20e7aa4d32052186e8237b03d65971eb5d112802b442570ff11d344","perf-guide/src/prof/profiling.md":"8a650c0fd6ede0964789bb6577557eeef1d8226a896788602ce61528e260e43c","perf-guide/src/target-feature/attribute.md":"615f88dca0a707b6c416fa605435dd6e1fb5361cc639429cbf68cd87624bd78b","perf-guide/src/target-feature/features.md":"17077760ff24c006b606dd21889c53d87228f4311f3ba3a574f9afdeacd86165","perf-guide/src/target-feature/inlining.md":"7ed1d7068
d8173a00d84c16cfe5871cd68b9f04f8d0cca2d01ebc84957ebf2f6","perf-guide/src/target-feature/practice.md":"c4b371842e0086df178488fec97f20def8f0c62ee588bcd25fd948b9b1fa227e","perf-guide/src/target-feature/runtime.md":"835425f5ee597fb3e51d36e725a81ebee29f4561231d19563cd4da81dbb1cfcb","perf-guide/src/target-feature/rustflags.md":"ab49712e9293a65d74d540ba4784fcb57ff1119ec05a575d895c071f1a620f64","perf-guide/src/vert-hor-ops.md":"c6211c0ee91e60552ec592d89d9d957eedc21dee3cbd89e1ad6765ea06a27471","readme.md":"585a8f0e16877fb9abb00cd17a175fcb9d7857840c6c61209f1827ffab095070","rustfmt.toml":"de6101d0670bad65fb3b337d56957d2a024e017e5ab146ec784d77312daaf8ff","src/api.rs":"331a3a4abb19cee2df5f2df4ad7c3e88b45e62cf23fdacfc9bbaa633dc5cf788","src/api/bit_manip.rs":"e68290ee679cc5abc9c73afbe635c1035f8cbfe849e5c751a1680e459244c39e","src/api/cast.rs":"03b94a3d316ac7b7be7068810044911e965e889a0ace7bae762749ca74a92747","src/api/cast/macros.rs":"b0a14d0c83ad2ebb7a275180f6d9e3f2bc312ba57a7d3d6c39fad4e0f20f9408","src/api/cast/v128.rs":"63e28c6a3edf1a7a635f51b8d3c6adbb1d46f884d92a196b3d4a6e743d809416","src/api/cast/v16.rs":"2a584eeb57fd47baad6f3533764301b04aaaac23702b7a8db12598ac02899262","src/api/cast/v256.rs":"b91c15ed8d1536ecd97b4eb79ff9d5aba0552cd9b6f0ea6435b05f2273e23b3a","src/api/cast/v32.rs":"62ec89fcce7fa7f28497ee5770adc8f81d2d3a6b2925b02f7dc06504c40e8f38","src/api/cast/v512.rs":"d855cb943ae7106e9599ef38e30a3afb1c6bd5433178baca54cb128fd9a7d143","src/api/cast/v64.rs":"fe0f7dfaf4fc0c0c1a78c96fcfcdfdc2a1e2845843b11aa797a0c6fb52a8f774","src/api/cmp.rs":"357c3a2a09c6d4611c32dd7fa95be2fae933d513e229026ec9b44451a77b884e","src/api/cmp/eq.rs":"60f70f355bae4cb5b17db53204cacc3890f70670611c17df638d4c04f7cc8075","src/api/cmp/ord.rs":"589f7234761c294fa5df8f525bc4acd5a47cdb602207d524a0d4e19804cd9695","src/api/cmp/partial_eq.rs":"3ed23d2a930b0f9750c3a5309da766b03dc4f9c4d375b42ad3c50fe732693d15","src/api/cmp/partial_ord.rs":"e16b11805c94048acd058c93994b5bc74bb187f8d7e3b86a87df60e1601467f9","src/api/cmp/vertical.rs":"de3d62f38eba817299aa16f1e1939954c9a447e316509397465c2830852ba053","src/api/default.rs":"b61f92fc0e33a2633b3375eb405beba480da071cde03df4d437d8a6058afcd97","src/api/fmt.rs":"67fb804bb86b6cd77cf8cd492b5733ce437071b66fe3297278b8a6552c325dda","src/api/fmt/binary.rs":"35cb5c266197d6224d598fb3d286e5fe48ef0c01ed356c2ff6fe9ba946f96a92","src/api/fmt/debug.rs":"aa18eea443bf353fea3db8b1a025132bbcaf91e747ecfa43b8d9fce9af395a0c","src/api/fmt/lower_hex.rs":"69d5be366631af309f214e8031c8c20267fcc27a695eac6f45c6bc1df72a67e6","src/api/fmt/octal.rs":"9eb11ba3d990213f3c7f1ec25edba7ce997cb1320e16d308c83498ba6b9bfbd9","src/api/fmt/upper_hex.rs":"a4637d085b7bb20e759ce58e08435b510a563ba3dd468af2b03560fdc5511562","src/api/from.rs":"2e599d8329cb05eaf06224cc441355c4b7b51254fc19256619333be8c149d444","src/api/from/from_array.rs":"4151593c7bba7455821fffa5b59867005a77c95d32f1f0cc3fd87294000157d9","src/api/from/from_vector.rs":"9764371aa9e6005aace74dea14f59e5611a095b7cf42707940924749282c52f0","src/api/hash.rs":"562cfa3f1d8eb9a733c035a3665a599c2f1e341ee820d8fbdd102a4398a441bc","src/api/into_bits.rs":"82297f0697d67b5a015e904e7e6e7b2a7066ba825bc54b94b4ff3e22d7a1eefb","src/api/into_bits/arch_specific.rs":"1f925390b0ce7132587d95f2419c6e2ad3e1a9d17eb1d9c120a1c1c4bdf4277e","src/api/into_bits/macros.rs":"d762406de25aedff88d460dec7a80dc8e825a2a419d53218ce007efa6a1d3e04","src/api/into_bits/v128.rs":"ecdc5893664c71d7ab1ff3697c3fbe490d20d8748b9b76881d05e7625e40d74c","src/api/into_bits/v16.rs":"5459ec7dad1ad7bd30dc7e48374580b993abf23701d9c3cb22203fa0a9aabb6d","sr
c/api/into_bits/v256.rs":"90ea351da0380ead1bf0f63b620afd40d01d638d09f7e7be31840bd2c1d9c663","src/api/into_bits/v32.rs":"ee1dc5a430050e16f51154b5fe85b1536f5feddf2ea23dd1d3859b67c4afc6fc","src/api/into_bits/v512.rs":"f72098ed1c9a23944f3d01abaf5e0f2d0e81d35a06fdadd2183e896d41b59867","src/api/into_bits/v64.rs":"6394462facdfe7827349c742b7801f1291e75a720dfb8c0b52100df46f371c98","src/api/math.rs":"8b2a2fc651917a850539f993aa0b9e5bf4da67b11685285b8de8cdca311719ec","src/api/math/float.rs":"61d2794d68262a1090ae473bd30793b5f65cf732f32a6694a3af2ce5d9225616","src/api/math/float/abs.rs":"5b6b2701e2e11135b7ce58a05052ea8120e10e4702c95d046b9d21b827b26bf8","src/api/math/float/consts.rs":"78acba000d3fa527111300b6327c1932de9c4c1e02d4174e1a5615c01463d38c","src/api/math/float/cos.rs":"4c2dd7173728ef189314f1576c9486e03be21b7da98843b2f9011282a7979e31","src/api/math/float/exp.rs":"7c6d5f1e304f498a01cfa23b92380c815d7da0ad94eae3483783bc377d287eef","src/api/math/float/ln.rs":"54c7583f3df793b39ff57534fade27b41bb992439e5dc178252f5ca3190a3e54","src/api/math/float/mul_add.rs":"62cac77660d20159276d4c9ef066eb90c81cbddb808e8e157182c607625ad2eb","src/api/math/float/mul_adde.rs":"bae056ee9f3a70df39ec3c3b2f6437c65303888a7b843ef1a5bcf1f5aca0e602","src/api/math/float/powf.rs":"9ddb938984b36d39d82a82f862f80df8f7fb013f1d222d45698d41d88472f568","src/api/math/float/recpre.rs":"589225794ff1dbf31158dff660e6d4509ecc8befbb57c633900dea5ac0b840d6","src/api/math/float/rsqrte.rs":"a32abdcc318d7ccc8448231f54d75b884b7cbeb03a7d595713ab6243036f4dbf","src/api/math/float/sin.rs":"cbd3622b7df74f19691743001c8cf747a201f8977ad90542fee915f37dcd1e49","src/api/math/float/sqrt.rs":"0c66d5d63fb08e4d99c6b82a8828e41173aff1ac9fa1a2764a11fac217ccf2ac","src/api/math/float/sqrte.rs":"731e1c9f321b662accdd27dacb3aac2e8043b7aecb2f2161dde733bd9f025362","src/api/minimal.rs":"1f22bcc528555444e76de569ec0ae2029b9ae9d04805efeafa93369c8098036b","src/api/minimal/iuf.rs":"c501a6696950cf5e521765f178de548af64fdfb6e10d026616d09fab93ca2d17","src/api/minimal/mask.rs":"42e415f536c5193d0218f5a754b34b87fd7c971bff068009f958712166ff056d","src/api/minimal/ptr.rs":"a9ee482d1dd1c956fb8f3f179e6e620b1de4e9d713961461d4c6923a4ef2e67c","src/api/ops.rs":"3e273b277a0f3019d42c3c59ca94a5afd4885d5ae6d2182e5089bbeec9de42ee","src/api/ops/scalar_arithmetic.rs":"d2d5ad897a59dd0787544f927e0e7ca4072c3e58b0f4a2324083312b0d5a21d7","src/api/ops/scalar_bitwise.rs":"482204e459ca6be79568e1c9f70adbe2d2151412ddf122fb2161be8ebb51c40c","src/api/ops/scalar_mask_bitwise.rs":"c250f52042e37b22d57256c80d4604104cfd2fbe2a2e127c676267270ca5d350","src/api/ops/scalar_shifts.rs":"987f8fdebeedc16e3d77c1b732e7826ef70633c541d16dfa290845d5c6289150","src/api/ops/vector_arithmetic.rs":"ddca15d09ddeef502c2ed66117a62300ca65d87e959e8b622d767bdf1c307910","src/api/ops/vector_bitwise.rs":"b3968f7005b649edcc22a54e2379b14d5ee19045f2e784029805781ae043b5ee","src/api/ops/vector_float_min_max.rs":"f5155dce75219f4ba11275b1f295d2fdcddd49d174a6f1fb2ace7ea42813ce41","src/api/ops/vector_int_min_max.rs":"a378789c6ff9b32a51fbd0a97ffd36ed102cd1fe6a067d2b02017c1df342def6","src/api/ops/vector_mask_bitwise.rs":"5052d18517d765415d40327e6e8e55a312daaca0a5e2aec959bfa54b1675f9c8","src/api/ops/vector_neg.rs":"5c62f6b0221983cdbd23cd0a3af3672e6ba1255f0dfe8b19aae6fbd6503e231b","src/api/ops/vector_rotates.rs":"03cbe8a400fd7c688e4ee771a990a6754f2031b1a59b19ae81158b21471167e5","src/api/ops/vector_shifts.rs":"9bf69d0087268f61009e39aea52e03a90f378910206b6a28e8393178b6a5d0e0","src/api/ptr.rs":"8a793251bed6130dcfb2f1519ceaa18b751bbb15875928d0fb6deb5a5e07523a","src/ap
i/ptr/gather_scatter.rs":"9ddd960365e050674b25b2fd3116e24d94669b4375d74e71c03e3f1469576066","src/api/reductions.rs":"ae5baca81352ecd44526d6c30c0a1feeda475ec73ddd3c3ec6b14e944e5448ee","src/api/reductions/bitwise.rs":"8bf910ae226188bd15fc7e125f058cd2566b6186fcd0cd8fd020f352c39ce139","src/api/reductions/float_arithmetic.rs":"e58c8c87806a95df2b2b5b48ac5991036df024096d9d7c171a480fe9282896a4","src/api/reductions/integer_arithmetic.rs":"47471da1c5f859489680bb5d34ced3d3aa20081c16053a3af121a4496fcb57bf","src/api/reductions/mask.rs":"db83327a950e33a317f37fd33ca4e20c347fb415975ec024f3e23da8509425af","src/api/reductions/min_max.rs":"f27be3aa28e1c1f46de7890198db6e12f00c207085e89ef2de7e57ee443cdb98","src/api/select.rs":"a98e2ccf9fc6bdeed32d337c8675bc96c2fbe2cc34fbf149ad6047fb8e749774","src/api/shuffle.rs":"da58200790868c09659819322a489929a5b6e56c596ed07e6a44293ea02e7d09","src/api/shuffle1_dyn.rs":"bfea5a91905b31444e9ef7ca6eddb7a9606b7e22d3f71bb842eb2795a0346620","src/api/slice.rs":"ee87484e8af329547b9a5d4f2a69e8bed6ea10bbd96270d706083843d4eea2ac","src/api/slice/from_slice.rs":"4d4fe8a329c885fcb4fbcbedf99efb15a95296fe6b3f595056cc37037450d5ac","src/api/slice/write_to_slice.rs":"f5b23b2c4b91cfb26b713a9013a6c0da7f45eaefb79ba06dcbc27f3f23bda679","src/api/swap_bytes.rs":"4a6792a2e49a77475e1b237592b4b2804dbddb79c474331acd0dd71b36934259","src/codegen.rs":"c6eebc3d3665420aa6a2f317977e3c41a4f43e0550ac630cdbe8e4bbed5e2031","src/codegen/bit_manip.rs":"5559e095105a80003e0de35af1d19b0c65c9ab04eb743c7e01c5442d882eb34e","src/codegen/llvm.rs":"d1299c189abb17a6133f047574cffc7a6db4c1be37cb7d4785491cb5e8f8cf54","src/codegen/math.rs":"35f96e37a78fcf0cdb02146b7f27a45108fe06a37fc2a54d8851ce131a326178","src/codegen/math/float.rs":"dd86c0449e576c83b719700962ac017c332987fac08d91f2b7a2b1b883598170","src/codegen/math/float/abs.rs":"f56e2b4b8055ea861c1f5cbc6b6e1d8e7e5af163b62c13574ddee4e09513bfbc","src/codegen/math/float/cos.rs":"ef3b511a24d23045b310315e80348a9b7fedb576fc2de52d74290616a0abeb2a","src/codegen/math/float/cos_pi.rs":"4e7631a5d73dac21531e09ef1802d1180f8997509c2c8fa9f67f322194263a97","src/codegen/math/float/exp.rs":"61b691598c41b5622f24e4320c1bdd08701e612a516438bdddcc728fc3405c8c","src/codegen/math/float/ln.rs":"46b718b1ba8c9d99e1ad40f53d20dfde08a3063ca7bd2a9fdd6698e060da687e","src/codegen/math/float/macros.rs":"dd42135fff13f9aca4fd3a1a4e14c7e6c31aadc6d817d63b0d2fb9e62e062744","src/codegen/math/float/mul_add.rs":"a37bf764345d4b1714f97e83897b7cf0855fc2811704bcbc0012db91825339e1","src/codegen/math/float/mul_adde.rs":"c75702bfcb361de45964a93caf959a695ef2376bd069227600b8c6872665c755","src/codegen/math/float/powf.rs":"642346e982bc4c39203de0864d2149c4179cd7b21cf67a2951687932b4675872","src/codegen/math/float/sin.rs":"9d68164c90cdca6a85155040cdac42e27342ebe0b925273ef1593df721af4258","src/codegen/math/float/sin_cos_pi.rs":"9be02ad48585a1e8d99129382fbffbaed47852f15459256a708850b6b7a75405","src/codegen/math/float/sin_pi.rs":"9890347905b4d4a3c7341c3eb06406e46e60582bcf6960688bd727e5dadc6c57","src/codegen/math/float/sqrt.rs":"e3c60dcfb0c6d2fc62adabcc931b2d4040b83cab294dea36443fb4b89eb79e34","src/codegen/math/float/sqrte.rs":"f0f4ef9eb475ae41bcc7ec6a95ad744ba6b36925faa8b2c2814004396d196b63","src/codegen/pointer_sized_int.rs":"a70697169c28218b56fd2e8d5353f2e00671d1150d0c8cef77d613bdfacd84cb","src/codegen/reductions.rs":"645e2514746d01387ddd07f0aa4ffd8430cc9ab428d4fb13773ea319fa25dd95","src/codegen/reductions/mask.rs":"8f1afe6aabf096a3278e1fc3a30f736e04aa8b9ce96373cee22162d18cfe2702","src/codegen/reductions/mask/aarch64.rs":"cba6e17603d39
795dcfe8339b6b7d8714c3e162a1f0a635979f037aa24fe4206","src/codegen/reductions/mask/arm.rs":"9447904818aa2c7c25d0963eead452a639a11ca7dbd6d21eedbfcaade07a0f33","src/codegen/reductions/mask/fallback.rs":"7a0ef9f7fd03ae318b495b95e121350cd61caffc5cc6ee17fabf130d5d933453","src/codegen/reductions/mask/fallback_impl.rs":"76547f396e55ef403327c77c314cf8db8c7a5c9b9819bfb925abeacf130249e5","src/codegen/reductions/mask/x86.rs":"14bd2c482071f2355beebcf7b7ecf950ff2dfcdb08c3ca50993092434a9de717","src/codegen/reductions/mask/x86/avx.rs":"b4913d87844c522903641cbbf10db4551addb1ce5e9e78278e21612fa65c733b","src/codegen/reductions/mask/x86/avx2.rs":"677aed3f056285285daa3adff8bc65e739630b4424defa6d9665e160f027507e","src/codegen/reductions/mask/x86/sse.rs":"226610b4ff88c676d5187114dd57b4a8800de6ce40884675e9198445b1ed0306","src/codegen/reductions/mask/x86/sse2.rs":"bc38e6c31cb4b3d62147eba6cac264e519e2a48e0f7ce9010cfa9ef0cf0ec9fd","src/codegen/shuffle.rs":"0abca97e92cdce49a58a39cc447eb09dc7d7715ef256c8dbd2181a186e61bb64","src/codegen/shuffle1_dyn.rs":"04523e9338133bdedb012dd076c2c564b79ce5593b0fc56d0fb6910e04190a81","src/codegen/swap_bytes.rs":"1d6cdc716eadddc92b4fd506b2445a821caa8dc00860447de09d7ebd69c2087f","src/codegen/v128.rs":"94226b31ec403d18d9d2fe06713f147c9c79e9b5f9105089088266313f843185","src/codegen/v16.rs":"ddec4ffb66b6f7aaffb9a1780c5ddba82557abd74f45073d335047e04cf74924","src/codegen/v256.rs":"6b63917f0444118d6b1595bff2045e59b97c4d24012bd575f69f1f0efc5a0241","src/codegen/v32.rs":"3477b3c5540aed86e61e2f5807dd31db947413cec9181c587d93ed6ec74f0eba","src/codegen/v512.rs":"5854f99d3aabc4cd42b28a20d9ce447756dc2ba024a409a69b6a8ae1f1842fc5","src/codegen/v64.rs":"e9e89caebfe63d10c0cbca61e4dfdba3b7e02ee0989170f80beed23237ddd950","src/codegen/vPtr.rs":"96d609a9eece4dcbbcc01ba0b8744d7f5958be12774176a2945bc676f4e6b5cb","src/codegen/vSize.rs":"eeee9858749aa82142b27bc120d1989bb74a6b82e1e4efbbeaccc9634dc9acfc","src/lib.rs":"1b5d419ff05ee0370d671810423ccc254708cc8d415c1dbac2a7a36be4bf63a8","src/masks.rs":"870f429967b2d7d5133f4d28d6c753fc5cef0570b27b29d4e966a066d22d2d0e","src/sealed.rs":"ff7f0324276408ae8249941cfa32c90b8835a54d750896b683efea857af19db2","src/testing.rs":"1d3a7862ef625e235a5734ad7204e68d350f902c0695182b1f08a0552432416e","src/testing/macros.rs":"6378856d7a40ba5ec5c7c0dad6327d79f0c77266921c24296d10aed6c68e9b98","src/testing/utils.rs":"d6fd5a5017f1f85d9d99585754f8f6ad06fc3d683b34083543e67a7cc6c1772c","src/v128.rs":"18fe263c4aa28cd06461c7070b0269f69f4a2e75749b8f142a83dfdfe4d22bf5","src/v16.rs":"e5c663c9fb3547eaeac78a5f7db9969f4d8b5ec96112bf2954602fff11f0aebd","src/v256.rs":"68732cd688ad12a56d8b4f8ddf279f77bdfe1be2943c7dc0c1b4f1a76798aa0f","src/v32.rs":"785b22a1ccb4a41bb53dfeb0670f624c0ce42e6cdf62d1747e3283777a1c70bd","src/v512.rs":"d1337bfe07f06a8f37f8e8fa7d4315b9307476ee435ad80dd5269eaed564fbfa","src/v64.rs":"3077468d65125b8f085e9454c8b2463a4d5225697464ba6a1300f8799528fd4b","src/vPtr.rs":"c9a53f41f466e17b6648a4ce390fd8f4d3a848d440eb8a9a803a11608d76eb05","src/vSize.rs":"5c46d3e8c3ee5863d9b6e37e681f871386e0efc254d6d84ba711edb529ce7b3c","tests/endianness.rs":"541a144be017e3dd7da7c8ea49d907dc02538245e8c5f3deb5bd43da92c929e1"},"package":null} +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/.travis.yml b/third_party/rust/packed_simd/.travis.yml +new file mode 100644 +index 000000000000..8d8ed54ab737 +--- /dev/null ++++ b/third_party/rust/packed_simd/.travis.yml +@@ -0,0 +1,308 @@ ++language: rust ++sudo: false ++rust: nightly ++ ++stages: ++ - tools ++ - linux-tier1 ++ - osx-tier1 ++ - 
osx-tier2 ++ - linux-tier2 ++ - android ++ ++matrix: ++ fast_finish: true ++ include: ++ # Android: ++ - env: TARGET=x86_64-linux-android NOVERIFY=1 ++ name: "x86_64-unknown-linux-android + SSE2" ++ stage: android ++ - env: TARGET=arm-linux-androideabi ++ name: "arm-linux-androideabi" ++ stage: android ++ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ name: "arm-linux-androideabi + NEON" ++ stage: android ++ - env: TARGET=aarch64-linux-android ++ name: "aarch64-unknown-linux-android" ++ stage: android ++ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" ++ name: "aarch64-unknown-linux-android + NEON" ++ stage: android ++ - env: TARGET="thumbv7neon-linux-androideabi" ++ name: "thumbv7neon-linux-androideabi" ++ stage: android ++ # Linux: ++ - env: TARGET=i586-unknown-linux-gnu ++ name: "i586-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse" ++ name: "i586-unknown-linux-gnu + SSE" ++ stage: linux-tier2 ++ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2" ++ name: "i586-unknown-linux-gnu + SSE2" ++ stage: linux-tier2 ++ - env: TARGET=i686-unknown-linux-gnu ++ name: "i686-unknown-linux-gnu + SSE2" ++ stage: linux-tier1 ++ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "i686-unknown-linux-gnu + SSE4.2" ++ stage: linux-tier1 ++ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" ++ name: "i686-unknown-linux-gnu + AVX2" ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu ++ name: "x86_64-unknown-linux-gnu + SSE2" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "x86_64-unknown-linux-gnu + SSE4.2" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx" ++ name: "x86_64-unknown-linux-gnu + AVX" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" ++ name: "x86_64-unknown-linux-gnu + AVX2" ++ install: rustup component add rustfmt-preview ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated ++ name: "Intel SDE + SSE2" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "Intel SDE + SSE4.2" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx" ++ name: "Intel SDE + AVX" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx2" ++ name: "Intel SDE + AVX2" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx-512f" ++ name: "Intel SDE + AVX-512" ++ install: true ++ stage: linux-tier1 ++ - env: TARGET=arm-unknown-linux-gnueabi ++ name: "arm-unknown-linux-gnueabi" ++ stage: linux-tier2 ++ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ name: "arm-unknown-linux-gnueabi + NEON" ++ stage: linux-tier2 ++ - env: TARGET=arm-unknown-linux-gnueabihf ++ name: "arm-unknown-linux-gnueabihf" ++ stage: linux-tier2 ++ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" ++ name: "arm-unknown-linux-gnueabihf + NEON" ++ stage: linux-tier2 ++ - env: 
TARGET=armv7-unknown-linux-gnueabihf ++ name: "armv7-unknown-linux-gnueabihf" ++ stage: linux-tier2 ++ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" ++ name: "armv7-unknown-linux-gnueabihf + NEON" ++ stage: linux-tier2 ++ - env: TARGET="thumbv7neon-unknown-linux-gnueabihf" ++ name: "thumbv7neon-unknown-linux-gnueabihf" ++ stage: linux-tier2 ++ - env: TARGET=aarch64-unknown-linux-gnu ++ name: "aarch64-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon" ++ name: "aarch64-unknown-linux-gnu + NEON" ++ stage: linux-tier2 ++ - env: TARGET=mips-unknown-linux-gnu ++ name: "mips-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=mipsel-unknown-linux-musl ++ name: "mipsel-unknown-linux-musl" ++ stage: linux-tier2 ++ - env: TARGET=mips64-unknown-linux-gnuabi64 ++ name: "mips64-unknown-linux-gnuabi64" ++ stage: linux-tier2 ++ - env: TARGET=mips64el-unknown-linux-gnuabi64 ++ name: "mips64el-unknown-linux-gnuabi64" ++ stage: linux-tier2 ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18 ++ # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6" ++ - env: TARGET=powerpc-unknown-linux-gnu ++ name: "powerpc-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64-unknown-linux-gnu ++ name: "powerpc64-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64le-unknown-linux-gnu ++ name: "powerpc64le-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" ++ name: "powerpc64le-unknown-linux-gnu + ALTIVEC" ++ stage: linux-tier2 ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" ++ name: "powerpc64le-unknown-linux-gnu + VSX" ++ stage: linux-tier2 ++ - env: TARGET=s390x-unknown-linux-gnu ++ name: "s390x-unknown-linux-gnu" ++ stage: linux-tier2 ++ - env: TARGET=sparc64-unknown-linux-gnu ++ name: "sparc64-unknown-linux-gnu" ++ stage: linux-tier2 ++ # WebAssembly: ++ - env: TARGET=wasm32-unknown-unknown ++ name: "wasm32-unknown-unknown" ++ stage: osx-tier1 # For now ++ # MacOSX: ++ - os: osx ++ env: TARGET=i686-apple-darwin ++ name: "i686-apple-darwin + SSE2" ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ - os: osx ++ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "i686-apple-darwin + SSE4.2" ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ # Travis-CI OSX build bots do not support AVX2: ++ - os: osx ++ env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+avx" ++ name: "i686-apple-darwin + AVX" ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ - os: osx ++ env: TARGET=x86_64-apple-darwin ++ name: "x86_64-apple-darwin + SSE2" ++ install: true ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ - os: osx ++ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" ++ name: "x86_64-apple-darwin + SSE4.2" ++ install: true ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ # Travis-CI OSX build bots do not support AVX2: ++ - os: osx ++ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx" ++ name: "x86_64-apple-darwin + AVX" ++ install: true ++ script: ci/run.sh ++ osx_image: xcode10 ++ stage: osx-tier1 ++ # *BSDs: ++ #- env: TARGET=i686-unknown-freebsd NORUN=1 ++ # script: ci/run.sh ++ #- env: TARGET=x86_64-unknown-freebsd NORUN=1 ++ # script: ci/run.sh ++ #- env: 
TARGET=x86_64-unknown-netbsd NORUN=1 ++ # script: ci/run.sh ++ # Solaris: ++ #- env: TARGET=x86_64-sun-solaris NORUN=1 ++ # script: ci/run.sh ++ # iOS: ++ - os: osx ++ env: TARGET=i386-apple-ios ++ name: "i386-apple-ios" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ - os: osx ++ env: TARGET=x86_64-apple-ios ++ name: "x86_64-apple-ios + SSE2" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ - os: osx ++ env: TARGET=armv7-apple-ios NORUN=1 ++ name: "armv7-apple-ios [Build only]" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ - os: osx ++ env: TARGET=aarch64-apple-ios NORUN=1 ++ name: "aarch64-apple-ios [Build only]" ++ script: ci/run.sh ++ osx_image: xcode9.4 ++ stage: osx-tier2 ++ # BENCHMARKS: ++ - name: "Benchmarks - x86_64-unknown-linux-gnu" ++ install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh ++ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh ++ stage: tools ++ - name: "Benchmarks - x86_64-apple-darwin" ++ install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh ++ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh ++ os: osx ++ osx_image: xcode9.4 ++ stage: tools ++ # TOOLS: ++ - name: "Documentation" ++ install: cargo install mdbook ++ script: ci/dox.sh ++ stage: tools ++ - name: "rustfmt" ++ install: true ++ before_script: rustup component add rustfmt-preview ++ script: ci/all.sh check_fmt || true ++ stage: tools ++ - name: "clippy" ++ install: true ++ before_script: rustup component add clippy-preview ++ script: ci/all.sh clippy ++ stage: tools ++ ++ allow_failures: ++ # FIXME: ISPC cannot be found? ++ - name: "Benchmarks - x86_64-apple-darwin" ++ # FIXME: TBD ++ - env: TARGET=powerpc-unknown-linux-gnu ++ - env: TARGET=powerpc64-unknown-linux-gnu ++ - env: TARGET=powerpc64le-unknown-linux-gnu ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" ++ - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" ++ #- env: TARGET=i686-unknown-freebsd NORUN=1 ++ #- env: TARGET=x86_64-unknown-freebsd NORUN=1 ++ #- env: TARGET=x86_64-unknown-netbsd NORUN=1 ++ #- env: TARGET=x86_64-sun-solaris NORUN=1 ++ ++ # FIXME: TBD ++ - env: TARGET=arm-linux-androideabi ++ - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ - env: TARGET=aarch64-linux-android ++ - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" ++ ++ # FIXME: iOS ++ # https://github.com/rust-lang-nursery/packed_simd/issues/26 ++ - env: TARGET=i386-apple-ios ++ - env: TARGET=x86_64-apple-ios ++ ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/182 ++ - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" ++ - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" ++ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" ++ ++ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/183 ++ - env: TARGET=wasm32-unknown-unknown ++ ++install: travis_retry rustup target add $TARGET ++before_script: cargo generate-lockfile ++script: travis_wait 50 ci/run-docker.sh ++after_script: sleep 5 ++ ++env: ++ global: ++ secure: 
"lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo=" ++branches: ++ only: ++ - staging # bors r+ ++ - trying # bors try ++ - master ++notifications: ++ email: ++ on_success: never +diff --git a/third_party/rust/packed_simd/Cargo.toml b/third_party/rust/packed_simd/Cargo.toml +new file mode 100644 +index 000000000000..3db9354c9407 +--- /dev/null ++++ b/third_party/rust/packed_simd/Cargo.toml +@@ -0,0 +1,42 @@ ++[package] ++name = "packed_simd" ++version = "0.3.3" ++authors = ["Gonzalo Brito Gadeschi "] ++description = "Portable Packed SIMD vectors" ++documentation = "https://docs.rs/crate/packed_simd/" ++homepage = "https://github.com/rust-lang-nursery/packed_simd" ++repository = "https://github.com/rust-lang-nursery/packed_simd" ++keywords = ["simd", "vector", "portability"] ++categories = ["hardware-support", "concurrency", "no-std", "data-structures"] ++license = "MIT/Apache-2.0" ++build = "build.rs" ++edition = "2018" ++ ++[badges] ++appveyor = { repository = "rust-lang-nursery/packed_simd" } ++travis-ci = { repository = "rust-lang-nursery/packed_simd" } ++codecov = { repository = "rust-lang-nursery/packed_simd" } ++is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/packed_simd" } ++is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" } ++maintenance = { status = "experimental" } ++ ++[dependencies] ++cfg-if = "^0.1.6" ++core_arch = { version = "^0.1.3", optional = true } ++ ++[features] ++default = [] ++into_bits = [] ++libcore_neon = [] ++ ++[dev-dependencies] ++paste = "^0.1.3" ++arrayvec = { version = "^0.4", default-features = false } ++ ++[target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys] ++version = "^0.1.2" ++optional = true ++ ++[target.wasm32-unknown-unknown.dev-dependencies] ++wasm-bindgen = "=0.2.19" ++wasm-bindgen-test = "=0.2.19" +\ No newline at end of file +diff --git a/third_party/rust/simd/LICENSE-APACHE b/third_party/rust/packed_simd/LICENSE-APACHE +similarity index 100% +rename from third_party/rust/simd/LICENSE-APACHE +rename to third_party/rust/packed_simd/LICENSE-APACHE +diff --git a/third_party/rust/simd/LICENSE-MIT b/third_party/rust/packed_simd/LICENSE-MIT +similarity index 93% +rename from third_party/rust/simd/LICENSE-MIT +rename to third_party/rust/packed_simd/LICENSE-MIT +index bf6c304f7774..39d4bdb5acd3 100644 +--- a/third_party/rust/simd/LICENSE-MIT ++++ b/third_party/rust/packed_simd/LICENSE-MIT +@@ -1,25 +1,25 @@ +-Copyright (c) 2014 Huon Wilson ++Copyright (c) 2014 The Rust Project Developers + + Permission is hereby granted, free of charge, to any + person obtaining a copy of this software and associated + documentation files (the "Software"), to deal in the + Software without restriction, including without + limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of + the Software, and to permit persons to whom the Software + 
is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice + shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED + TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT + SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +-DEALINGS IN THE SOFTWARE. +\ No newline at end of file ++DEALINGS IN THE SOFTWARE. +diff --git a/third_party/rust/packed_simd/bors.toml b/third_party/rust/packed_simd/bors.toml +new file mode 100644 +index 000000000000..6d302dc85cf6 +--- /dev/null ++++ b/third_party/rust/packed_simd/bors.toml +@@ -0,0 +1,3 @@ ++status = [ ++ "continuous-integration/travis-ci/push" ++] +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/build.rs b/third_party/rust/packed_simd/build.rs +new file mode 100644 +index 000000000000..85639ff9d085 +--- /dev/null ++++ b/third_party/rust/packed_simd/build.rs +@@ -0,0 +1,8 @@ ++fn main() { ++ println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); ++ let target = std::env::var("TARGET") ++ .expect("TARGET environment variable not defined"); ++ if target.contains("neon") { ++ println!("cargo:rustc-cfg=libcore_neon"); ++ } ++} +diff --git a/third_party/rust/packed_simd/ci/all.sh b/third_party/rust/packed_simd/ci/all.sh +new file mode 100644 +index 000000000000..273562d4a9bb +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/all.sh +@@ -0,0 +1,71 @@ ++#!/usr/bin/env bash ++# ++# Performs an operation on all targets ++ ++set -ex ++ ++: "${1?The all.sh script requires one argument.}" ++ ++op=$1 ++ ++cargo_clean() { ++ cargo clean ++} ++ ++cargo_check_fmt() { ++ cargo fmt --all -- --check ++} ++ ++cargo_fmt() { ++ cargo fmt --all ++} ++ ++cargo_clippy() { ++ cargo clippy --all -- -D clippy::pedantic ++} ++ ++CMD="-1" ++ ++case $op in ++ clean*) ++ CMD=cargo_clean ++ ;; ++ check_fmt*) ++ CMD=cargo_check_fmt ++ ;; ++ fmt*) ++ CMD=cargo_fmt ++ ;; ++ clippy) ++ CMD=cargo_clippy ++ ;; ++ *) ++ echo "Unknown operation: \"${op}\"" ++ exit 1 ++ ;; ++esac ++ ++echo "Operation is: ${CMD}" ++ ++# On src/ ++$CMD ++ ++# Check examples/ ++for dir in examples/*/ ++do ++ dir=${dir%*/} ++ ( ++ cd "${dir%*/}" ++ $CMD ++ ) ++done ++ ++( ++ cd verify/verify ++ $CMD ++) ++ ++( ++ cd micro_benchmarks ++ $CMD ++) +diff --git a/third_party/rust/packed_simd/ci/android-install-ndk.sh b/third_party/rust/packed_simd/ci/android-install-ndk.sh +new file mode 100644 +index 000000000000..818e78446ae8 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/android-install-ndk.sh +@@ -0,0 +1,37 @@ ++#!/usr/bin/env sh ++# Copyright 2016 The Rust Project Developers. See the COPYRIGHT ++# file at the top-level directory of this distribution and at ++# http://rust-lang.org/COPYRIGHT. ++# ++# Licensed under the Apache License, Version 2.0 or the MIT license ++# , at your ++# option. This file may not be copied, modified, or distributed ++# except according to those terms. 
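++ ++# Downloads NDK r15b and uses its make_standalone_toolchain.py to install a ++# standalone toolchain for the architecture given as $1 (mapped below to the ++# NDK's arch name, e.g. aarch64 -> arm64) under /android/ndk-$1, API level 24.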
++ ++set -ex ++ ++curl --retry 5 -O https://dl.google.com/android/repository/android-ndk-r15b-linux-x86_64.zip ++unzip -q android-ndk-r15b-linux-x86_64.zip ++ ++case "$1" in ++ aarch64) ++ arch=arm64 ++ ;; ++ ++ i686) ++ arch=x86 ++ ;; ++ ++ *) ++ arch=$1 ++ ;; ++esac; ++ ++android-ndk-r15b/build/tools/make_standalone_toolchain.py \ ++ --unified-headers \ ++ --install-dir "/android/ndk-${1}" \ ++ --arch "${arch}" \ ++ --api 24 ++ ++rm -rf ./android-ndk-r15b-linux-x86_64.zip ./android-ndk-r15b +diff --git a/third_party/rust/packed_simd/ci/android-install-sdk.sh b/third_party/rust/packed_simd/ci/android-install-sdk.sh +new file mode 100644 +index 000000000000..6b5ac09ab04a +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/android-install-sdk.sh +@@ -0,0 +1,60 @@ ++#!/usr/bin/env sh ++# Copyright 2016 The Rust Project Developers. See the COPYRIGHT ++# file at the top-level directory of this distribution and at ++# http://rust-lang.org/COPYRIGHT. ++# ++# Licensed under the Apache License, Version 2.0 or the MIT license ++# , at your ++# option. This file may not be copied, modified, or distributed ++# except according to those terms. ++ ++set -ex ++ ++# Prep the SDK and emulator ++# ++# Note that the update process requires that we accept a bunch of licenses, and ++# we can't just pipe `yes` into it for some reason, so we take the same strategy ++# located in https://github.com/appunite/docker by just wrapping it in a script ++# which apparently magically accepts the licenses. ++ ++mkdir sdk ++curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O ++unzip -d sdk sdk-tools-linux-3859397.zip ++ ++case "$1" in ++ arm | armv7) ++ abi=armeabi-v7a ++ ;; ++ ++ aarch64) ++ abi=arm64-v8a ++ ;; ++ ++ i686) ++ abi=x86 ++ ;; ++ ++ x86_64) ++ abi=x86_64 ++ ;; ++ ++ *) ++ echo "invalid arch: $1" ++ exit 1 ++ ;; ++esac; ++ ++# --no_https avoids ++ # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found ++yes | ./sdk/tools/bin/sdkmanager --licenses --no_https ++yes | ./sdk/tools/bin/sdkmanager --no_https \ ++ "emulator" \ ++ "platform-tools" \ ++ "platforms;android-24" \ ++ "system-images;android-24;default;$abi" ++ ++echo "no" | ++ ./sdk/tools/bin/avdmanager create avd \ ++ --name "${1}" \ ++ --package "system-images;android-24;default;$abi" +diff --git a/third_party/rust/packed_simd/ci/android-sysimage.sh b/third_party/rust/packed_simd/ci/android-sysimage.sh +new file mode 100644 +index 000000000000..9eabd7c8d94f +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/android-sysimage.sh +@@ -0,0 +1,56 @@ ++#!/usr/bin/env bash ++ ++# Copyright 2017 The Rust Project Developers. See the COPYRIGHT ++# file at the top-level directory of this distribution and at ++# http://rust-lang.org/COPYRIGHT. ++# ++# Licensed under the Apache License, Version 2.0 or the MIT license ++# , at your ++# option. This file may not be copied, modified, or distributed ++# except according to those terms. ++ ++set -ex ++ ++URL=https://dl.google.com/android/repository/sys-img/android ++ ++main() { ++ local arch="${1}" ++ local name="${2}" ++ local dest=/system ++ local td ++ td="$(mktemp -d)" ++ ++ apt-get install --no-install-recommends e2tools ++ ++ pushd "${td}" ++ curl --retry 5 -O "${URL}/${name}" ++ unzip -q "${name}" ++ ++ local system ++ system="$(find . 
-name system.img)" ++ mkdir -p ${dest}/{bin,lib,lib64} ++ ++ # Extract android linker and libraries to /system ++ # This allows android executables to be run directly (or with qemu) ++ if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then ++ e2cp -p "${system}:/bin/linker64" "${dest}/bin/" ++ e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/" ++ e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/" ++ e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/" ++ else ++ e2cp -p "${system}:/bin/linker" "${dest}/bin/" ++ e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/" ++ e2cp -p "${system}:/lib/libc.so" "${dest}/lib/" ++ e2cp -p "${system}:/lib/libm.so" "${dest}/lib/" ++ fi ++ ++ # clean up ++ apt-get purge --auto-remove -y e2tools ++ ++ popd ++ ++ rm -rf "${td}" ++} ++ ++main "${@}" +diff --git a/third_party/rust/packed_simd/ci/benchmark.sh b/third_party/rust/packed_simd/ci/benchmark.sh +new file mode 100644 +index 000000000000..3635b9e371d1 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/benchmark.sh +@@ -0,0 +1,32 @@ ++#!/usr/bin/env bash ++# ++# Runs all benchmarks. Controlled by the following environment variables: ++# ++# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc) ++# NORUN={1} - only builds the benchmarks ++ ++set -ex ++ ++if [[ ${NORUN} != 1 ]]; then ++ # Most benchmarks require hyperfine; require it upfront. ++ hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; } ++fi ++ ++ ++# If the ispc benchmark feature is enabled, ispc must be in the path of the ++# benchmarks. ++if echo "$FEATURES" | grep -q "ispc"; then ++ hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; } ++fi ++ ++# An example with a benchmark.sh is a benchmark: ++for dir in examples/*/ ++do ++ dir=${dir%*/} ++ cd ${dir%*/} ++ if [ -f "benchmark.sh" ]; then ++ ./benchmark.sh ++ fi ++ cd - ++done ++ +diff --git a/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs +new file mode 100644 +index 000000000000..c0fe52c35659 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs +@@ -0,0 +1,176 @@ ++// Copyright 2017 The Rust Project Developers. See the COPYRIGHT ++// file at the top-level directory of this distribution and at ++// http://rust-lang.org/COPYRIGHT. ++// ++// Licensed under the Apache License, Version 2.0 or the MIT license ++// , at your ++// option. This file may not be copied, modified, or distributed ++// except according to those terms. ++ ++// This is a script to deploy and execute a binary on an iOS simulator. ++// The primary use of this is to be able to run unit tests on the simulator and ++// retrieve the results. ++// ++// To do this through Cargo instead, use Dinghy ++// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy ++// test. ++ ++use std::env; ++use std::fs::{self, File}; ++use std::io::Write; ++use std::path::Path; ++use std::process; ++use std::process::Command; ++ ++macro_rules! 
t { ++ ($e:expr) => (match $e { ++ Ok(e) => e, ++ Err(e) => panic!("{} failed with: {}", stringify!($e), e), ++ }) ++} ++ ++// Step one: Wrap as an app ++fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) { ++ println!("Packaging simulator app"); ++ drop(fs::remove_dir_all("ios_simulator_app")); ++ t!(fs::create_dir("ios_simulator_app")); ++ t!(fs::copy(test_binary_path, ++ Path::new("ios_simulator_app").join(crate_name))); ++ ++ let mut f = t!(File::create("ios_simulator_app/Info.plist")); ++ t!(f.write_all(format!(r#" ++ <?xml version="1.0" encoding="UTF-8"?> ++ <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> ++ <plist version="1.0"> ++ <dict> ++ <key>CFBundleExecutable</key> ++ <string>{}</string> ++ <key>CFBundleIdentifier</key> ++ <string>com.rust.unittests</string> ++ </dict> ++ </plist> ++ "#, crate_name).as_bytes())); ++} ++ ++// Step two: Start the iOS simulator ++fn start_simulator() { ++ println!("Looking for iOS simulator"); ++ let output = t!(Command::new("xcrun").arg("simctl").arg("list").output()); ++ assert!(output.status.success()); ++ let mut simulator_exists = false; ++ let mut simulator_booted = false; ++ let mut found_rust_sim = false; ++ let stdout = t!(String::from_utf8(output.stdout)); ++ for line in stdout.lines() { ++ if line.contains("rust_ios") { ++ if found_rust_sim { ++ panic!("Duplicate rust_ios simulators found. Please \ ++ double-check xcrun simctl list."); ++ } ++ simulator_exists = true; ++ simulator_booted = line.contains("(Booted)"); ++ found_rust_sim = true; ++ } ++ } ++ ++ if simulator_exists == false { ++ println!("Creating iOS simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("create") ++ .arg("rust_ios") ++ .arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE") ++ .arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2") ++ .check_status(); ++ } else if simulator_booted == true { ++ println!("Shutting down already-booted simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("shutdown") ++ .arg("rust_ios") ++ .check_status(); ++ } ++ ++ println!("Starting iOS simulator"); ++ // We can't uninstall the app (if present) as that will hang if the ++ // simulator isn't completely booted; just erase the simulator instead. 
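++ // `simctl erase` returns the device to a clean state; each of these calls ++ // goes through check_status() (defined below), which panics if xcrun ++ // exits non-zero.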
++ Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status(); ++ Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status(); ++} ++ ++// Step three: Install the app ++fn install_app_to_simulator() { ++ println!("Installing app to simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("install") ++ .arg("booted") ++ .arg("ios_simulator_app/") ++ .check_status(); ++} ++ ++// Step four: Run the app ++fn run_app_on_simulator() { ++ println!("Running app"); ++ let output = t!(Command::new("xcrun") ++ .arg("simctl") ++ .arg("launch") ++ .arg("--console") ++ .arg("booted") ++ .arg("com.rust.unittests") ++ .output()); ++ ++ println!("stdout --\n{}\n", String::from_utf8_lossy(&output.stdout)); ++ println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr)); ++ ++ let stdout = String::from_utf8_lossy(&output.stdout); ++ let failed = stdout.lines() ++ .find(|l| l.contains("FAILED")) ++ .map(|l| l.contains("FAILED")) ++ .unwrap_or(false); ++ ++ let passed = stdout.lines() ++ .find(|l| l.contains("test result: ok")) ++ .map(|l| l.contains("test result: ok")) ++ .unwrap_or(false); ++ ++ println!("Shutting down simulator"); ++ Command::new("xcrun") ++ .arg("simctl") ++ .arg("shutdown") ++ .arg("rust_ios") ++ .check_status(); ++ if !(passed && !failed) { ++ panic!("tests didn't pass"); ++ } ++} ++ ++trait CheckStatus { ++ fn check_status(&mut self); ++} ++ ++impl CheckStatus for Command { ++ fn check_status(&mut self) { ++ println!("\trunning: {:?}", self); ++ assert!(t!(self.status()).success()); ++ } ++} ++ ++fn main() { ++ let args: Vec = env::args().collect(); ++ if args.len() != 2 { ++ println!("Usage: {} ", args[0]); ++ process::exit(-1); ++ } ++ ++ let test_binary_path = Path::new(&args[1]); ++ let crate_name = test_binary_path.file_name().unwrap(); ++ ++ package_as_simulator_app(crate_name.to_str().unwrap(), test_binary_path); ++ start_simulator(); ++ install_app_to_simulator(); ++ run_app_on_simulator(); ++} +diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile +new file mode 100644 +index 000000000000..27bde89c5a8d +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile +@@ -0,0 +1,47 @@ ++FROM ubuntu:16.04 ++ ++RUN dpkg --add-architecture i386 && \ ++ apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ file \ ++ make \ ++ curl \ ++ ca-certificates \ ++ python \ ++ unzip \ ++ expect \ ++ openjdk-9-jre \ ++ libstdc++6:i386 \ ++ libpulse0 \ ++ gcc \ ++ libc6-dev ++ ++WORKDIR /android/ ++COPY android* /android/ ++ ++ENV ANDROID_ARCH=aarch64 ++ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools ++ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++RUN sh /android/android-install-sdk.sh $ANDROID_ARCH ++RUN mv /root/.android /tmp ++RUN chmod 777 -R /tmp/.android ++RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* ++ ++ENV PATH=$PATH:/rust/bin \ ++ CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \ ++ CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \ ++ OBJDUMP=aarch64-linux-android-objdump \ ++ HOME=/tmp ++ ++ADD runtest-android.rs /tmp/runtest.rs ++ENTRYPOINT [ \ ++ "bash", \ ++ "-c", \ ++ # set SHELL so android can detect a 64bits system, see ++ # http://stackoverflow.com/a/41789144 ++ "SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \ ++ rustc /tmp/runtest.rs -o 
/tmp/runtest && \ ++ exec \"$@\"", \ ++ "--" \ ++] +diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..68261a2f033d +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,14 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-aarch64-linux-gnu \ ++ libc6-dev-arm64-cross \ ++ qemu-user \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ ++ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \ ++ OBJDUMP=aarch64-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile +new file mode 100644 +index 000000000000..995a9e30e65e +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/arm-linux-androideabi/Dockerfile +@@ -0,0 +1,47 @@ ++FROM ubuntu:16.04 ++ ++RUN dpkg --add-architecture i386 && \ ++ apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ file \ ++ make \ ++ curl \ ++ ca-certificates \ ++ python \ ++ unzip \ ++ expect \ ++ openjdk-9-jre \ ++ libstdc++6:i386 \ ++ libpulse0 \ ++ gcc \ ++ libc6-dev ++ ++WORKDIR /android/ ++COPY android* /android/ ++ ++ENV ANDROID_ARCH=arm ++ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools ++ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++RUN sh /android/android-install-sdk.sh $ANDROID_ARCH ++RUN mv /root/.android /tmp ++RUN chmod 777 -R /tmp/.android ++RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* ++ ++ENV PATH=$PATH:/rust/bin \ ++ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ ++ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ ++ OBJDUMP=arm-linux-androideabi-objdump \ ++ HOME=/tmp ++ ++ADD runtest-android.rs /tmp/runtest.rs ++ENTRYPOINT [ \ ++ "bash", \ ++ "-c", \ ++ # set SHELL so android can detect a 64bits system, see ++ # http://stackoverflow.com/a/41789144 ++ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \ ++ rustc /tmp/runtest.rs -o /tmp/runtest && \ ++ exec \"$@\"", \ ++ "--" \ ++] +diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +new file mode 100644 +index 000000000000..cb4de6a57eaa +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile +@@ -0,0 +1,15 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ libc6-armel-cross \ ++ libc6-dev-armel-cross \ ++ binutils-arm-linux-gnueabi \ ++ gcc-arm-linux-gnueabi \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \ ++ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \ ++ OBJDUMP=arm-linux-gnueabi-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +new file mode 100644 +index 000000000000..c7bd61f0a796 +--- /dev/null ++++ 
b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-arm-linux-gnueabihf \ ++ libc6-dev-armhf-cross \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ ++ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ ++ OBJDUMP=arm-linux-gnueabihf-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +new file mode 100644 +index 000000000000..e01b87afdf56 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-arm-linux-gnueabihf \ ++ libc6-dev-armhf-cross \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ ++ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ ++ OBJDUMP=arm-linux-gnueabihf-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..857974a858f1 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,7 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc-multilib \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates +diff --git a/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..857974a858f1 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,7 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc-multilib \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates +diff --git a/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..4711cead372a +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-mips-linux-gnu libc6-dev-mips-cross \ ++ qemu-system-mips \ ++ qemu-user \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \ ++ CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \ ++ OBJDUMP=mips-linux-gnu-objdump +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +new file mode 100644 +index 000000000000..1422e8c80924 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile +@@ -0,0 +1,10 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev 
qemu-user ca-certificates \ ++ gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \ ++ qemu-system-mips64 qemu-user ++ ++ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \ ++ CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \ ++ OBJDUMP=mips64-linux-gnuabi64-objdump +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +new file mode 100644 +index 000000000000..d94deb5b2013 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile +@@ -0,0 +1,10 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \ ++ qemu-system-mips64el ++ ++ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \ ++ CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \ ++ OBJDUMP=mips64el-linux-gnuabi64-objdump +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile +new file mode 100644 +index 000000000000..40ac50675bd9 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile +@@ -0,0 +1,25 @@ ++FROM ubuntu:18.10 ++ ++RUN apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ gcc \ ++ libc6-dev \ ++ make \ ++ qemu-user \ ++ qemu-system-mips \ ++ bzip2 \ ++ curl \ ++ file ++ ++RUN mkdir /toolchain ++ ++# Note that this originally came from: ++# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 ++RUN curl -L https://s3-us-west-1.amazonaws.com/rust-lang-ci2/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \ ++ tar xjf - -C /toolchain --strip-components=2 ++ ++ENV PATH=$PATH:/rust/bin:/toolchain/bin \ ++ CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \ ++ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \ ++ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain" +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..43b174ed87fc +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,12 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \ ++ qemu-system-ppc \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \ ++ CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \ ++ OBJDUMP=powerpc-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..7757ad28a42d +--- /dev/null ++++ 
b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,17 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-powerpc64-linux-gnu \ ++ libc6-dev-ppc64-cross \ ++ qemu-user \ ++ qemu-system-ppc \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \ ++ CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \ ++ CC=powerpc64-linux-gnu-gcc \ ++ OBJDUMP=powerpc64-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..0b0c214fdf1b +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,11 @@ ++FROM ubuntu:17.10 ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc libc6-dev qemu-user ca-certificates \ ++ gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \ ++ qemu-system-ppc file make ++ ++ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \ ++ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \ ++ CC=powerpc64le-linux-gnu-gcc \ ++ OBJDUMP=powerpc64le-linux-gnu-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..c645b0bcc2b8 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,20 @@ ++FROM ubuntu:18.10 ++ ++RUN apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ curl \ ++ cmake \ ++ gcc \ ++ libc6-dev \ ++ g++-s390x-linux-gnu \ ++ libc6-dev-s390x-cross \ ++ qemu-user \ ++ make \ ++ file ++ ++ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \ ++ CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \ ++ CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \ ++ CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \ ++ OBJDUMP=s390x-linux-gnu-objdump +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..fe12af14da6f +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,18 @@ ++FROM debian:stretch ++ ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ curl ca-certificates \ ++ gcc libc6-dev \ ++ gcc-sparc64-linux-gnu libc6-dev-sparc64-cross \ ++ qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \ ++ p7zip-full cpio ++ ++COPY linux-sparc64.sh / ++RUN bash /linux-sparc64.sh ++ ++COPY test-runner-linux / ++ ++ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \ ++ CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \ ++ CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \ ++ PATH=$PATH:/rust/bin +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile +new file mode 100644 +index 000000000000..c1da77109c12 +--- /dev/null ++++ 
b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile +@@ -0,0 +1,47 @@ ++FROM ubuntu:16.04 ++ ++RUN dpkg --add-architecture i386 && \ ++ apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ file \ ++ make \ ++ curl \ ++ ca-certificates \ ++ python \ ++ unzip \ ++ expect \ ++ openjdk-9-jre \ ++ libstdc++6:i386 \ ++ libpulse0 \ ++ gcc \ ++ libc6-dev ++ ++WORKDIR /android/ ++COPY android* /android/ ++ ++ENV ANDROID_ARCH=arm ++ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools ++ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++RUN sh /android/android-install-sdk.sh $ANDROID_ARCH ++RUN mv /root/.android /tmp ++RUN chmod 777 -R /tmp/.android ++RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/* ++ ++ENV PATH=$PATH:/rust/bin \ ++ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \ ++ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \ ++ OBJDUMP=arm-linux-androideabi-objdump \ ++ HOME=/tmp ++ ++ADD runtest-android.rs /tmp/runtest.rs ++ENTRYPOINT [ \ ++ "bash", \ ++ "-c", \ ++ # set SHELL so android can detect a 64bits system, see ++ # http://stackoverflow.com/a/41789144 ++ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \ ++ rustc /tmp/runtest.rs -o /tmp/runtest && \ ++ exec \"$@\"", \ ++ "--" \ ++] +diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile +new file mode 100644 +index 000000000000..696cb6c3fb52 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile +@@ -0,0 +1,13 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ ca-certificates \ ++ libc6-dev \ ++ gcc-arm-linux-gnueabihf \ ++ libc6-dev-armhf-cross \ ++ qemu-user \ ++ make \ ++ file ++ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ ++ CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \ ++ OBJDUMP=arm-linux-gnueabihf-objdump +diff --git a/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile +new file mode 100644 +index 000000000000..f905cf1a36eb +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile +@@ -0,0 +1,37 @@ ++FROM ubuntu:18.04 ++ ++RUN apt-get update -y && apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ clang \ ++ cmake \ ++ curl \ ++ git \ ++ libc6-dev \ ++ make \ ++ python \ ++ xz-utils ++ ++# Install `wasm2wat` ++RUN git clone --recursive https://github.com/WebAssembly/wabt ++RUN make -C wabt -j$(nproc) ++ENV PATH=$PATH:/wabt/bin ++ ++# Install `wasm-bindgen-test-runner` ++RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.19/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl.tar.gz \ ++ | tar xzf - ++ENV PATH=$PATH:/wasm-bindgen-0.2.19-x86_64-unknown-linux-musl ++ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner ++ ++# Install `node` ++RUN curl https://nodejs.org/dist/v10.8.0/node-v10.8.0-linux-x64.tar.xz | tar xJf - ++ENV PATH=$PATH:/node-v10.8.0-linux-x64/bin ++ ++# We use a shim linker that removes `--strip-debug` when passed to LLD. 
While ++# this typically results in invalid debug information in release mode it doesn't ++# result in an invalid names section which is what we're interested in. ++COPY lld-shim.rs / ++ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim ++ ++# Rustc isn't available until this container starts, so defer compilation of the ++# shim. ++ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@" +diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile +new file mode 100644 +index 000000000000..d52dd45b12bf +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile +@@ -0,0 +1,29 @@ ++FROM ubuntu:16.04 ++ ++RUN apt-get update && \ ++ apt-get install -y --no-install-recommends \ ++ ca-certificates \ ++ curl \ ++ gcc \ ++ libc-dev \ ++ python \ ++ unzip \ ++ file \ ++ make ++ ++WORKDIR /android/ ++ENV ANDROID_ARCH=x86_64 ++COPY android-install-ndk.sh /android/ ++RUN sh /android/android-install-ndk.sh $ANDROID_ARCH ++ ++# We do not run x86_64-linux-android tests on an android emulator. ++# See ci/android-sysimage.sh for information about how tests are run. ++COPY android-sysimage.sh /android/ ++RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip ++ ++ENV PATH=$PATH:/rust/bin:/android/ndk-$ANDROID_ARCH/bin \ ++ CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android-gcc \ ++ CC_x86_64_linux_android=x86_64-linux-android-gcc \ ++ CXX_x86_64_linux_android=x86_64-linux-android-g++ \ ++ OBJDUMP=x86_64-linux-android-objdump \ ++ HOME=/tmp +diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +new file mode 100644 +index 000000000000..a6bbe6653928 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +@@ -0,0 +1,16 @@ ++FROM ubuntu:18.04 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates \ ++ wget \ ++ bzip2 \ ++ cmake \ ++ libclang-dev \ ++ clang ++ ++RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2 ++RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2 ++ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --" +diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +new file mode 100644 +index 000000000000..e6b000d0516e +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +@@ -0,0 +1,10 @@ ++FROM ubuntu:17.10 ++RUN apt-get update && apt-get install -y --no-install-recommends \ ++ gcc \ ++ libc6-dev \ ++ file \ ++ make \ ++ ca-certificates \ ++ cmake \ ++ libclang-dev \ ++ clang +diff --git a/third_party/rust/packed_simd/ci/dox.sh b/third_party/rust/packed_simd/ci/dox.sh +new file mode 100644 +index 000000000000..1743366407e3 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/dox.sh +@@ -0,0 +1,24 @@ ++#!/bin/sh ++ ++set -ex ++ ++rm -rf target/doc ++mkdir -p target/doc ++ ++# Build API documentation ++cargo doc --features=into_bits ++ ++# Build Performance Guide ++# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780 ++# mdbook build perf-guide -d target/doc/perf-guide ++cd perf-guide ++mdbook build ++cd - ++cp -r 
perf-guide/book target/doc/perf-guide ++ ++# If we're on travis, not a PR, and on the right branch, publish! ++if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then ++ pip install ghp_import --install-option="--prefix=$HOME/.local" ++ $HOME/.local/bin/ghp-import -n target/doc ++ git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages ++fi +diff --git a/third_party/rust/packed_simd/ci/linux-s390x.sh b/third_party/rust/packed_simd/ci/linux-s390x.sh +new file mode 100644 +index 000000000000..972abeec569e +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/linux-s390x.sh +@@ -0,0 +1,18 @@ ++set -ex ++ ++mkdir -m 777 /qemu ++cd /qemu ++ ++curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img ++curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian ++curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian ++ ++mv kernel.debian kernel ++mv initrd.debian initrd.gz ++ ++mkdir init ++cd init ++gunzip -c ../initrd.gz | cpio -id ++rm ../initrd.gz ++cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/ ++chmod a+w . +diff --git a/third_party/rust/packed_simd/ci/linux-sparc64.sh b/third_party/rust/packed_simd/ci/linux-sparc64.sh +new file mode 100644 +index 000000000000..4452b120e1b6 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/linux-sparc64.sh +@@ -0,0 +1,17 @@ ++set -ex ++ ++mkdir -m 777 /qemu ++cd /qemu ++ ++curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso ++7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz ++7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64 ++mv sparc64 kernel ++rm debian-9.0-sparc64-NETINST-1.iso ++ ++mkdir init ++cd init ++gunzip -c ../initrd.gz | cpio -id ++rm ../initrd.gz ++cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/ ++chmod a+w . +diff --git a/third_party/rust/packed_simd/ci/lld-shim.rs b/third_party/rust/packed_simd/ci/lld-shim.rs +new file mode 100644 +index 000000000000..10263869e8dc +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/lld-shim.rs +@@ -0,0 +1,11 @@ ++use std::os::unix::prelude::*; ++use std::process::Command; ++use std::env; ++ ++fn main() { ++ let args = env::args() ++ .skip(1) ++ .filter(|s| s != "--strip-debug") ++ .collect::<Vec<_>>(); ++ panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec()); ++} +diff --git a/third_party/rust/packed_simd/ci/max_line_width.sh b/third_party/rust/packed_simd/ci/max_line_width.sh +new file mode 100644 +index 000000000000..f70639b6f89b +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/max_line_width.sh +@@ -0,0 +1,17 @@ ++#!/usr/bin/env sh ++ ++set -x ++ ++export success=true ++ ++find . -iname '*.rs' | while read -r file; do ++ result=$(grep '.\{79\}' "${file}" | grep --invert 'http') ++ if [ "${result}" = "" ] ++ then ++ : ++ else ++ echo "file \"${file}\": $result" ++ exit 1 ++ fi ++done ++ +diff --git a/third_party/rust/packed_simd/ci/run-docker.sh b/third_party/rust/packed_simd/ci/run-docker.sh +new file mode 100644 +index 000000000000..abdd6852fc3a +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/run-docker.sh +@@ -0,0 +1,38 @@ ++# Small script to run tests for a target (or all targets) inside all the ++# respective docker images. 
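++# ++# Usage: TARGET=<target> [RUSTFLAGS=...] sh ci/run-docker.sh ++# If TARGET is unset, every target with a directory under ci/docker/ is run in ++# turn; an "-emulated" suffix is stripped from TARGET before ci/run.sh sees it.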
++
++set -ex
++
++run() {
++    echo "Building docker container for TARGET=${TARGET} RUSTFLAGS=${RUSTFLAGS}"
++    docker build -t packed_simd -f ci/docker/${TARGET}/Dockerfile ci/
++    mkdir -p target
++    target=$(echo "${TARGET}" | sed 's/-emulated//')
++    echo "Running docker"
++    docker run \
++      --user `id -u`:`id -g` \
++      --rm \
++      --init \
++      --volume $HOME/.cargo:/cargo \
++      --env CARGO_HOME=/cargo \
++      --volume `rustc --print sysroot`:/rust:ro \
++      --env TARGET=$target \
++      --env NORUN \
++      --env NOVERIFY \
++      --env RUSTFLAGS \
++      --volume `pwd`:/checkout:ro \
++      --volume `pwd`/target:/checkout/target \
++      --workdir /checkout \
++      --privileged \
++      packed_simd \
++      bash \
++      -c 'PATH=$PATH:/rust/bin exec ci/run.sh'
++}
++
++if [ -z "${TARGET}" ]; then
++    for d in `ls ci/docker/`; do
++        run $d
++    done
++else
++    run ${TARGET}
++fi
+diff --git a/third_party/rust/packed_simd/ci/run.sh b/third_party/rust/packed_simd/ci/run.sh
+new file mode 100644
+index 000000000000..7bb825883680
+--- /dev/null
++++ b/third_party/rust/packed_simd/ci/run.sh
+@@ -0,0 +1,96 @@
++#!/usr/bin/env bash
++
++set -ex
++
++: ${TARGET?"The TARGET environment variable must be set."}
++
++# Tests are all super fast anyway, and they fault often enough on travis that
++# having only one thread increases debuggability enough to be worth it.
++#export RUST_TEST_THREADS=1
++#export RUST_BACKTRACE=full
++#export RUST_TEST_NOCAPTURE=1
++
++# Some appveyor builds run out-of-memory; this attempts to mitigate that:
++# https://github.com/rust-lang-nursery/packed_simd/issues/39
++# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1"
++# export CARGO_BUILD_JOBS=1
++
++export CARGO_SUBCMD=test
++if [[ "${NORUN}" == "1" ]]; then
++    export CARGO_SUBCMD=build
++fi
++
++if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then
++    export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0"
++    rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest
++    export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest
++    export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest
++fi
++
++# The source directory is read-only. Need to copy internal crates to the target
++# directory for their Cargo.lock to be properly written.
++mkdir target || true ++ ++rustc --version ++cargo --version ++echo "TARGET=${TARGET}" ++echo "HOST=${HOST}" ++echo "RUSTFLAGS=${RUSTFLAGS}" ++echo "NORUN=${NORUN}" ++echo "NOVERIFY=${NOVERIFY}" ++echo "CARGO_SUBCMD=${CARGO_SUBCMD}" ++echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}" ++echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}" ++echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}" ++echo "RUST_BACKTRACE=${RUST_BACKTRACE}" ++echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}" ++ ++cargo_test() { ++ cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}" ++ if [ "${NORUN}" != "1" ] ++ then ++ if [ "$TARGET" != "wasm32-unknown-unknown" ] ++ then ++ cmd="$cmd -- --quiet" ++ fi ++ fi ++ mkdir target || true ++ ${cmd} 2>&1 | tee > target/output ++ if [[ ${PIPESTATUS[0]} != 0 ]]; then ++ cat target/output ++ return 1 ++ fi ++} ++ ++cargo_test_impl() { ++ ORIGINAL_RUSTFLAGS=${RUSTFLAGS} ++ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v16 --cfg test_v32 --cfg test_v64" cargo_test ${@} ++ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v128 --cfg test_v256" cargo_test ${@} ++ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v512" cargo_test ${@} ++ RUSTFLAGS=${ORIGINAL_RUSTFLAGS} ++} ++ ++# Debug run: ++if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then ++ # Run wasm32-unknown-unknown in release mode only ++ cargo_test_impl ++fi ++ ++if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then ++ # use sleef on linux and windows x86_64 builds ++ cargo_test_impl --release --features=into_bits,core_arch,sleef-sys ++else ++ cargo_test_impl --release --features=into_bits,core_arch ++fi ++ ++# Verify code generation ++if [[ "${NOVERIFY}" != "1" ]]; then ++ cp -r verify/verify target/verify ++ export STDSIMD_ASSERT_INSTR_LIMIT=30 ++ if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then ++ export STDSIMD_ASSERT_INSTR_LIMIT=50 ++ fi ++ cargo_test --release --manifest-path=target/verify/Cargo.toml ++fi ++ ++. ci/run_examples.sh +diff --git a/third_party/rust/packed_simd/ci/run_examples.sh b/third_party/rust/packed_simd/ci/run_examples.sh +new file mode 100644 +index 000000000000..5b26b18afb20 +--- /dev/null ++++ b/third_party/rust/packed_simd/ci/run_examples.sh +@@ -0,0 +1,51 @@ ++# Runs all examples. ++ ++# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55 ++# All examples fail to build for `armv7-apple-ios`. 
++if [[ ${TARGET} == "armv7-apple-ios" ]]; then
++    exit 0
++fi
++
++# FIXME: travis exceeds 50 minutes on these targets
++# Skipping the examples is an attempt at preventing travis from timing-out
++if [[ ${TARGET} == "arm-linux-androideabi" ]] || [[ ${TARGET} == "aarch64-linux-android" ]] \
++    || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then
++    exit 0
++fi
++
++if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then
++    exit 0
++fi
++
++cp -r examples/aobench target/aobench
++cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features
++cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit
++
++cp -r examples/dot_product target/dot_product
++cargo_test --manifest-path=target/dot_product/Cargo.toml --release
++
++cp -r examples/fannkuch_redux target/fannkuch_redux
++cargo_test --manifest-path=target/fannkuch_redux/Cargo.toml --release
++
++# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56
++if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
++    cp -r examples/mandelbrot target/mandelbrot
++    cargo_test --manifest-path=target/mandelbrot/Cargo.toml --release
++fi
++
++cp -r examples/matrix_inverse target/matrix_inverse
++cargo_test --manifest-path=target/matrix_inverse/Cargo.toml --release
++
++cp -r examples/nbody target/nbody
++cargo_test --manifest-path=target/nbody/Cargo.toml --release
++
++cp -r examples/spectral_norm target/spectral_norm
++cargo_test --manifest-path=target/spectral_norm/Cargo.toml --release
++
++if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
++    cp -r examples/stencil target/stencil
++    cargo_test --manifest-path=target/stencil/Cargo.toml --release
++fi
++
++cp -r examples/triangle_xform target/triangle_xform
++cargo_test --manifest-path=target/triangle_xform/Cargo.toml --release
+diff --git a/third_party/rust/packed_simd/ci/runtest-android.rs b/third_party/rust/packed_simd/ci/runtest-android.rs
+new file mode 100644
+index 000000000000..ed1cd80c834a
+--- /dev/null
++++ b/third_party/rust/packed_simd/ci/runtest-android.rs
+@@ -0,0 +1,45 @@
++use std::env;
++use std::process::Command;
++use std::path::{Path, PathBuf};
++
++fn main() {
++    let args = env::args_os()
++        .skip(1)
++        .filter(|arg| arg != "--quiet")
++        .collect::<Vec<_>>();
++    assert_eq!(args.len(), 1);
++    let test = PathBuf::from(&args[0]);
++    let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());
++
++    let status = Command::new("adb")
++        .arg("wait-for-device")
++        .status()
++        .expect("failed to run: adb wait-for-device");
++    assert!(status.success());
++
++    let status = Command::new("adb")
++        .arg("push")
++        .arg(&test)
++        .arg(&dst)
++        .status()
++        .expect("failed to run: adb push");
++    assert!(status.success());
++
++    let output = Command::new("adb")
++        .arg("shell")
++        .arg(&dst)
++        .output()
++        .expect("failed to run: adb shell");
++    assert!(output.status.success());
++
++    println!("status: {}\nstdout ---\n{}\nstderr ---\n{}",
++             output.status,
++             String::from_utf8_lossy(&output.stdout),
++             String::from_utf8_lossy(&output.stderr));
++
++    let stdout = String::from_utf8_lossy(&output.stdout);
++    let mut lines = stdout.lines().filter(|l| l.starts_with("test result"));
++    if !lines.all(|l| l.contains("test result: ok") && l.contains("0 failed")) {
++        panic!("failed to find successful test run");
++    }
++}
+diff --git a/third_party/rust/packed_simd/ci/setup_benchmarks.sh b/third_party/rust/packed_simd/ci/setup_benchmarks.sh
+new file mode 100644
+index 000000000000..ddc4765d5ceb
+--- /dev/null
++++ b/third_party/rust/packed_simd/ci/setup_benchmarks.sh
+@@ -0,0 +1,10 @@
++#!/usr/bin/env bash
++
++set -ex
++
++# Get latest ISPC binary for the target and put it in the path
++git clone https://github.com/gnzlbg/ispc-binaries
++cp ispc-binaries/ispc-${TARGET} ispc
++
++# Rust-bindgen requires RUSTFMT
++rustup component add rustfmt-preview
+diff --git a/third_party/rust/packed_simd/ci/test-runner-linux b/third_party/rust/packed_simd/ci/test-runner-linux
+new file mode 100644
+index 000000000000..0654f63bfdb9
+--- /dev/null
++++ b/third_party/rust/packed_simd/ci/test-runner-linux
+@@ -0,0 +1,24 @@
++#!/bin/sh
++
++set -e
++
++arch=$1
++prog=$2
++
++cd /qemu/init
++cp -f $2 prog
++find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz
++cd ..
++
++timeout 30s qemu-system-$arch \
++  -m 1024 \
++  -nographic \
++  -kernel kernel \
++  -initrd initrd.gz \
++  -append init=/prog > output || true
++
++# remove kernel messages
++tr -d '\r' < output | egrep -v '^\['
++
++# if the output contains a failure, return error
++! grep FAILED output > /dev/null
+diff --git a/third_party/rust/packed_simd/contributing.md b/third_party/rust/packed_simd/contributing.md
+new file mode 100644
+index 000000000000..93fa92783740
+--- /dev/null
++++ b/third_party/rust/packed_simd/contributing.md
+@@ -0,0 +1,67 @@
++# Contributing to `packed_simd`
++
++Welcome! If you are reading this document, it means you are interested in contributing
++to the `packed_simd` crate.
++
++## Reporting issues
++
++All issues with this crate are tracked using GitHub's [Issue Tracker].
++
++You can use issues to bring bugs to the attention of the maintainers, to discuss
++certain problems encountered with the crate, or to request new features (although
++feature requests should be limited to things mentioned in the [RFC]).
++
++One thing to keep in mind is to always use the **latest** nightly toolchain when
++working on this crate. Due to the nature of this project, we use a lot of unstable
++features, meaning breakage happens often.
++
++[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues
++[RFC]: https://github.com/rust-lang/rfcs/pull/2366
++
++### LLVM issues
++
++The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation,
++and quite a few LLVM bugs have been discovered during the development of this project.
++
++If you encounter issues with incorrect/suboptimal codegen, which you do not encounter
++when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/),
++it is likely the issue is with LLVM, or this crate's interaction with it.
++
++You should first open an issue **in this repo** to help us track the problem, and we
++will help determine the exact cause of the problem.
++If LLVM is indeed the cause, the issue will be reported upstream to the
++[LLVM bugtracker](https://bugs.llvm.org/).
++
++## Submitting Pull Requests
++
++New code is submitted to the crate using GitHub's [pull request] mechanism.
++You should first fork this repository, make your changes (preferably in a new
++branch), then use GitHub's web UI to create a new PR.
++
++[pull request]: https://help.github.com/articles/about-pull-requests/
++
++### Examples
++
++The `examples` directory contains code showcasing SIMD code written with this crate,
++usually in comparison to scalar or ISPC code. If you have a project / idea which
++uses SIMD, we'd love to add it to the examples list.
++
++Every example should include a small `README`, describing the example code's purpose.
++If your example could potentially work as a benchmark, then add a `benchmark.sh` ++script to allow running the example benchmark code in CI. See an existing example's ++[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample. ++ ++Don't forget to update the crate's top-level `README` with a link to your example. ++ ++### Perf guide ++ ++The objective of the [performance guide][perf-guide] is to be a comprehensive ++resource detailing the process of optimizing Rust code with SIMD support. ++ ++If you believe a certain section could be reworded, or if you have any tips & tricks ++related to SIMD which you'd like to share, please open a PR. ++ ++[mdBook] is used to manage the formatting of the guide as a book. ++ ++[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ ++[mdBook]: https://github.com/rust-lang-nursery/mdBook +diff --git a/third_party/rust/packed_simd/perf-guide/.gitignore b/third_party/rust/packed_simd/perf-guide/.gitignore +new file mode 100644 +index 000000000000..5a0bf0317d75 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/.gitignore +@@ -0,0 +1 @@ ++/book +diff --git a/third_party/rust/packed_simd/perf-guide/book.toml b/third_party/rust/packed_simd/perf-guide/book.toml +new file mode 100644 +index 000000000000..69ba3053ca25 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/book.toml +@@ -0,0 +1,12 @@ ++[book] ++authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"] ++multilingual = false ++src = "src" ++title = "Rust SIMD Performance Guide" ++description = "This book describes how to write performant SIMD code in Rust." ++ ++[build] ++create-missing = false ++ ++[output.html] ++additional-css = ["./src/ascii.css"] +diff --git a/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md +new file mode 100644 +index 000000000000..1e76898865c5 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md +@@ -0,0 +1,21 @@ ++# Summary ++ ++[Introduction](./introduction.md) ++ ++- [Floating-point Math](./float-math/fp.md) ++ - [Short-vector Math Library](./float-math/svml.md) ++ - [Approximate functions](./float-math/approx.md) ++ - [Fused multiply-accumulate](./float-math/fma.md) ++ ++- [Target features](./target-feature/features.md) ++ - [Using `RUSTFLAGS`](./target-feature/rustflags.md) ++ - [Using the `target_feature` attribute](./target-feature/attribute.md) ++ - [Interaction with inlining](./target-feature/inlining.md) ++ - [Detecting features at runtime](./target-feature/runtime.md) ++ ++- [Bounds checking](./bound_checks.md) ++- [Vertical and horizontal operations](./vert-hor-ops.md) ++ ++- [Performance profiling](./prof/profiling.md) ++ - [Profiling on Linux](./prof/linux.md) ++ - [Using machine code analyzers](./prof/mca.md) +diff --git a/third_party/rust/packed_simd/perf-guide/src/ascii.css b/third_party/rust/packed_simd/perf-guide/src/ascii.css +new file mode 100644 +index 000000000000..4c02651195f9 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/ascii.css +@@ -0,0 +1,4 @@ ++code { ++ /* "Source Code Pro" breaks ASCII art */ ++ font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace; ++} +diff --git a/third_party/rust/packed_simd/perf-guide/src/bound_checks.md b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md +new file mode 100644 +index 000000000000..2eeedb5ac829 +--- /dev/null ++++ b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md +@@ -0,0 +1,22 @@ ++# Bounds checking ++ ++Reading and 
writing packed vectors to/from slices is checked by default.
++Independently of the configuration options used, the safe functions:
++
++* `Simd<[T; N]>::from_slice_aligned(& s[..])`
++* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])`
++
++always check that:
++
++* the slice is big enough to hold the vector
++* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T;
++  N]>` (this alignment is often much larger than that of `T`).
++
++There are `_unaligned` versions that use unaligned loads and stores, as well as
++`unsafe` `_unchecked` versions that do not perform any checks iff `debug-assertions =
++false` / `debug = false`. That is, the `_unchecked` methods do still assert size
++and alignment in debug builds and could also do so in release builds depending
++on the configuration options.
++
++These assertions often significantly impact performance, and you should be
++aware of them.
+diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md
+new file mode 100644
+index 000000000000..2237c67ec4b3
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md
+@@ -0,0 +1,8 @@
++# Approximate functions
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md
+new file mode 100644
+index 000000000000..357748383d63
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md
+@@ -0,0 +1,6 @@
++# Fused Multiply Add
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md
+new file mode 100644
+index 000000000000..711fcc4fd598
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md
+@@ -0,0 +1,3 @@
++# Floating-point math
++
++This chapter contains information pertaining to working with floating-point numbers.
+diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md
+new file mode 100644
+index 000000000000..266c2531cc04
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md
+@@ -0,0 +1,7 @@
++# Short Vector Math Library
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/introduction.md b/third_party/rust/packed_simd/perf-guide/src/introduction.md
+new file mode 100644
+index 000000000000..7243e19c8a54
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/introduction.md
+@@ -0,0 +1,26 @@
++# Introduction
++
++## What is SIMD
++
++
++
++## History of SIMD in Rust
++
++
++
++## Discover packed_simd
++
++
++
++Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately,
++not trivial. There are many pitfalls that one should be aware of, and some idioms
++that help avoid those pitfalls.
++
++This book attempts to document these best practices and provides practical examples
++on how to apply the tips to _your_ code.
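++
++As a first taste of the kind of code this book discusses, here is a minimal
++sketch (not one of the crate's official examples) that adds four `f32` lanes
++in a single vertical operation:
++
++```rust
++use packed_simd::f32x4;
++
++fn main() {
++    let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
++    let b = f32x4::splat(10.0);
++    // All four lanes are added at once by a single SIMD instruction.
++    assert_eq!(a + b, f32x4::new(11.0, 12.0, 13.0, 14.0));
++}
++```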
+diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/linux.md b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md
+new file mode 100644
+index 000000000000..96c7d67bc476
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md
+@@ -0,0 +1,107 @@
++# Performance profiling on Linux
++
++## Using `perf`
++
++[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler
++for Linux, featuring support for various hardware Performance Monitoring Units,
++as well as integration with the kernel's performance events framework.
++
++We will only look at how the `perf` command can be used to profile SIMD code.
++Full system profiling is outside the scope of this book.
++
++### Recording
++
++The first step is to record a program's execution during an average workload.
++It helps if you can isolate the parts of your program which have performance
++issues, and set up a benchmark which can be easily (re)run.
++
++Build the benchmark binary in release mode, after having enabled debug info:
++
++```sh
++$ cargo build --release
++Finished release [optimized + debuginfo] target(s) in 0.02s
++```
++
++Then use the `perf record` subcommand:
++
++```sh
++$ perf record --call-graph=dwarf ./target/release/my-program
++[ perf record: Woken up 10 times to write data ]
++[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ]
++```
++
++Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use
++`--call-graph=lbr` if you have a processor with support for Last Branch Record
++(i.e. Intel Haswell and newer).
++
++`perf` will, by default, record the count of CPU cycles it takes to execute
++various parts of your program. You can use the `-e` command line option
++to enable other performance events, such as `cache-misses`. Use `perf list`
++to get a list of all hardware counters supported by your CPU.
++
++### Viewing the report
++
++The next step is getting a bird's eye view of the program's execution.
++`perf` provides a `ncurses`-based interface which will get you started.
++
++Use `perf report` to open a visualization of your program's performance:
++
++```sh
++perf report --hierarchy -M intel
++```
++
++`--hierarchy` will display a tree-like structure of where your program spent
++most of its time. `-M intel` enables disassembly output with Intel syntax, which
++is subjectively more readable than the default AT&T syntax.
++
++Here is the output from profiling the `nbody` benchmark:
++
++```
++- 100,00% nbody
++  - 94,18% nbody
++    + 93,48% [.] nbody_lib::simd::advance
++    + 0,70% [.] nbody_lib::run
++  + 5,06% libc-2.28.so
++```
++
++If you move with the arrow keys to any node in the tree, you can then press `a`
++to have `perf` _annotate_ that node. This means it will:
++
++- disassemble the function
++
++- associate every instruction with the percentage of time which was spent executing it
++
++- interleave the disassembly with the source code,
++  assuming it found the debug symbols
++  (you can use `s` to toggle this behaviour)
++
++`perf` will, by default, open the instruction which it identified as being the
++hottest spot in the function:
++
++```
++0,76  │ movapd   xmm2,xmm0
++0,38  │ movhlps  xmm2,xmm0
++      │ addpd    xmm2,xmm0
++      │ unpcklpd xmm1,xmm2
++12,50 │ sqrtpd   xmm0,xmm1
++1,52  │ mulpd    xmm0,xmm1
++```
++
++In this case, `sqrtpd` will be highlighted in red, since that's the instruction
++which the CPU spends most of its time executing.
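++
++For context, here is a hypothetical sketch (not code from the `nbody` example)
++of the kind of kernel that produces such a profile; the `sqrt()` call is what
++lowers to the `sqrtpd` instruction highlighted above:
++
++```rust
++use packed_simd::f64x2;
++
++// Per-lane Euclidean norm; on x86_64 the `sqrt()` lowers to `sqrtpd`,
++// so it shows up as the hottest instruction under `perf annotate`.
++fn norms(dx: f64x2, dy: f64x2) -> f64x2 {
++    (dx * dx + dy * dy).sqrt()
++}
++```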
++
++## Using Valgrind
++
++Valgrind is a set of tools which initially helped C/C++ programmers find unsafe
++memory accesses in their code. Nowadays the project also has
++
++- a heap profiler called `massif`
++
++- a cache utilization profiler called `cachegrind`
++
++- a call-graph performance profiler called `callgrind`
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/mca.md b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md
+new file mode 100644
+index 000000000000..65ddf1a4eb3a
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md
+@@ -0,0 +1,100 @@
++# Machine code analysis tools
++
++## The microarchitecture of modern CPUs
++
++While you might have heard of Instruction Set Architectures, such as `x86` or
++`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_)
++refers to the internal details of an actual family of CPUs, such as Intel's
++_Haswell_ or AMD's _Jaguar_.
++
++Replacing scalar code with SIMD code will improve performance on all CPUs
++supporting the required vector extensions.
++However, due to microarchitectural differences, the actual speed-up at
++runtime might vary.
++
++**Example**: a simple case arises when optimizing for AMD K8 CPUs.
++The assembly generated for an empty function should look like this:
++
++```asm
++nop
++ret
++```
++
++The `nop` is used to align the `ret` instruction for better performance.
++However, the compiler will actually generate the following code:
++
++```asm
++repz ret
++```
++
++The `repz` instruction will repeat the following instruction until a certain
++condition is met. Of course, in this situation, the function will simply immediately
++return, and the `ret` instruction is still aligned.
++However, AMD K8's branch predictor performs better with the latter code.
++
++If you are looking to absolutely maximize performance for a certain target µ-arch,
++you will have to read some CPU manuals, or ask the compiler to do it for you
++with `-C target-cpu`.
++
++### Summary of CPU internals
++
++Modern processors are able to execute instructions out-of-order for better performance,
++by utilizing tricks such as [branch prediction], [instruction pipelining],
++or [superscalar execution].
++
++[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor
++[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining
++[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor
++
++SIMD instructions are also subject to these optimizations, meaning it can get pretty
++difficult to determine where the slowdown happens.
++For example, if the profiler reports a store operation is slow, one of two things
++could be happening:
++
++- the store is limited by the CPU's memory bandwidth, which is actually an ideal
++  scenario, all things considered;
++
++- memory bandwidth is nowhere near its peak, but the value to be stored is at the
++  end of a long chain of operations, and this store is where the profiler
++  encountered the pipeline stall.
++
++Since most profilers are simple tools which don't understand the subtleties of
++instruction scheduling, you need tools that model the CPU's internals to
++diagnose such issues.
++
++## Analyzing the machine code
++
++Certain tools have knowledge of internal CPU microarchitecture, i.e. they know
++
++- how many physical [register files] a CPU actually has
++
++- what the latency / throughput of an instruction is
++
++- what [µ-ops] are generated for a set of instructions
++
++and many other architectural details.
++
++[register files]: https://en.wikipedia.org/wiki/Register_file
++[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation
++
++These tools are therefore able to provide accurate information as to why some
++instructions are inefficient, and where the bottleneck is.
++
++The disadvantage is that the output of these tools requires advanced knowledge
++of the target architecture to understand, i.e. they **cannot** point out the
++cause of the issue explicitly.
++
++## Intel's Architecture Code Analyzer (IACA)
++
++[IACA] is a free tool offered by Intel for analyzing the performance of various
++computational kernels.
++
++Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches.
++
++[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer
++
++## llvm-mca
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md
+new file mode 100644
+index 000000000000..02ba78d2f22f
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md
+@@ -0,0 +1,14 @@
++# Performance profiling
++
++While the rest of the book provides practical advice on how to improve the performance
++of SIMD code, this chapter is dedicated to [**performance profiling**][profiling].
++Profiling consists of recording a program's execution in order to identify program
++hotspots.
++
++**Important**: most profilers require debug information in order to accurately
++link the program hotspots back to the corresponding source code lines. Rust will
++disable debug info generation by default for optimized builds, but you can change
++that [in your `Cargo.toml`][cargo-ref].
++
++[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming)
++[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections
+diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md
+new file mode 100644
+index 000000000000..ee670fea5bd8
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md
+@@ -0,0 +1,5 @@
++# The `target_feature` attribute
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md
+new file mode 100644
+index 000000000000..b93030ca6708
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md
+@@ -0,0 +1,13 @@
++# Enabling target features
++
++Not all processors of a certain architecture will have SIMD processing units,
++and using a SIMD instruction which is not supported will trigger undefined behavior.
++
++To allow building safe, portable programs, the Rust compiler will **not**, by default,
++generate any sort of vector instructions, unless it can statically determine
++they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed.
++The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of
++which features are enabled by default on various platforms, refer to the target
++specifications [in the compiler's source code][targets].
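++
++For illustration, a minimal sketch (not part of the original guide) of how code
++can observe which target features were enabled at compile time:
++
++```rust
++fn main() {
++    // `cfg!(target_feature = "...")` is `true` only if the feature was
++    // enabled when compiling, e.g. via `-C target-feature=+avx2`.
++    println!("sse2: {}", cfg!(target_feature = "sse2"));
++    println!("avx2: {}", cfg!(target_feature = "avx2"));
++}
++```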
++
++[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec
+diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md
+new file mode 100644
+index 000000000000..86705102a74b
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md
+@@ -0,0 +1,5 @@
++# Inlining
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md
+new file mode 100644
+index 000000000000..5b55c61c268a
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md
+@@ -0,0 +1,31 @@
++# Target features in practice
++
++Using `RUSTFLAGS` will allow the crate being compiled, as well as all its
++transitive dependencies, to use certain target features.
++
++A technique used to avoid undefined behavior at runtime is to compile and
++ship multiple binaries, each compiled with a certain set of features.
++This might not be feasible in some cases, and can quickly get out of hand
++as more and more vector extensions are added to an architecture.
++
++Rust can be more flexible: you can build a single binary/library which automatically
++picks the best supported vector instructions depending on the host machine.
++The trick consists of monomorphizing parts of the code during building, and then
++using run-time feature detection to select the right code path when running.
++
++
++
++**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing
++SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause
++performance issues.
++
++The solution is to compile all code, even the code written with 128-bit vectors,
++with the AVX target feature enabled. This will cause the compiler to prefix the
++generated instructions with the [VEX] prefix.
++
++[VEX]: https://en.wikipedia.org/wiki/VEX_prefix
+diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md
+new file mode 100644
+index 000000000000..47ddcc8660db
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md
+@@ -0,0 +1,5 @@
++# Detecting host features at runtime
++
++
+diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md
+new file mode 100644
+index 000000000000..e2e806e085b6
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md
+@@ -0,0 +1,77 @@
++# Using RUSTFLAGS
++
++One of the easiest ways to benefit from SIMD is to allow the compiler
++to generate code using certain vector instruction extensions.
++
++The environment variable `RUSTFLAGS` can be used to pass options for code
++generation to the Rust compiler. These flags will affect **all** compiled crates.
++
++There are two flags which can be used to enable specific vector extensions:
++
++## target-feature
++
++- Syntax: `-C target-feature=<features>`
++
++- Provides the compiler with a comma-separated set of instruction extensions
++  to enable.
++
++  **Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions
++  for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and
++  [Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions).
++
++- To list target triples for all targets supported by Rust, use:
++
++  ```sh
++  rustc --print target-list
++  ```
++
++- To list all supported target features for a certain target triple, use:
++
++  ```sh
++  rustc --target=${TRIPLE} --print target-features
++  ```
++
++- Note that all CPU features are independent, and will have to be enabled individually.
++
++  **Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though
++  all CPUs which support AVX2 also support FMA. To enable both, one has to use
++  `-C target-feature=+avx2,+fma`.
++
++- Some features also depend on other features, which need to be enabled for the
++  target instructions to be generated.
++
++  **Example**: Unless `v7` is specified as the target CPU (see below), to enable
++  NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`.
++
++## target-cpu
++
++- Syntax: `-C target-cpu=<id>`
++
++- Sets the identifier of a CPU family / model for which to build and optimize the code.
++
++  **Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'`
++
++- To list all supported target CPUs for a certain target triple, use:
++
++  ```sh
++  rustc --target=${TRIPLE} --print target-cpus
++  ```
++
++  **Example**:
++
++  ```sh
++  rustc --target=i686-pc-windows-msvc --print target-cpus
++  ```
++
++- The compiler will translate this into a list of target features. Therefore,
++  individual feature checks (`#[cfg(target_feature = "...")]`) will still
++  work properly.
++
++- It will cause the code generator to optimize the generated code for that
++  specific CPU model.
++
++- Using `native` as the CPU model will cause Rust to generate and optimize code
++  for the CPU running the compiler. It is useful when building programs which you
++  plan to only use locally. This should never be used when the generated programs
++  are meant to be run on other computers, such as when packaging for distribution
++  or cross-compiling.
+diff --git a/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md
+new file mode 100644
+index 000000000000..d0dd1be12a19
+--- /dev/null
++++ b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md
+@@ -0,0 +1,76 @@
++# Vertical and horizontal operations
++
++In SIMD terminology, each vector has a certain "width" (number of lanes).
++A vector processor is able to perform two kinds of operations on a vector:
++
++- Vertical operations:
++  operate on two vectors of the same width; the result has the same width
++
++**Example**: vertical addition of two `f32x4` vectors
++
++      %0 == | 2 | -3.5 |  0 | 7 |
++              +     +     +   +
++      %1 == | 4 |  1.5 | -1 | 0 |
++              =     =     =   =
++ %0 + %1 == | 6 |   -2 | -1 | 7 |
++
++- Horizontal operations:
++  reduce the elements of two vectors in some way;
++  the result's elements combine information from the two original ones
++
++**Example**: horizontal addition of two `i64x2` vectors
++
++      %0 == | 1 | 3 |
++              └─+───┘
++                └───────┐
++                        │
++      %1 == | 4 | -1 |  │
++              └─+──┘    │
++                └───┐   │
++                    │   │
++              ┌─────│───┘
++              ▼     ▼
++ %0 + %1 == | 4 | 3 |
++
++## Performance consideration of horizontal operations
++
++The result of vertical operations, like vector negation: `-a`, for a given lane,
++does not depend on the result of the operation for the other lanes. The result
++of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on
++the value of all vector lanes.
++
++In virtually all architectures vertical operations are fast, while horizontal
++operations are, by comparison, very slow.
++
++Consider the following two functions for computing the sum of all `f32` values
++in a slice:
++
++```rust
++fn fast_sum(x: &[f32]) -> f32 {
++    assert!(x.len() % 4 == 0);
++    let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.]
++    for i in (0..x.len()).step_by(4) {
++        sum += f32x4::from_slice_unaligned(&x[i..]);
++    }
++    sum.sum()
++}
++
++fn slow_sum(x: &[f32]) -> f32 {
++    assert!(x.len() % 4 == 0);
++    let mut sum: f32 = 0.;
++    for i in (0..x.len()).step_by(4) {
++        sum += f32x4::from_slice_unaligned(&x[i..]).sum();
++    }
++    sum
++}
++```
++
++The inner loop over the slice is where the bulk of the work actually happens.
++There, the `fast_sum` function performs vertical operations into a vector, doing
++a single horizontal reduction at the end, while the `slow_sum` function performs
++horizontal vector operations inside the loop.
++
++On all widely-used architectures, `fast_sum` is a large constant factor faster
++than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On
++the particular machine tested, the algorithm using horizontal vector
++addition is 2.7x slower than the one using vertical vector operations!
+diff --git a/third_party/rust/packed_simd/readme.md b/third_party/rust/packed_simd/readme.md
+new file mode 100644
+index 000000000000..3b27a2bba0d6
+--- /dev/null
++++ b/third_party/rust/packed_simd/readme.md
+@@ -0,0 +1,182 @@
++# `Simd<[T; N]>`
++
++## Implementation of [Rust RFC #2366: `std::simd`][rfc2366]
++
++[![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![Latest Version]][crates.io] [![docs]][master_docs]
++
++> This aims to be a 100% conforming implementation of Rust RFC 2366 for stabilization.
++
++**WARNING**: this crate only supports the most recent nightly Rust toolchain.
++
++## Documentation
++
++* [API docs (`master` branch)][master_docs]
++* [Performance guide][perf_guide]
++* [API docs (`docs.rs`)][docs.rs]: **CURRENTLY DOWN** due to
++  https://github.com/rust-lang-nursery/packed_simd/issues/110
++* [RFC2366 `std::simd`][rfc2366]: contains motivation, design rationale,
++  discussion, etc.
++
++## Examples
++
++Most of the examples come with both a scalar and a vectorized implementation.
++
++* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench)
++* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux)
++* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse)
++* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot)
++* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody)
++* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing)
++* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm)
++* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform)
++* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil)
++* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product)
++
++## Cargo features
++
++* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait
++  implementations for the vector types. These allow reinterpreting the bits of a
++  vector type as those of another vector type safely by just using the
++  `.into_bits()` method.
++
++* `core_arch` (default: disabled): enable this feature to recompile `core::arch`
++  for the enabled target features. `packed_simd` includes optimizations for some
++  target feature combinations that are enabled by this feature. Note, however,
++  that this is an unstable dependency, which rustc might break at any time.
++
++* `sleef-sys` (default: disabled - `x86_64` only): internally uses the [SLEEF]
++  short-vector math library when profitable via the [`sleef-sys`][sleef_sys]
++  crate. [SLEEF] is licensed under the [Boost Software License
++  v1.0][boost_license], an extremely permissive license, and can be statically
++  linked without issues.
++
++## Performance
++
++The following [ISPC] examples are also part of `packed_simd`'s
++[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/)
++directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s
++Single-Program-Multiple-Data (SPMD) programming model. The performance results
++on different hardware are shown in the `readme.md` of each example. The following
++table summarizes the performance ranges, where `+` means speed-up and `-`
++slowdown:
++
++* `aobench`: `[-1.02x, +1.53x]`,
++* `stencil`: `[+1.06x, +1.72x]`,
++* `mandelbrot`: `[-1.74x, +1.2x]`,
++* `options_pricing`:
++  * `black_scholes`: `+1.0x`
++  * `binomial_put`: `+1.4x`
++
++  While SPMD is not the intended use case for `packed_simd`, it is possible to
++  combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming
++  model in Rust. Writing performant code is not as straightforward as with
++  [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one
++  can easily match and often out-perform [ISPC]'s "default performance".
++
++## Platform support
++
++The following table describes the supported platforms: `build` shows whether the
++library compiles without issues for a given target, while `run` shows whether
++the full testsuite passes on the target.
++
++| Linux targets:                         | build     | run     |
++|----------------------------------------|-----------|---------|
++| `i586-unknown-linux-gnu`               | ✓         | ✓       |
++| `i686-unknown-linux-gnu`               | ✓         | ✓       |
++| `x86_64-unknown-linux-gnu`             | ✓         | ✓       |
++| `arm-unknown-linux-gnueabi`            | ✗         | ✗       |
++| `arm-unknown-linux-gnueabihf`          | ✓         | ✓       |
++| `armv7-unknown-linux-gnueabi`          | ✓         | ✓       |
++| `aarch64-unknown-linux-gnu`            | ✓         | ✓       |
++| `mips-unknown-linux-gnu`               | ✓         | ✓       |
++| `mipsel-unknown-linux-musl`            | ✓         | ✓       |
++| `mips64-unknown-linux-gnuabi64`        | ✓         | ✓       |
++| `mips64el-unknown-linux-gnuabi64`      | ✓         | ✓       |
++| `powerpc-unknown-linux-gnu`            | ✗         | ✗       |
++| `powerpc64-unknown-linux-gnu`          | ✗         | ✗       |
++| `powerpc64le-unknown-linux-gnu`        | ✗         | ✗       |
++| `s390x-unknown-linux-gnu`              | ✓         | ✓*      |
++| `sparc64-unknown-linux-gnu`            | ✓         | ✓*      |
++| `thumbv7neon-unknown-linux-gnueabihf`  | ✓         | ✓       |
++| **MacOSX targets:**                    | **build** | **run** |
++| `x86_64-apple-darwin`                  | ✓         | ✓       |
++| `i686-apple-darwin`                    | ✓         | ✓       |
++| **Windows targets:**                   | **build** | **run** |
++| `x86_64-pc-windows-msvc`               | ✓         | ✓       |
++| `i686-pc-windows-msvc`                 | ✓         | ✓       |
++| `x86_64-pc-windows-gnu`                | ✗         | ✗       |
++| `i686-pc-windows-gnu`                  | ✗         | ✗       |
++| **WebAssembly targets:**               | **build** | **run** |
++| `wasm32-unknown-unknown`               | ✓         | ✓       |
++| **Android targets:**                   | **build** | **run** |
++| `x86_64-linux-android`                 | ✓         | ✓       |
++| `arm-linux-androideabi`                | ✓         | ✓       |
++| `aarch64-linux-android`                | ✓         | ✗       |
++| `thumbv7neon-linux-androideabi`        | ✓         | ✓       |
++| **iOS targets:**                       | **build** | **run** |
++| `i386-apple-ios`                       | ✓         | ✗       |
++| `x86_64-apple-ios`                     | ✓         | ✗       |
++| `armv7-apple-ios`                      | ✓         | ✗**     |
++| `aarch64-apple-ios`                    | ✓         | ✗**     |
++| **xBSD targets:**                      | **build** | **run** |
++| `i686-unknown-freebsd`                 | ✗         | ✗**     |
++| `x86_64-unknown-freebsd`               | ✗         | ✗**     |
++| `x86_64-unknown-netbsd`                | ✗         | ✗**     |
++| **Solaris targets:**                   | **build** | **run** |
++| `x86_64-sun-solaris`                   | ✗         | ✗**     |
++
++[*] most of the test suite passes correctly on these platforms, but
++there are correctness bugs open in the issue tracker.
++
++[**] it is currently not easily possible to run these platforms on CI.
++
++## Machine code verification
++
++The
++[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify)
++crate's tests disassemble the portable packed vector APIs at run-time and
++compare the generated machine code against the desired one to make sure that
++this crate remains efficient.
++
++## License
++
++This project is licensed under either of
++
++* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
++  ([LICENSE-APACHE](LICENSE-APACHE))
++
++* [MIT License](http://opensource.org/licenses/MIT)
++  ([LICENSE-MIT](LICENSE-MIT))
++
++at your option.
++
++## Contributing
++
++We welcome all people who want to contribute.
++Please see the [contributing instructions] for more information.
++
++Contributions in any form (issues, pull requests, etc.) to this project
++must adhere to Rust's [Code of Conduct].
++
++Unless you explicitly state otherwise, any contribution intentionally submitted
++for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be
++dual licensed as above, without any additional terms or conditions.
++ ++[travis]: https://travis-ci.org/rust-lang-nursery/packed_simd ++[Travis-CI Status]: https://travis-ci.org/rust-lang-nursery/packed_simd.svg?branch=master ++[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd ++[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true ++[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg ++[crates.io]: https://crates.io/crates/packed_simd ++[docs]: https://docs.rs/packed_simd/badge.svg ++[docs.rs]: https://docs.rs/packed_simd/ ++[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/ ++[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ ++[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366 ++[ISPC]: https://ispc.github.io/ ++[rayon]: https://crates.io/crates/rayon ++[boost_license]: https://www.boost.org/LICENSE_1_0.txt ++[SLEEF]: https://sleef.org/ ++[sleef_sys]: https://crates.io/crates/sleef-sys ++[contributing instructions]: contributing.md ++[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html +diff --git a/third_party/rust/packed_simd/rustfmt.toml b/third_party/rust/packed_simd/rustfmt.toml +new file mode 100644 +index 000000000000..5b400a4ce440 +--- /dev/null ++++ b/third_party/rust/packed_simd/rustfmt.toml +@@ -0,0 +1,7 @@ ++max_width = 79 ++use_small_heuristics = "Max" ++wrap_comments = true ++comment_width = 79 ++fn_args_density = "Compressed" ++edition = "2018" ++error_on_line_overflow = true +\ No newline at end of file +diff --git a/third_party/rust/packed_simd/src/api.rs b/third_party/rust/packed_simd/src/api.rs +new file mode 100644 +index 000000000000..9959a052ae96 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api.rs +@@ -0,0 +1,301 @@ ++//! Implements the Simd<[T; N]> APIs ++ ++crate mod cast; ++#[macro_use] ++mod cmp; ++#[macro_use] ++mod default; ++#[macro_use] ++mod fmt; ++#[macro_use] ++mod from; ++#[macro_use] ++mod hash; ++#[macro_use] ++mod math; ++#[macro_use] ++mod minimal; ++#[macro_use] ++mod ops; ++#[macro_use] ++mod ptr; ++#[macro_use] ++mod reductions; ++#[macro_use] ++mod select; ++#[macro_use] ++mod shuffle; ++#[macro_use] ++mod shuffle1_dyn; ++#[macro_use] ++mod slice; ++#[macro_use] ++mod swap_bytes; ++#[macro_use] ++mod bit_manip; ++ ++#[cfg(feature = "into_bits")] ++crate mod into_bits; ++ ++macro_rules! 
impl_i { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident ++ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])*); ++ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_scalar_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_int_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ impl_reduction_integer_arithmetic!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) ++ ); ++ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1) ++ ); ++ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt ++ ); ++ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ ++ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); ++ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! 
impl_u { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident ++ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])*); ++ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_scalar_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ++ ); ++ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_int_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ impl_reduction_integer_arithmetic!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_reduction_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) ++ ); ++ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0) ++ ); ++ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt ++ ); ++ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); ++ ++ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); ++ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! 
impl_f { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident ++ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])*); ++ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_ops_vector_float_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ impl_reduction_float_arithmetic!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_reduction_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ ); ++ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.)); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.) ++ ); ++ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ ++ impl_float_consts!([$elem_ty; $elem_n]: $tuple_id); ++ impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty); ++ ++ // floating-point math ++ impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.) ++ | $test_tt ++ ); ++ ++ test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt); ++ test_reduction_float_min_max!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ ); ++ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! 
impl_m { ++ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident | $ielem_ty:ident ++ | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* ++ | $(#[$doc:meta])*) => { ++ impl_minimal_mask!( ++ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | $($elem_ids),* | $(#[$doc])* ++ ); ++ impl_ops_vector_mask_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_ops_scalar_mask_bitwise!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_reduction_bitwise!( ++ [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt ++ | (|x|{ x != 0 }) | (true, false) ++ ); ++ impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt); ++ impl_from_array!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt ++ | (crate::$elem_ty::new(true), true) ++ ); ++ impl_from_vectors!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ++ ); ++ impl_default!([bool; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_cmp_eq!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ++ ); ++ impl_cmp_vertical!( ++ [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false) ++ | $test_tt ++ ); ++ impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ impl_cmp_ord!( ++ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true) ++ ); ++ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ ++ test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); ++ } ++} ++ ++macro_rules! impl_const_p { ++ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, ++ $usize_ty:ident, $isize_ty:ident ++ | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_p!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty ++ | ref_ | $test_tt | $($elem_ids),* ++ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* ++ ); ++ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); ++ } ++} ++ ++macro_rules! impl_mut_p { ++ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, ++ $usize_ty:ident, $isize_ty:ident ++ | $test_tt:tt | $($elem_ids:ident),* ++ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { ++ impl_minimal_p!( ++ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty ++ | ref_mut_ | $test_tt | $($elem_ids),* ++ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* ++ ); ++ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); ++ impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/bit_manip.rs b/third_party/rust/packed_simd/src/api/bit_manip.rs +new file mode 100644 +index 000000000000..3d3c4eb8850a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/bit_manip.rs +@@ -0,0 +1,128 @@ ++//! Bit manipulations. ++ ++macro_rules! impl_bit_manip { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Returns the number of ones in the binary representation of ++ /// the lanes of `self`. ++ #[inline] ++ pub fn count_ones(self) -> Self { ++ super::codegen::bit_manip::BitManip::ctpop(self) ++ } ++ ++ /// Returns the number of zeros in the binary representation of ++ /// the lanes of `self`. 
++ #[inline] ++ pub fn count_zeros(self) -> Self { ++ super::codegen::bit_manip::BitManip::ctpop(!self) ++ } ++ ++ /// Returns the number of leading zeros in the binary ++ /// representation of the lanes of `self`. ++ #[inline] ++ pub fn leading_zeros(self) -> Self { ++ super::codegen::bit_manip::BitManip::ctlz(self) ++ } ++ ++ /// Returns the number of trailing zeros in the binary ++ /// representation of the lanes of `self`. ++ #[inline] ++ pub fn trailing_zeros(self) -> Self { ++ super::codegen::bit_manip::BitManip::cttz(self) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item_with_macros! { ++ #[allow(overflowing_literals)] ++ pub mod [<$id _bit_manip>] { ++ use super::*; ++ ++ const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8; ++ ++ macro_rules! test_func { ++ ($x:expr, $func:ident) => {{ ++ let mut actual = $x; ++ for i in 0..$id::lanes() { ++ actual = actual.replace( ++ i, ++ $x.extract(i).$func() as $elem_ty ++ ); ++ } ++ let expected = $x.$func(); ++ assert_eq!(actual, expected); ++ }}; ++ } ++ ++ const BYTES: [u8; 64] = [ ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ ]; ++ ++ fn load_bytes() -> $id { ++ let elems: &mut [$elem_ty] = unsafe { ++ slice::from_raw_parts_mut( ++ BYTES.as_mut_ptr() as *mut $elem_ty, ++ $id::lanes(), ++ ) ++ }; ++ $id::from_slice_unaligned(elems) ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn count_ones() { ++ test_func!($id::splat(0), count_ones); ++ test_func!($id::splat(!0), count_ones); ++ test_func!(load_bytes(), count_ones); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn count_zeros() { ++ test_func!($id::splat(0), count_zeros); ++ test_func!($id::splat(!0), count_zeros); ++ test_func!(load_bytes(), count_zeros); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn leading_zeros() { ++ test_func!($id::splat(0), leading_zeros); ++ test_func!($id::splat(1), leading_zeros); ++ // some implementations use `pshufb` which has unique ++ // behavior when the 8th bit is set. ++ test_func!($id::splat(0b1000_0010), leading_zeros); ++ test_func!($id::splat(!0), leading_zeros); ++ test_func!( ++ $id::splat(1 << (LANE_WIDTH - 1)), ++ leading_zeros ++ ); ++ test_func!(load_bytes(), leading_zeros); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn trailing_zeros() { ++ test_func!($id::splat(0), trailing_zeros); ++ test_func!($id::splat(1), trailing_zeros); ++ test_func!($id::splat(0b1000_0010), trailing_zeros); ++ test_func!($id::splat(!0), trailing_zeros); ++ test_func!( ++ $id::splat(1 << (LANE_WIDTH - 1)), ++ trailing_zeros ++ ); ++ test_func!(load_bytes(), trailing_zeros); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/cast.rs b/third_party/rust/packed_simd/src/api/cast.rs +new file mode 100644 +index 000000000000..f1c32ca1a38b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast.rs +@@ -0,0 +1,108 @@ ++//! Implementation of `FromCast` and `IntoCast`. ++#![allow(clippy::module_name_repetitions)] ++ ++/// Numeric cast from `T` to `Self`. 
++///
++/// > Note: This is a temporary workaround until the conversion traits
++/// > specified in [RFC2484] are implemented.
++///
++/// Numeric cast between vectors with the same number of lanes, such that:
++///
++/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
++///   -> `u32xN`) is a **no-op**,
++///
++/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
++///   `u8xN`) will **truncate**,
++///
++/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
++///   `u32xN`) will:
++///    * **zero-extend** if the source is unsigned, or
++///    * **sign-extend** if the source is signed,
++///
++/// * casting from a float to an integer will **round the float towards zero**,
++///
++/// * casting from an integer to float will produce the floating point
++///   representation of the integer, **rounding to nearest, ties to even**,
++///
++/// * casting from an `f32` to an `f64` is perfect and lossless,
++///
++/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
++///
++/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
++pub trait FromCast<T>: crate::marker::Sized {
++    /// Numeric cast from `T` to `Self`.
++    fn from_cast(_: T) -> Self;
++}
++
++/// Numeric cast from `Self` to `T`.
++///
++/// > Note: This is a temporary workaround until the conversion traits
++/// > specified in [RFC2484] are implemented.
++///
++/// Numeric cast between vectors with the same number of lanes, such that:
++///
++/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
++///   -> `u32xN`) is a **no-op**,
++///
++/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
++///   `u8xN`) will **truncate**,
++///
++/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
++///   `u32xN`) will:
++///    * **zero-extend** if the source is unsigned, or
++///    * **sign-extend** if the source is signed,
++///
++/// * casting from a float to an integer will **round the float towards zero**,
++///
++/// * casting from an integer to float will produce the floating point
++///   representation of the integer, **rounding to nearest, ties to even**,
++///
++/// * casting from an `f32` to an `f64` is perfect and lossless,
++///
++/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
++///
++/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
++pub trait Cast<T>: crate::marker::Sized {
++    /// Numeric cast from `self` to `T`.
++    fn cast(self) -> T;
++}
++
++/// `FromCast` implies `Cast`.
++impl<T, U> Cast<U> for T
++where
++    U: FromCast<T>,
++{
++    #[inline]
++    fn cast(self) -> U {
++        U::from_cast(self)
++    }
++}
++
++/// `FromCast` and `Cast` are reflexive
++impl<T> FromCast<T> for T {
++    #[inline]
++    fn from_cast(t: Self) -> Self {
++        t
++    }
++}
++
++#[macro_use]
++mod macros;
++
++mod v16;
++pub use self::v16::*;
++
++mod v32;
++pub use self::v32::*;
++
++mod v64;
++pub use self::v64::*;
++
++mod v128;
++pub use self::v128::*;
++
++mod v256;
++pub use self::v256::*;
++
++mod v512;
++pub use self::v512::*;
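++
++// A usage sketch of the cast API above (illustrative; assumes the portable
++// `f32x4`/`i32x4` types and the `FromCast`/`Cast` re-exports at the crate
++// root):
++//
++//     use packed_simd::{f32x4, i32x4, Cast, FromCast};
++//     let x = f32x4::new(1.7, -2.3, 0.5, -0.5);
++//     assert_eq!(i32x4::from_cast(x), i32x4::new(1, -2, 0, 0)); // toward zero
++//     let y: i32x4 = x.cast(); // `Cast` is the `Into`-style counterpart
++//     assert_eq!(y, i32x4::from_cast(x));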
+diff --git a/third_party/rust/packed_simd/src/api/cast/macros.rs b/third_party/rust/packed_simd/src/api/cast/macros.rs
+new file mode 100644
+index 000000000000..3bb29f0b80b7
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/cast/macros.rs
+@@ -0,0 +1,82 @@
++//! Macros implementing `FromCast`
++
++macro_rules! impl_from_cast_ {
++    ($id:ident[$test_tt:tt]: $from_ty:ident) => {
++        impl crate::api::cast::FromCast<$from_ty> for $id {
++            #[inline]
++            fn from_cast(x: $from_ty) -> Self {
++                use crate::llvm::simd_cast;
++                debug_assert_eq!($from_ty::lanes(), $id::lanes());
++                Simd(unsafe { simd_cast(x.0) })
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _from_cast_ $from_ty>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn test() {
++                        assert_eq!($id::lanes(), $from_ty::lanes());
++                    }
++                }
++            }
++        }
++    };
++}
++
++macro_rules! impl_from_cast {
++    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
++        $(
++            impl_from_cast_!($id[$test_tt]: $from_ty);
++        )*
++    }
++}
++
++macro_rules! impl_from_cast_mask_ {
++    ($id:ident[$test_tt:tt]: $from_ty:ident) => {
++        impl crate::api::cast::FromCast<$from_ty> for $id {
++            #[inline]
++            fn from_cast(x: $from_ty) -> Self {
++                debug_assert_eq!($from_ty::lanes(), $id::lanes());
++                x.ne($from_ty::default())
++                    .select($id::splat(true), $id::splat(false))
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _from_cast_ $from_ty>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn test() {
++                        assert_eq!($id::lanes(), $from_ty::lanes());
++
++                        let x = $from_ty::default();
++                        let m: $id = x.cast();
++                        assert!(m.none());
++                    }
++                }
++            }
++        }
++    };
++}
++
++macro_rules! impl_from_cast_mask {
++    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
++        $(
++            impl_from_cast_mask_!($id[$test_tt]: $from_ty);
++        )*
++    }
++}
++
++#[allow(unused)]
++macro_rules! impl_into_cast {
++    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
++        $(
++            impl_from_cast_!($from_ty[$test_tt]: $id);
++        )*
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/api/cast/v128.rs b/third_party/rust/packed_simd/src/api/cast/v128.rs
+new file mode 100644
+index 000000000000..78c07f3a5597
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/cast/v128.rs
+@@ -0,0 +1,79 @@
++//!
`FromCast` and `IntoCast` implementations for portable 128-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!( ++ i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast_mask!( ++ m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ++); ++ ++impl_from_cast!( ++ i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast_mask!( ++ m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++ ++impl_from_cast!( ++ i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++ ++impl_from_cast!( ++ isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2 ++); ++impl_from_cast!( ++ usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2 ++); ++impl_from_cast_mask!( ++ msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2 ++); ++ ++// FIXME[test_v128]: 64-bit single element vectors into_cast impls ++impl_from_cast!(i128x1[test_v128]: u128x1, m128x1); ++impl_from_cast!(u128x1[test_v128]: i128x1, m128x1); ++impl_from_cast!(m128x1[test_v128]: i128x1, u128x1); +diff --git a/third_party/rust/packed_simd/src/api/cast/v16.rs b/third_party/rust/packed_simd/src/api/cast/v16.rs +new file mode 100644 +index 000000000000..d292936baa41 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v16.rs +@@ -0,0 +1,17 @@ ++//! 
`FromCast` and `IntoCast` implementations for portable 16-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!( ++ i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v256.rs b/third_party/rust/packed_simd/src/api/cast/v256.rs +new file mode 100644 +index 000000000000..0a669e0beebe +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v256.rs +@@ -0,0 +1,81 @@ ++//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32); ++impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32); ++impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32); ++ ++impl_from_cast!( ++ i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, ++ i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, ++ i32x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast_mask!( ++ m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, ++ i32x16, u32x16, f32x16, m32x16 ++); ++ ++impl_from_cast!( ++ i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast_mask!( ++ m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++ ++impl_from_cast!( ++ i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ 
i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2 ++); ++ ++impl_from_cast!( ++ isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4 ++); ++impl_from_cast!( ++ usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4 ++); ++impl_from_cast_mask!( ++ msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v32.rs b/third_party/rust/packed_simd/src/api/cast/v32.rs +new file mode 100644 +index 000000000000..65050cdacb4e +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v32.rs +@@ -0,0 +1,30 @@ ++//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!( ++ i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast!( ++ u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); ++impl_from_cast_mask!( ++ m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2, ++ i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v512.rs b/third_party/rust/packed_simd/src/api/cast/v512.rs +new file mode 100644 +index 000000000000..9ae1caed35e2 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v512.rs +@@ -0,0 +1,68 @@ ++//! 
`FromCast` and `IntoCast` implementations for portable 512-bit wide vectors ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_from_cast!(i8x64[test_v512]: u8x64, m8x64); ++impl_from_cast!(u8x64[test_v512]: i8x64, m8x64); ++impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64); ++ ++impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32); ++impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32); ++impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32); ++ ++impl_from_cast!( ++ i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16 ++); ++impl_from_cast!( ++ f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16 ++); ++impl_from_cast_mask!( ++ m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16 ++); ++ ++impl_from_cast!( ++ i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast!( ++ f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, m64x8, isizex8, usizex8, msizex8 ++); ++impl_from_cast_mask!( ++ m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, isizex8, usizex8, msizex8 ++); ++ ++impl_from_cast!( ++ i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast!( ++ u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4 ++); ++impl_from_cast_mask!( ++ m128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, ++ i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4 ++); ++ ++impl_from_cast!( ++ isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, usizex8, msizex8 ++); ++impl_from_cast!( ++ usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, msizex8 ++); ++impl_from_cast_mask!( ++ msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, ++ i64x8, u64x8, f64x8, m64x8, isizex8, usizex8 ++); +diff --git a/third_party/rust/packed_simd/src/api/cast/v64.rs b/third_party/rust/packed_simd/src/api/cast/v64.rs +new file mode 100644 +index 000000000000..0e2f78f7335b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cast/v64.rs +@@ -0,0 +1,47 @@ ++//! 
`FromCast` and `IntoCast` implementations for portable 64-bit wide vectors
++#![rustfmt::skip]
++
++use crate::*;
++
++impl_from_cast!(
++    i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
++    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
++);
++impl_from_cast!(
++    u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
++    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
++);
++impl_from_cast_mask!(
++    m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8,
++    i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8
++);
++
++impl_from_cast!(
++    i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
++    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
++);
++impl_from_cast!(
++    u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4,
++    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
++);
++impl_from_cast_mask!(
++    m16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, u16x4, i32x4, u32x4, f32x4, m32x4,
++    i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4
++);
++
++impl_from_cast!(
++    i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2,
++    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
++);
++impl_from_cast!(
++    u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2,
++    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
++);
++impl_from_cast!(
++    f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2,
++    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
++);
++impl_from_cast_mask!(
++    m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2,
++    i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2
++);
+diff --git a/third_party/rust/packed_simd/src/api/cmp.rs b/third_party/rust/packed_simd/src/api/cmp.rs
+new file mode 100644
+index 000000000000..6d5301ddddbd
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/cmp.rs
+@@ -0,0 +1,16 @@
++//! Implement cmp traits for vector types
++
++#[macro_use]
++mod partial_eq;
++
++#[macro_use]
++mod eq;
++
++#[macro_use]
++mod partial_ord;
++
++#[macro_use]
++mod ord;
++
++#[macro_use]
++mod vertical;
+diff --git a/third_party/rust/packed_simd/src/api/cmp/eq.rs b/third_party/rust/packed_simd/src/api/cmp/eq.rs
+new file mode 100644
+index 000000000000..3c55d0dce57e
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/cmp/eq.rs
+@@ -0,0 +1,27 @@
++//! Implements `Eq` for vector types.
++
++macro_rules! impl_cmp_eq {
++    (
++        [$elem_ty:ident; $elem_count:expr]:
++        $id:ident | $test_tt:tt |
++        ($true:expr, $false:expr)
++    ) => {
++        impl crate::cmp::Eq for $id {}
++        impl crate::cmp::Eq for LexicographicallyOrdered<$id> {}
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _cmp_eq>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn eq() {
++                        fn foo<E: crate::cmp::Eq>(_: E) {}
++                        let a = $id::splat($false);
++                        foo(a);
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/cmp/ord.rs b/third_party/rust/packed_simd/src/api/cmp/ord.rs
+new file mode 100644
+index 000000000000..e54ba3bfde9a
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/cmp/ord.rs
+@@ -0,0 +1,43 @@
++//! Implements `Ord` for vector types.
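++
++// A total-order sketch (illustrative; assumes the portable `u32x2` type):
++//
++//     use packed_simd::u32x2;
++//     let mut xs = [u32x2::new(2, 0), u32x2::new(1, 9)];
++//     xs.sort_by_key(|v| v.lex_ord()); // lexicographic: lane 0 decides first
++//     assert_eq!(xs[0], u32x2::new(1, 9));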
++
++macro_rules! impl_cmp_ord {
++    (
++        [$elem_ty:ident; $elem_count:expr]:
++        $id:ident | $test_tt:tt |
++        ($true:expr, $false:expr)
++    ) => {
++        impl $id {
++            /// Returns a wrapper that implements `Ord`.
++            #[inline]
++            pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> {
++                LexicographicallyOrdered(*self)
++            }
++        }
++
++        impl crate::cmp::Ord for LexicographicallyOrdered<$id> {
++            #[inline]
++            fn cmp(&self, other: &Self) -> crate::cmp::Ordering {
++                match self.partial_cmp(other) {
++                    Some(x) => x,
++                    None => unsafe { crate::hint::unreachable_unchecked() },
++                }
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _cmp_ord>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn eq() {
++                        fn foo<E: crate::cmp::Ord>(_: E) {}
++                        let a = $id::splat($false);
++                        foo(a.partial_lex_ord());
++                        foo(a.lex_ord());
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs
+new file mode 100644
+index 000000000000..1712a0de56cb
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs
+@@ -0,0 +1,67 @@
++//! Implements `PartialEq` for vector types.
++
++macro_rules! impl_cmp_partial_eq {
++    (
++        [$elem_ty:ident; $elem_count:expr]:
++        $id:ident | $test_tt:tt |
++        ($true:expr, $false:expr)
++    ) => {
++        // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
++        #[allow(clippy::partialeq_ne_impl)]
++        impl crate::cmp::PartialEq<$id> for $id {
++            #[inline]
++            fn eq(&self, other: &Self) -> bool {
++                $id::eq(*self, *other).all()
++            }
++            #[inline]
++            fn ne(&self, other: &Self) -> bool {
++                $id::ne(*self, *other).any()
++            }
++        }
++
++        // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
++        #[allow(clippy::partialeq_ne_impl)]
++        impl crate::cmp::PartialEq<LexicographicallyOrdered<$id>>
++            for LexicographicallyOrdered<$id>
++        {
++            #[inline]
++            fn eq(&self, other: &Self) -> bool {
++                self.0 == other.0
++            }
++            #[inline]
++            fn ne(&self, other: &Self) -> bool {
++                self.0 != other.0
++            }
++        }
++
++        test_if! {
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _cmp_PartialEq>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn partial_eq() {
++                        let a = $id::splat($false);
++                        let b = $id::splat($true);
++
++                        assert!(a != b);
++                        assert!(!(a == b));
++                        assert!(a == a);
++                        assert!(!(a != a));
++
++                        if $id::lanes() > 1 {
++                            let a = $id::splat($false).replace(0, $true);
++                            let b = $id::splat($true);
++
++                            assert!(a != b);
++                            assert!(!(a == b));
++                            assert!(a == a);
++                            assert!(!(a != a));
++                        }
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs
+new file mode 100644
+index 000000000000..a2292918bae1
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs
+@@ -0,0 +1,234 @@
++//! Implements `PartialOrd` for vector types.
++//!
++//! This implements a lexicographical order.
++
++macro_rules! impl_cmp_partial_ord {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
++        impl $id {
++            /// Returns a wrapper that implements `PartialOrd`.
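++            ///
++            /// A sketch of the resulting lexicographic order (illustrative;
++            /// assumes the portable `u32x2` type):
++            ///
++            /// ```ignore
++            /// use packed_simd::u32x2;
++            /// let a = u32x2::new(1, 9);
++            /// let b = u32x2::new(2, 0);
++            /// // the first non-equal lane decides: 1 < 2
++            /// assert!(a.partial_lex_ord() < b.partial_lex_ord());
++            /// ```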
++            #[inline]
++            pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> {
++                LexicographicallyOrdered(*self)
++            }
++        }
++
++        impl crate::cmp::PartialOrd<LexicographicallyOrdered<$id>>
++            for LexicographicallyOrdered<$id>
++        {
++            #[inline]
++            fn partial_cmp(
++                &self, other: &Self,
++            ) -> Option<crate::cmp::Ordering> {
++                if PartialEq::eq(self, other) {
++                    Some(crate::cmp::Ordering::Equal)
++                } else if PartialOrd::lt(self, other) {
++                    Some(crate::cmp::Ordering::Less)
++                } else if PartialOrd::gt(self, other) {
++                    Some(crate::cmp::Ordering::Greater)
++                } else {
++                    None
++                }
++            }
++            #[inline]
++            fn lt(&self, other: &Self) -> bool {
++                let m_lt = self.0.lt(other.0);
++                let m_eq = self.0.eq(other.0);
++                for i in 0..$id::lanes() {
++                    if m_eq.extract(i) {
++                        continue;
++                    }
++                    return m_lt.extract(i);
++                }
++                false
++            }
++            #[inline]
++            fn le(&self, other: &Self) -> bool {
++                self.lt(other) | PartialEq::eq(self, other)
++            }
++            #[inline]
++            fn ge(&self, other: &Self) -> bool {
++                self.gt(other) | PartialEq::eq(self, other)
++            }
++            #[inline]
++            fn gt(&self, other: &Self) -> bool {
++                let m_gt = self.0.gt(other.0);
++                let m_eq = self.0.eq(other.0);
++                for i in 0..$id::lanes() {
++                    if m_eq.extract(i) {
++                        continue;
++                    }
++                    return m_gt.extract(i);
++                }
++                false
++            }
++        }
++    };
++}
++
++macro_rules! test_cmp_partial_ord_int {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _cmp_PartialOrd>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn partial_lex_ord() {
++                        use crate::testing::utils::{test_cmp};
++                        // constant values
++                        let a = $id::splat(0);
++                        let b = $id::splat(1);
++
++                        test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Less));
++                        test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Greater));
++                        test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Equal));
++                        test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Equal));
++
++                        // variable values: a = [0, 1, 2, 3]; b = [3, 2, 1, 0]
++                        let mut a = $id::splat(0);
++                        let mut b = $id::splat(0);
++                        for i in 0..$id::lanes() {
++                            a = a.replace(i, i as $elem_ty);
++                            b = b.replace(i, ($id::lanes() - i) as $elem_ty);
++                        }
++                        test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Less));
++                        test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Greater));
++                        test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Equal));
++                        test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Equal));
++
++                        // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4]
++                        let mut b = a;
++                        b = b.replace(
++                            $id::lanes() - 1,
++                            a.extract($id::lanes() - 1) + 1 as $elem_ty
++                        );
++                        test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Less));
++                        test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Greater));
++                        test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Equal));
++                        test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
++                                 Some(crate::cmp::Ordering::Equal));
++
++                        if $id::lanes() > 2 {
++                            // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3]
++                            let b = a;
++                            let mut a = $id::splat(0);
++                            a = a.replace(1, 1 as $elem_ty);
++                            test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
++                                     Some(crate::cmp::Ordering::Less));
++                            test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
Some(crate::cmp::Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ ++ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 3, 2] ++ let mut b = a; ++ b = b.replace( ++ 2, a.extract($id::lanes() - 1) + 1 as $elem_ty ++ ); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(crate::cmp::Ordering::Equal)); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! test_cmp_partial_ord_mask { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_PartialOrd>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn partial_lex_ord() { ++ use crate::testing::utils::{test_cmp}; ++ use crate::cmp::Ordering; ++ ++ // constant values ++ let a = $id::splat(false); ++ let b = $id::splat(true); ++ ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ ++ // variable values: ++ // a = [false, false, false, false]; ++ // b = [false, false, false, true] ++ let a = $id::splat(false); ++ let mut b = $id::splat(false); ++ b = b.replace($id::lanes() - 1, true); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ ++ // variable values: ++ // a = [true, true, true, false]; ++ // b = [true, true, true, true] ++ let mut a = $id::splat(true); ++ let b = $id::splat(true); ++ a = a.replace($id::lanes() - 1, false); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ ++ if $id::lanes() > 2 { ++ // variable values ++ // a = [false, true, false, false]; ++ // b = [false, true, true, true] ++ let mut a = $id::splat(false); ++ let mut b = $id::splat(true); ++ a = a.replace(1, true); ++ b = b.replace(0, false); ++ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Less)); ++ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Greater)); ++ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), ++ Some(Ordering::Equal)); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/cmp/vertical.rs b/third_party/rust/packed_simd/src/api/cmp/vertical.rs +new file mode 100644 +index 000000000000..ea4a0d1a3467 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/cmp/vertical.rs +@@ -0,0 +1,114 @@ 
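++// A lane-wise comparison sketch (illustrative; assumes the portable `f32x4`
++// type; `lt` returns an `m32x4` mask usable with `select`):
++//
++//     use packed_simd::f32x4;
++//     let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
++//     let b = f32x4::new(4.0, 3.0, 2.0, 1.0);
++//     let m = a.lt(b); // m32x4(true, true, false, false)
++//     assert_eq!(m.select(a, b), f32x4::new(1.0, 2.0, 2.0, 1.0));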
++//! Vertical (lane-wise) vector comparisons returning vector masks. ++ ++macro_rules! impl_cmp_vertical { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident, ++ $mask_ty:ident, ++ $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt ++ ) => { ++ impl $id { ++ /// Lane-wise equality comparison. ++ #[inline] ++ pub fn eq(self, other: Self) -> $mask_ty { ++ use crate::llvm::simd_eq; ++ Simd(unsafe { simd_eq(self.0, other.0) }) ++ } ++ ++ /// Lane-wise inequality comparison. ++ #[inline] ++ pub fn ne(self, other: Self) -> $mask_ty { ++ use crate::llvm::simd_ne; ++ Simd(unsafe { simd_ne(self.0, other.0) }) ++ } ++ ++ /// Lane-wise less-than comparison. ++ #[inline] ++ pub fn lt(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_gt, simd_lt}; ++ if $is_mask { ++ Simd(unsafe { simd_gt(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_lt(self.0, other.0) }) ++ } ++ } ++ ++ /// Lane-wise less-than-or-equals comparison. ++ #[inline] ++ pub fn le(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_ge, simd_le}; ++ if $is_mask { ++ Simd(unsafe { simd_ge(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_le(self.0, other.0) }) ++ } ++ } ++ ++ /// Lane-wise greater-than comparison. ++ #[inline] ++ pub fn gt(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_gt, simd_lt}; ++ if $is_mask { ++ Simd(unsafe { simd_lt(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_gt(self.0, other.0) }) ++ } ++ } ++ ++ /// Lane-wise greater-than-or-equals comparison. ++ #[inline] ++ pub fn ge(self, other: Self) -> $mask_ty { ++ use crate::llvm::{simd_ge, simd_le}; ++ if $is_mask { ++ Simd(unsafe { simd_le(self.0, other.0) }) ++ } else { ++ Simd(unsafe { simd_ge(self.0, other.0) }) ++ } ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_vertical>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn cmp() { ++ let a = $id::splat($false); ++ let b = $id::splat($true); ++ ++ let r = a.lt(b); ++ let e = $mask_ty::splat(true); ++ assert!(r == e); ++ let r = a.le(b); ++ assert!(r == e); ++ ++ let e = $mask_ty::splat(false); ++ let r = a.gt(b); ++ assert!(r == e); ++ let r = a.ge(b); ++ assert!(r == e); ++ let r = a.eq(b); ++ assert!(r == e); ++ ++ let mut a = a; ++ let mut b = b; ++ let mut e = e; ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ a = a.replace(i, $false); ++ b = b.replace(i, $true); ++ e = e.replace(i, true); ++ } else { ++ a = a.replace(i, $true); ++ b = b.replace(i, $false); ++ e = e.replace(i, false); ++ } ++ } ++ let r = a.lt(b); ++ assert!(r == e); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/default.rs b/third_party/rust/packed_simd/src/api/default.rs +new file mode 100644 +index 000000000000..843d51bcc4bb +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/default.rs +@@ -0,0 +1,28 @@ ++//! Implements `Default` for vector types. ++ ++macro_rules! impl_default { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl Default for $id { ++ #[inline] ++ fn default() -> Self { ++ Self::splat($elem_ty::default()) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _default>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn default() { ++ let a = $id::default(); ++ for i in 0..$id::lanes() { ++ assert_eq!(a.extract(i), $elem_ty::default()); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt.rs b/third_party/rust/packed_simd/src/api/fmt.rs +new file mode 100644 +index 000000000000..f3f55c401548 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt.rs +@@ -0,0 +1,12 @@ ++//! Implements formatting APIs ++ ++#[macro_use] ++mod debug; ++#[macro_use] ++mod lower_hex; ++#[macro_use] ++mod upper_hex; ++#[macro_use] ++mod octal; ++#[macro_use] ++mod binary; +diff --git a/third_party/rust/packed_simd/src/api/fmt/binary.rs b/third_party/rust/packed_simd/src/api/fmt/binary.rs +new file mode 100644 +index 000000000000..b60769082d51 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/binary.rs +@@ -0,0 +1,56 @@ ++//! Implement Octal formatting ++ ++macro_rules! impl_fmt_binary { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::Binary for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_binary>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn binary() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#b}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "") ++ .replace(")", "").split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#b}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/debug.rs b/third_party/rust/packed_simd/src/api/fmt/debug.rs +new file mode 100644 +index 000000000000..ad0b8a59a1f0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/debug.rs +@@ -0,0 +1,62 @@ ++//! Implement debug formatting ++ ++macro_rules! impl_fmt_debug_tests { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if! { ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _fmt_debug>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn debug() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::default(); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:?}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "") ++ .replace(")", "").split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:?}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_fmt_debug { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::Debug for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt); ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs +new file mode 100644 +index 000000000000..5a7aa14b5b8a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs +@@ -0,0 +1,56 @@ ++//! Implement `LowerHex` formatting ++ ++macro_rules! impl_fmt_lower_hex { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::LowerHex for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_lower_hex>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn lower_hex() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#x}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "").replace(")", "") ++ .split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#x}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/octal.rs b/third_party/rust/packed_simd/src/api/fmt/octal.rs +new file mode 100644 +index 000000000000..83ac8abc7dae +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/octal.rs +@@ -0,0 +1,56 @@ ++//! 
Implement Octal formatting ++ ++macro_rules! impl_fmt_octal { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::Octal for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_octal>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn octal_hex() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#o}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "").replace(")", "") ++ .split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#o}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs +new file mode 100644 +index 000000000000..aa88f673abf0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs +@@ -0,0 +1,56 @@ ++//! Implement `UpperHex` formatting ++ ++macro_rules! impl_fmt_upper_hex { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::fmt::UpperHex for $id { ++ #[allow(clippy::missing_inline_in_public_items)] ++ fn fmt( ++ &self, f: &mut crate::fmt::Formatter<'_>, ++ ) -> crate::fmt::Result { ++ write!(f, "{}(", stringify!($id))?; ++ for i in 0..$elem_count { ++ if i > 0 { ++ write!(f, ", ")?; ++ } ++ self.extract(i).fmt(f)?; ++ } ++ write!(f, ")") ++ } ++ } ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _fmt_upper_hex>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn upper_hex() { ++ use arrayvec::{ArrayString,ArrayVec}; ++ type TinyString = ArrayString<[u8; 512]>; ++ ++ use crate::fmt::Write; ++ let v = $id::splat($elem_ty::default()); ++ let mut s = TinyString::new(); ++ write!(&mut s, "{:#X}", v).unwrap(); ++ ++ let mut beg = TinyString::new(); ++ write!(&mut beg, "{}(", stringify!($id)).unwrap(); ++ assert!(s.starts_with(beg.as_str())); ++ assert!(s.ends_with(")")); ++ let s: ArrayVec<[TinyString; 64]> ++ = s.replace(beg.as_str(), "").replace(")", "") ++ .split(",") ++ .map(|v| TinyString::from(v.trim()).unwrap()) ++ .collect(); ++ assert_eq!(s.len(), $id::lanes()); ++ for (index, ss) in s.into_iter().enumerate() { ++ let mut e = TinyString::new(); ++ write!(&mut e, "{:#X}", v.extract(index)).unwrap(); ++ assert_eq!(ss, e); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/from.rs b/third_party/rust/packed_simd/src/api/from.rs +new file mode 100644 +index 000000000000..c30c4d6e216d +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/from.rs +@@ -0,0 +1,7 @@ ++//! 
Implementations of the `From` and `Into` traits ++ ++#[macro_use] ++mod from_array; ++ ++#[macro_use] ++mod from_vector; +diff --git a/third_party/rust/packed_simd/src/api/from/from_array.rs b/third_party/rust/packed_simd/src/api/from/from_array.rs +new file mode 100644 +index 000000000000..964d1501df6a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/from/from_array.rs +@@ -0,0 +1,121 @@ ++//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types. ++ ++macro_rules! impl_from_array { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt ++ | ($non_default_array:expr, $non_default_vec:expr)) => { ++ impl From<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn from(array: [$elem_ty; $elem_count]) -> Self { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { array }.vec } ++ } ++ } ++ ++ impl From<$id> for [$elem_ty; $elem_count] { ++ #[inline] ++ fn from(vec: $id) -> Self { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { vec }.array } ++ } ++ } ++ ++ // FIXME: `Into::into` is not inline, but due to ++ // the blanket impl in `std`, which is not ++ // marked `default`, we cannot override it here with ++ // specialization. ++ /* ++ impl Into<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn into(self) -> [$elem_ty; $elem_count] { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { vec: self }.array } ++ } ++ } ++ ++ impl Into<$id> for [$elem_ty; $elem_count] { ++ #[inline] ++ fn into(self) -> $id { ++ union U { ++ array: [$elem_ty; $elem_count], ++ vec: $id, ++ } ++ unsafe { U { array: self }.vec } ++ } ++ } ++ */ ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ mod [<$id _from>] { ++ use super::*; ++ #[test] ++ fn array() { ++ let vec: $id = Default::default(); ++ ++ // FIXME: Workaround for arrays with more than 32 ++ // elements. ++ // ++ // Safe because we never take a reference to any ++ // uninitialized element. ++ union W { ++ array: [$elem_ty; $elem_count], ++ other: () ++ } ++ let mut array = W { other: () }; ++ for i in 0..$elem_count { ++ let default: $elem_ty = Default::default(); ++ // note: array.other is the active member and ++ // initialized so we can take a reference to it: ++ let p = unsafe { ++ &mut array.other as *mut () as *mut $elem_ty ++ }; ++ // note: default is a valid bit-pattern for ++ // $elem_ty: ++ unsafe { ++ crate::ptr::write(p.wrapping_add(i), default) ++ }; ++ } ++ // note: the array variant of the union is properly ++ // initialized: ++ let mut array = unsafe { ++ array.array ++ }; ++ ++ array[0] = $non_default_array; ++ let vec = vec.replace(0, $non_default_vec); ++ ++ let vec_from_array = $id::from(array); ++ assert_eq!(vec_from_array, vec); ++ let array_from_vec ++ = <[$elem_ty; $elem_count]>::from(vec); ++ // FIXME: Workaround for arrays with more than 32 ++ // elements. ++ for i in 0..$elem_count { ++ assert_eq!(array_from_vec[i], array[i]); ++ } ++ ++ let vec_from_into_array: $id = array.into(); ++ assert_eq!(vec_from_into_array, vec); ++ let array_from_into_vec: [$elem_ty; $elem_count] ++ = vec.into(); ++ // FIXME: Workaround for arrays with more than 32 ++ // elements. 
++                        for i in 0..$elem_count {
++                            assert_eq!(array_from_into_vec[i], array[i]);
++                        }
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/from/from_vector.rs b/third_party/rust/packed_simd/src/api/from/from_vector.rs
+new file mode 100644
+index 000000000000..55f70016d51d
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/from/from_vector.rs
+@@ -0,0 +1,67 @@
++//! Implements `From` and `Into` for vector types.
++
++macro_rules! impl_from_vector {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
++     | $source:ident) => {
++        impl From<$source> for $id {
++            #[inline]
++            fn from(source: $source) -> Self {
++                fn static_assert_same_number_of_lanes<T, U>()
++                where
++                    T: crate::sealed::Simd,
++                    U: crate::sealed::Simd,
++                {
++                }
++                use crate::llvm::simd_cast;
++                static_assert_same_number_of_lanes::<$id, $source>();
++                Simd(unsafe { simd_cast(source.0) })
++            }
++        }
++
++        // FIXME: `Into::into` is not inline, but due to the blanket impl in
++        // `std`, which is not marked `default`, we cannot override it here
++        // with specialization.
++
++        /*
++        impl Into<$id> for $source {
++            #[inline]
++            fn into(self) -> $id {
++                unsafe { simd_cast(self) }
++            }
++        }
++        */
++
++        test_if! {
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _from_ $source>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn from() {
++                        assert_eq!($id::lanes(), $source::lanes());
++                        let source: $source = Default::default();
++                        let vec: $id = Default::default();
++
++                        let e = $id::from(source);
++                        assert_eq!(e, vec);
++
++                        let e: $id = source.into();
++                        assert_eq!(e, vec);
++                    }
++                }
++            }
++        }
++    };
++}
++
++macro_rules! impl_from_vectors {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
++     | $($source:ident),*) => {
++        $(
++            impl_from_vector!(
++                [$elem_ty; $elem_count]: $id | $test_tt | $source
++            );
++        )*
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/api/hash.rs b/third_party/rust/packed_simd/src/api/hash.rs
+new file mode 100644
+index 000000000000..08d42496ea8b
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/hash.rs
+@@ -0,0 +1,47 @@
++//! Implements `Hash` for vector types.
++
++macro_rules! impl_hash {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
++        impl crate::hash::Hash for $id {
++            #[inline]
++            fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
++                unsafe {
++                    union A {
++                        data: [$elem_ty; $id::lanes()],
++                        vec: $id,
++                    }
++                    A { vec: *self }.data.hash(state)
++                }
++            }
++        }
++
++        test_if! {
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _hash>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn hash() {
++                        use crate::hash::{Hash, Hasher};
++                        #[allow(deprecated)]
++                        use crate::hash::{SipHasher13};
++                        type A = [$elem_ty; $id::lanes()];
++                        let a: A = [42 as $elem_ty; $id::lanes()];
++                        assert_eq!(
++                            crate::mem::size_of::<A>(),
++                            crate::mem::size_of::<$id>()
++                        );
++                        #[allow(deprecated)]
++                        let mut a_hash = SipHasher13::new();
++                        let mut v_hash = a_hash.clone();
++                        a.hash(&mut a_hash);
++
++                        let v = $id::splat(42 as $elem_ty);
++                        v.hash(&mut v_hash);
++                        assert_eq!(a_hash.finish(), v_hash.finish());
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/into_bits.rs b/third_party/rust/packed_simd/src/api/into_bits.rs
+new file mode 100644
+index 000000000000..f2cc1bae5397
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/into_bits.rs
+@@ -0,0 +1,59 @@
++//! Implementation of `FromBits` and `IntoBits`.
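++//
++// A bitwise-reinterpretation sketch (illustrative; assumes the portable
++// `i32x4`/`u8x16` types and that `u8x16: FromBits<i32x4>` is provided by
++// the width-specific modules below):
++//
++//     use packed_simd::{i32x4, u8x16, IntoBits};
++//     let x = i32x4::new(-1, 0, -1, 0);
++//     let b: u8x16 = x.into_bits(); // same 128 bits, viewed as 16 bytes
++//     assert_eq!(b.extract(0), 0xff);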
++
++/// Safe lossless bitwise conversion from `T` to `Self`.
++pub trait FromBits<T>: crate::marker::Sized {
++    /// Safe lossless bitwise transmute from `T` to `Self`.
++    fn from_bits(t: T) -> Self;
++}
++
++/// Safe lossless bitwise conversion from `Self` to `T`.
++pub trait IntoBits<T>: crate::marker::Sized {
++    /// Safe lossless bitwise transmute from `self` to `T`.
++    fn into_bits(self) -> T;
++}
++
++/// `FromBits` implies `IntoBits`.
++impl<T, U> IntoBits<U> for T
++where
++    U: FromBits<T>,
++{
++    #[inline]
++    fn into_bits(self) -> U {
++        debug_assert!(
++            crate::mem::size_of::<Self>() == crate::mem::size_of::<U>()
++        );
++        U::from_bits(self)
++    }
++}
++
++/// `FromBits` and `IntoBits` are reflexive
++impl<T> FromBits<T> for T {
++    #[inline]
++    fn from_bits(t: Self) -> Self {
++        t
++    }
++}
++
++#[macro_use]
++mod macros;
++
++mod v16;
++pub use self::v16::*;
++
++mod v32;
++pub use self::v32::*;
++
++mod v64;
++pub use self::v64::*;
++
++mod v128;
++pub use self::v128::*;
++
++mod v256;
++pub use self::v256::*;
++
++mod v512;
++pub use self::v512::*;
++
++mod arch_specific;
++pub use self::arch_specific::*;
+diff --git a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
+new file mode 100644
+index 000000000000..6cc2fa37b728
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
+@@ -0,0 +1,190 @@
++//! `FromBits` and `IntoBits` between portable vector types and the
++//! architecture-specific vector types.
++#![rustfmt::skip]
++
++// FIXME: MIPS FromBits/IntoBits
++
++#[allow(unused)]
++use crate::*;
++
++/// This macro implements FromBits for the portable and the architecture
++/// specific vector types.
++///
++/// The "leaf" case is at the bottom, and the most generic case is at the top.
++/// The generic case is split into smaller cases recursively.
++macro_rules!
impl_arch { ++ ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*], ++ $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* | ++ from: $($from_ty:ident),* | into: $($into_ty:ident),* | ++ test: $test_tt:tt) => { ++ impl_arch!( ++ [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] | ++ from: $($from_ty),* | ++ into: $($into_ty),* | ++ test: $test_tt ++ ); ++ impl_arch!( ++ $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* | ++ from: $($from_ty),* | ++ into: $($into_ty),* | ++ test: $test_tt ++ ); ++ }; ++ ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] | ++ from: $($from_ty:ident),* | into: $($into_ty:ident),* | ++ test: $test_tt:tt) => { ++ // note: if target is "arm", "+v7,+neon" must be enabled ++ // and the std library must be recompiled with them ++ #[cfg(any( ++ not(target_arch = "arm"), ++ all(target_feature = "v7", target_feature = "neon", ++ any(feature = "core_arch", libcore_neon))) ++ )] ++ // note: if target is "powerpc", "altivec" must be enabled ++ // and the std library must be recompiled with it ++ #[cfg(any( ++ not(target_arch = "powerpc"), ++ all(target_feature = "altivec", feature = "core_arch"), ++ ))] ++ #[cfg(target_arch = $arch_tt)] ++ use crate::arch::$arch::{ ++ $($arch_ty),* ++ }; ++ ++ #[cfg(any( ++ not(target_arch = "arm"), ++ all(target_feature = "v7", target_feature = "neon", ++ any(feature = "core_arch", libcore_neon))) ++ )] ++ #[cfg(any( ++ not(target_arch = "powerpc"), ++ all(target_feature = "altivec", feature = "core_arch"), ++ ))] ++ #[cfg(target_arch = $arch_tt)] ++ impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* | ++ test: $test_tt); ++ }; ++ ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),* ++ | $($into_ty:ident),* | test: $test_tt:tt) => { ++ impl_arch!($arch_head | $($from_ty),* | $($into_ty),* | ++ test: $test_tt); ++ impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* | ++ test: $test_tt); ++ }; ++ ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* | ++ test: $test_tt:tt) => { ++ impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); ++ impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); ++ }; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// Implementations for the 64-bit wide vector types: ++ ++// FIXME: 64-bit single element types ++// FIXME: arm/aarch float16x4_t missing ++impl_arch!( ++ [x86["x86"]: __m64], [x86_64["x86_64"]: __m64], ++ [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, ++ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, ++ uint64x1_t], ++ [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, ++ poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t, ++ float64x1_t] | ++ from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 | ++ into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 | ++ test: test_v64 ++); ++ ++//////////////////////////////////////////////////////////////////////////////// ++// Implementations for the 128-bit wide vector types: ++ ++// FIXME: arm/aarch float16x8_t missing ++// FIXME: ppc vector_pixel missing ++// FIXME: ppc64 vector_Float16 missing ++// FIXME: ppc64 vector_signed_long_long missing ++// FIXME: ppc64 vector_unsigned_long_long missing ++// FIXME: ppc64 vector_bool_long_long missing ++// FIXME: ppc64 vector_signed___int128 missing ++// FIXME: ppc64 vector_unsigned___int128 missing ++impl_arch!( ++ [x86["x86"]: __m128, __m128i, __m128d], ++ [x86_64["x86_64"]: __m128, __m128i, __m128d], ++ 
[arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t, ++ poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t], ++ [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, ++ uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, ++ uint64x2_t, float64x2_t], ++ [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char, ++ vector_signed_short, vector_unsigned_short, vector_signed_int, ++ vector_unsigned_int, vector_float], ++ [powerpc64["powerpc64"]: vector_signed_char, vector_unsigned_char, ++ vector_signed_short, vector_unsigned_short, vector_signed_int, ++ vector_unsigned_int, vector_float, vector_signed_long, ++ vector_unsigned_long, vector_double] | ++ from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, ++ i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, ++ i128x1, u128x1 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc["powerpc"]: vector_bool_char], ++ [powerpc64["powerpc64"]: vector_bool_char] | ++ from: m8x16, m16x8, m32x4, m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc["powerpc"]: vector_bool_short], ++ [powerpc64["powerpc64"]: vector_bool_short] | ++ from: m16x8, m32x4, m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16, m16x8 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc["powerpc"]: vector_bool_int], ++ [powerpc64["powerpc64"]: vector_bool_int] | ++ from: m32x4, m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16, m16x8, m32x4 | ++ test: test_v128 ++); ++ ++impl_arch!( ++ [powerpc64["powerpc64"]: vector_bool_long] | ++ from: m64x2, m128x1 | ++ into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, ++ i64x2, u64x2, f64x2, i128x1, u128x1, ++ // Masks: ++ m8x16, m16x8, m32x4, m64x2 | ++ test: test_v128 ++); ++ ++//////////////////////////////////////////////////////////////////////////////// ++// Implementations for the 256-bit wide vector types ++ ++impl_arch!( ++ [x86["x86"]: __m256, __m256i, __m256d], ++ [x86_64["x86_64"]: __m256, __m256i, __m256d] | ++ from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, ++ i32x8, u32x8, f32x8, m32x8, ++ i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 | ++ into: i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, f32x8, ++ i64x4, u64x4, f64x4, i128x2, u128x2 | ++ test: test_v256 ++); ++ ++//////////////////////////////////////////////////////////////////////////////// ++// FIXME: Implementations for the 512-bit wide vector types +diff --git a/third_party/rust/packed_simd/src/api/into_bits/macros.rs b/third_party/rust/packed_simd/src/api/into_bits/macros.rs +new file mode 100644 +index 000000000000..8cec5b00479f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/macros.rs +@@ -0,0 +1,74 @@ ++//! Macros implementing `FromBits` ++ ++macro_rules! impl_from_bits_ { ++ ($id:ident[$test_tt:tt]: $from_ty:ident) => { ++ impl crate::api::into_bits::FromBits<$from_ty> for $id { ++ #[inline] ++ fn from_bits(x: $from_ty) -> Self { ++ unsafe { crate::mem::transmute(x) } ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
{
++                pub mod [<$id _from_bits_ $from_ty>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn test() {
++                        use crate::{
++                            ptr::{read_unaligned},
++                            mem::{size_of, zeroed}
++                        };
++                        use crate::IntoBits;
++                        assert_eq!(size_of::<$id>(),
++                                   size_of::<$from_ty>());
++                        // This is safe because we never create a reference to
++                        // uninitialized memory:
++                        let a: $from_ty = unsafe { zeroed() };
++
++                        let b_0: $id = crate::FromBits::from_bits(a);
++                        let b_1: $id = a.into_bits();
++
++                        // Check that these are byte-wise equal, that is,
++                        // that the bit patterns are identical:
++                        for i in 0..size_of::<$id>() {
++                            // This is safe because we only read initialized
++                            // memory in bounds. Also, taking a reference to
++                            // `b_i` is ok because the fields are initialized.
++                            unsafe {
++                                let b_0_v: u8 = read_unaligned(
++                                    (&b_0 as *const $id as *const u8)
++                                        .wrapping_add(i)
++                                );
++                                let b_1_v: u8 = read_unaligned(
++                                    (&b_1 as *const $id as *const u8)
++                                        .wrapping_add(i)
++                                );
++                                assert_eq!(b_0_v, b_1_v);
++                            }
++                        }
++                    }
++                }
++            }
++        }
++    };
++}
++
++macro_rules! impl_from_bits {
++    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
++        $(
++            impl_from_bits_!($id[$test_tt]: $from_ty);
++        )*
++    }
++}
++
++#[allow(unused)]
++macro_rules! impl_into_bits {
++    ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
++        $(
++            impl_from_bits_!($from_ty[$test_tt]: $id);
++        )*
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/api/into_bits/v128.rs b/third_party/rust/packed_simd/src/api/into_bits/v128.rs
+new file mode 100644
+index 000000000000..804dbf282d53
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs
+@@ -0,0 +1,28 @@
++//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors
++#![rustfmt::skip]
++
++#[allow(unused)]  // wasm_bindgen_test
++use crate::*;
++
++impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1);
++
++impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1);
++
++impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(m32x4[test_v128]: m64x2, m128x1);
++
++impl_from_bits!(i64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1);
++impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4,
i64x2, u64x2, m64x2, i128x1, u128x1, m128x1); ++impl_from_bits!(m64x2[test_v128]: m128x1); ++ ++impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1); ++impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1); ++// note: m128x1 cannot be constructed from all the other masks bit patterns in here ++ +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v16.rs b/third_party/rust/packed_simd/src/api/into_bits/v16.rs +new file mode 100644 +index 000000000000..1162a62e5bd1 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs +@@ -0,0 +1,9 @@ ++//! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x2[test_v16]: u8x2, m8x2); ++impl_from_bits!(u8x2[test_v16]: i8x2, m8x2); ++// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v256.rs b/third_party/rust/packed_simd/src/api/into_bits/v256.rs +new file mode 100644 +index 000000000000..cc7a6646b535 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs +@@ -0,0 +1,27 @@ ++//! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2); ++ ++impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2); ++ ++impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(m32x8[test_v256]: m64x4, m128x2); ++ ++impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2); ++impl_from_bits!(m64x4[test_v256]: m128x2); ++ ++impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2); ++impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, 
i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2); ++// note: m128x2 cannot be constructed from all the other masks bit patterns in here +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v32.rs b/third_party/rust/packed_simd/src/api/into_bits/v32.rs +new file mode 100644 +index 000000000000..2c183ecf1c77 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs +@@ -0,0 +1,13 @@ ++//! `FromBits` and `IntoBits` implementations for portable 32-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2); ++impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2); ++impl_from_bits!(m8x4[test_v32]: m16x2); ++ ++impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2); ++impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2); ++// note: m16x2 cannot be constructed from all m8x4 bit patterns +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v512.rs b/third_party/rust/packed_simd/src/api/into_bits/v512.rs +new file mode 100644 +index 000000000000..8dec6a7f63a0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs +@@ -0,0 +1,27 @@ ++//! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4); ++ ++impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4); ++ ++impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m32x16[test_v512]: m64x8, m128x4); ++ ++impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4); ++impl_from_bits!(m64x8[test_v512]: m128x4); ++ ++impl_from_bits!(i128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4); ++impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, 
i128x4, m128x4); ++// note: m128x4 cannot be constructed from all the other masks bit patterns in here +diff --git a/third_party/rust/packed_simd/src/api/into_bits/v64.rs b/third_party/rust/packed_simd/src/api/into_bits/v64.rs +new file mode 100644 +index 000000000000..8999d98e13f8 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs +@@ -0,0 +1,18 @@ ++//! `FromBits` and `IntoBits` implementations for portable 64-bit wide vectors ++#![rustfmt::skip] ++ ++#[allow(unused)] // wasm_bindgen_test ++use crate::*; ++ ++impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(m8x8[test_v64]: m16x4, m32x2); ++ ++impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2); ++impl_from_bits!(m16x4[test_v64]: m32x2); ++ ++impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2); ++impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2); ++impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2); ++// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns +diff --git a/third_party/rust/packed_simd/src/api/math.rs b/third_party/rust/packed_simd/src/api/math.rs +new file mode 100644 +index 000000000000..e7a8d256baf5 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math.rs +@@ -0,0 +1,4 @@ ++//! Implements vertical math operations ++ ++#[macro_use] ++mod float; +diff --git a/third_party/rust/packed_simd/src/api/math/float.rs b/third_party/rust/packed_simd/src/api/math/float.rs +new file mode 100644 +index 000000000000..c0ec46e91789 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float.rs +@@ -0,0 +1,61 @@ ++//! Implements vertical floating-point math operations. ++ ++#[macro_use] ++mod abs; ++ ++#[macro_use] ++mod consts; ++ ++#[macro_use] ++mod cos; ++ ++#[macro_use] ++mod exp; ++ ++#[macro_use] ++mod powf; ++ ++#[macro_use] ++mod ln; ++ ++#[macro_use] ++mod mul_add; ++ ++#[macro_use] ++mod mul_adde; ++ ++#[macro_use] ++mod recpre; ++ ++#[macro_use] ++mod rsqrte; ++ ++#[macro_use] ++mod sin; ++ ++#[macro_use] ++mod sqrt; ++ ++#[macro_use] ++mod sqrte; ++ ++macro_rules! impl_float_category { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => { ++ impl $id { ++ #[inline] ++ pub fn is_nan(self) -> $mask_ty { ++ self.ne(self) ++ } ++ ++ #[inline] ++ pub fn is_infinite(self) -> $mask_ty { ++ self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY) ++ } ++ ++ #[inline] ++ pub fn is_finite(self) -> $mask_ty { ++ !(self.is_nan() | self.is_infinite()) ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/abs.rs b/third_party/rust/packed_simd/src/api/math/float/abs.rs +new file mode 100644 +index 000000000000..1865bdb68ec6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/abs.rs +@@ -0,0 +1,31 @@ ++//! Implements vertical (lane-wise) floating-point `abs`. ++ ++macro_rules! impl_math_float_abs { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Absolute value. ++ #[inline] ++ pub fn abs(self) -> Self { ++ use crate::codegen::math::float::abs::Abs; ++ Abs::abs(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _math_abs>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn abs() { ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(o, o.abs()); ++ ++ let mo = $id::splat(-1 as $elem_ty); ++ assert_eq!(o, mo.abs()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/consts.rs b/third_party/rust/packed_simd/src/api/math/float/consts.rs +new file mode 100644 +index 000000000000..89f93a6d692b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/consts.rs +@@ -0,0 +1,86 @@ ++macro_rules! impl_float_consts { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident) => { ++ impl $id { ++ /// Machine epsilon value. ++ pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON); ++ ++ /// Smallest finite value. ++ pub const MIN: $id = $id::splat(core::$elem_ty::MIN); ++ ++ /// Smallest positive normal value. ++ pub const MIN_POSITIVE: $id = ++ $id::splat(core::$elem_ty::MIN_POSITIVE); ++ ++ /// Largest finite value. ++ pub const MAX: $id = $id::splat(core::$elem_ty::MAX); ++ ++ /// Not a Number (NaN). ++ pub const NAN: $id = $id::splat(core::$elem_ty::NAN); ++ ++ /// Infinity (∞). ++ pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY); ++ ++ /// Negative infinity (-∞). ++ pub const NEG_INFINITY: $id = ++ $id::splat(core::$elem_ty::NEG_INFINITY); ++ ++ /// Archimedes' constant (π) ++ pub const PI: $id = $id::splat(core::$elem_ty::consts::PI); ++ ++ /// π/2 ++ pub const FRAC_PI_2: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_2); ++ ++ /// π/3 ++ pub const FRAC_PI_3: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_3); ++ ++ /// π/4 ++ pub const FRAC_PI_4: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_4); ++ ++ /// π/6 ++ pub const FRAC_PI_6: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_6); ++ ++ /// π/8 ++ pub const FRAC_PI_8: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_PI_8); ++ ++ /// 1/π ++ pub const FRAC_1_PI: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_1_PI); ++ ++ /// 2/π ++ pub const FRAC_2_PI: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_2_PI); ++ ++ /// 2/sqrt(π) ++ pub const FRAC_2_SQRT_PI: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI); ++ ++ /// sqrt(2) ++ pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2); ++ ++ /// 1/sqrt(2) ++ pub const FRAC_1_SQRT_2: $id = ++ $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2); ++ ++ /// Euler's number (e) ++ pub const E: $id = $id::splat(core::$elem_ty::consts::E); ++ ++ /// log2(e) ++ pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E); ++ ++ /// log10(e) ++ pub const LOG10_E: $id = ++ $id::splat(core::$elem_ty::consts::LOG10_E); ++ ++ /// ln(2) ++ pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2); ++ ++ /// ln(10) ++ pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10); ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/cos.rs b/third_party/rust/packed_simd/src/api/math/float/cos.rs +new file mode 100644 +index 000000000000..e5b8f46036c7 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/cos.rs +@@ -0,0 +1,44 @@ ++//! Implements vertical (lane-wise) floating-point `cos`. ++ ++macro_rules! impl_math_float_cos { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Cosine. ++ #[inline] ++ pub fn cos(self) -> Self { ++ use crate::codegen::math::float::cos::Cos; ++ Cos::cos(self) ++ } ++ ++ /// Cosine of `self * PI`. 
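++            ///
++            /// A minimal sketch (assuming the portable `f32x4` type defined
++            /// by this crate); taking the argument in half-turns means
++            /// `cos_pi(1.)` computes `cos(π)`:
++            ///
++            /// ```rust,ignore
++            /// use packed_simd::f32x4;
++            /// let x = f32x4::splat(1.);                  // one half-turn, i.e. π radians
++            /// assert_eq!(x.cos_pi(), f32x4::splat(-1.)); // cos(π) == -1
++            /// ```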
++ #[inline] ++ pub fn cos_pi(self) -> Self { ++ use crate::codegen::math::float::cos_pi::CosPi; ++ CosPi::cos_pi(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_cos>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn cos() { ++ use crate::$elem_ty::consts::PI; ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let p = $id::splat(PI as $elem_ty); ++ let ph = $id::splat(PI as $elem_ty / 2.); ++ let z_r = $id::splat((PI as $elem_ty / 2.).cos()); ++ let o_r = $id::splat((PI as $elem_ty).cos()); ++ ++ assert_eq!(o, z.cos()); ++ assert_eq!(z_r, ph.cos()); ++ assert_eq!(o_r, p.cos()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/exp.rs b/third_party/rust/packed_simd/src/api/math/float/exp.rs +new file mode 100644 +index 000000000000..e3356d853a83 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/exp.rs +@@ -0,0 +1,33 @@ ++//! Implements vertical (lane-wise) floating-point `exp`. ++ ++macro_rules! impl_math_float_exp { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Returns the exponential function of `self`: `e^(self)`. ++ #[inline] ++ pub fn exp(self) -> Self { ++ use crate::codegen::math::float::exp::Exp; ++ Exp::exp(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_exp>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn exp() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(o, z.exp()); ++ ++ let e = $id::splat(crate::f64::consts::E as $elem_ty); ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ assert!((e - o.exp()).abs().le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/ln.rs b/third_party/rust/packed_simd/src/api/math/float/ln.rs +new file mode 100644 +index 000000000000..5ceb9173ae05 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/ln.rs +@@ -0,0 +1,33 @@ ++//! Implements vertical (lane-wise) floating-point `ln`. ++ ++macro_rules! impl_math_float_ln { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Returns the natural logarithm of `self`. ++ #[inline] ++ pub fn ln(self) -> Self { ++ use crate::codegen::math::float::ln::Ln; ++ Ln::ln(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_ln>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ln() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(z, o.ln()); ++ ++ let e = $id::splat(crate::f64::consts::E as $elem_ty); ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ assert!((o - e.ln()).abs().le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_add.rs b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs +new file mode 100644 +index 000000000000..4b170ee2b755 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs +@@ -0,0 +1,44 @@ ++//! Implements vertical (lane-wise) floating-point `mul_add`. ++ ++macro_rules! 
impl_math_float_mul_add {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
++        impl $id {
++            /// Fused multiply add: `self * y + z`
++            #[inline]
++            pub fn mul_add(self, y: Self, z: Self) -> Self {
++                use crate::codegen::math::float::mul_add::MulAdd;
++                MulAdd::mul_add(self, y, z)
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _math_mul_add>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn mul_add() {
++                        let z = $id::splat(0 as $elem_ty);
++                        let o = $id::splat(1 as $elem_ty);
++                        let t = $id::splat(2 as $elem_ty);
++                        let t3 = $id::splat(3 as $elem_ty);
++                        let f = $id::splat(4 as $elem_ty);
++
++                        assert_eq!(z, z.mul_add(z, z));
++                        assert_eq!(o, o.mul_add(o, z));
++                        assert_eq!(o, o.mul_add(z, o));
++                        assert_eq!(o, z.mul_add(o, o));
++
++                        assert_eq!(t, o.mul_add(o, o));
++                        assert_eq!(t, o.mul_add(t, z));
++                        assert_eq!(t, t.mul_add(o, z));
++
++                        assert_eq!(f, t.mul_add(t, z));
++                        assert_eq!(f, t.mul_add(o, t));
++                        assert_eq!(t3, t.mul_add(o, o));
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs
+new file mode 100644
+index 000000000000..c5b27110f2d7
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs
+@@ -0,0 +1,48 @@
++//! Implements vertical (lane-wise) floating-point `mul_adde`.
++
++macro_rules! impl_math_float_mul_adde {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
++        impl $id {
++            /// Fused multiply add estimate: ~= `self * y + z`
++            ///
++            /// While fused multiply-add (`fma`) computes the intermediate
++            /// `self * y` with infinite precision and rounds only once,
++            /// `mul_adde` has, at worst, the same precision as a multiply
++            /// followed by an add. This might be more efficient on
++            /// architectures that do not have an `fma` instruction.
++            #[inline]
++            pub fn mul_adde(self, y: Self, z: Self) -> Self {
++                use crate::codegen::math::float::mul_adde::MulAddE;
++                MulAddE::mul_adde(self, y, z)
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _math_mul_adde>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn mul_adde() {
++                        let z = $id::splat(0 as $elem_ty);
++                        let o = $id::splat(1 as $elem_ty);
++                        let t = $id::splat(2 as $elem_ty);
++                        let t3 = $id::splat(3 as $elem_ty);
++                        let f = $id::splat(4 as $elem_ty);
++
++                        assert_eq!(z, z.mul_adde(z, z));
++                        assert_eq!(o, o.mul_adde(o, z));
++                        assert_eq!(o, o.mul_adde(z, o));
++                        assert_eq!(o, z.mul_adde(o, o));
++
++                        assert_eq!(t, o.mul_adde(o, o));
++                        assert_eq!(t, o.mul_adde(t, z));
++                        assert_eq!(t, t.mul_adde(o, z));
++
++                        assert_eq!(f, t.mul_adde(t, z));
++                        assert_eq!(f, t.mul_adde(o, t));
++                        assert_eq!(t3, t.mul_adde(o, o));
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/math/float/powf.rs b/third_party/rust/packed_simd/src/api/math/float/powf.rs
+new file mode 100644
+index 000000000000..83dc9ff9c05e
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/math/float/powf.rs
+@@ -0,0 +1,36 @@
++//! Implements vertical (lane-wise) floating-point `powf`.
++
++macro_rules! impl_math_float_powf {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
++        impl $id {
++            /// Raises `self` to the floating-point power of `x`.
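++            ///
++            /// A minimal sketch (assuming the portable `f32x4` type defined
++            /// by this crate):
++            ///
++            /// ```rust,ignore
++            /// use packed_simd::f32x4;
++            /// let base = f32x4::splat(2.);
++            /// assert_eq!(base.powf(f32x4::splat(3.)), f32x4::splat(8.)); // 2^3 == 8
++            /// ```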
++ #[inline] ++ pub fn powf(self, x: Self) -> Self { ++ use crate::codegen::math::float::powf::Powf; ++ Powf::powf(self, x) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_powf>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn powf() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ assert_eq!(o, o.powf(z)); ++ assert_eq!(o, t.powf(z)); ++ assert_eq!(o, o.powf(o)); ++ assert_eq!(t, t.powf(o)); ++ ++ let f = $id::splat(4 as $elem_ty); ++ assert_eq!(f, t.powf(t)); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/recpre.rs b/third_party/rust/packed_simd/src/api/math/float/recpre.rs +new file mode 100644 +index 000000000000..127f0b2ff674 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/recpre.rs +@@ -0,0 +1,36 @@ ++//! Implements vertical (lane-wise) floating-point `recpre`. ++ ++macro_rules! impl_math_float_recpre { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Reciprocal estimate: `~= 1. / self`. ++ /// ++ /// FIXME: The precision of the estimate is currently unspecified. ++ #[inline] ++ pub fn recpre(self) -> Self { ++ $id::splat(1.) / self ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_recpre>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn recpre() { ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let error = (o - o.recpre()).abs(); ++ assert!(error.le(tol).all()); ++ ++ let t = $id::splat(2 as $elem_ty); ++ let e = 0.5; ++ let error = (e - t.recpre()).abs(); ++ assert!(error.le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs +new file mode 100644 +index 000000000000..c77977f7b1cd +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs +@@ -0,0 +1,40 @@ ++//! Implements vertical (lane-wise) floating-point `rsqrte`. ++ ++macro_rules! impl_math_float_rsqrte { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`. ++ /// ++ /// FIXME: The precision of the estimate is currently unspecified. ++ #[inline] ++ pub fn rsqrte(self) -> Self { ++ unsafe { ++ use crate::llvm::simd_fsqrt; ++ $id::splat(1.) / Simd(simd_fsqrt(self.0)) ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_rsqrte>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn rsqrte() { ++ use crate::$elem_ty::consts::SQRT_2; ++ let tol = $id::splat(2.4e-4 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let error = (o - o.rsqrte()).abs(); ++ assert!(error.le(tol).all()); ++ ++ let t = $id::splat(2 as $elem_ty); ++ let e = 1. / SQRT_2; ++ let error = (e - t.rsqrte()).abs(); ++ assert!(error.le(tol).all()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/sin.rs b/third_party/rust/packed_simd/src/api/math/float/sin.rs +new file mode 100644 +index 000000000000..49908319b126 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/sin.rs +@@ -0,0 +1,50 @@ ++//! 
Implements vertical (lane-wise) floating-point `sin`. ++ ++macro_rules! impl_math_float_sin { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Sine. ++ #[inline] ++ pub fn sin(self) -> Self { ++ use crate::codegen::math::float::sin::Sin; ++ Sin::sin(self) ++ } ++ ++ /// Sine of `self * PI`. ++ #[inline] ++ pub fn sin_pi(self) -> Self { ++ use crate::codegen::math::float::sin_pi::SinPi; ++ SinPi::sin_pi(self) ++ } ++ ++ /// Sine and cosine of `self * PI`. ++ #[inline] ++ pub fn sin_cos_pi(self) -> (Self, Self) { ++ use crate::codegen::math::float::sin_cos_pi::SinCosPi; ++ SinCosPi::sin_cos_pi(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_sin>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn sin() { ++ use crate::$elem_ty::consts::PI; ++ let z = $id::splat(0 as $elem_ty); ++ let p = $id::splat(PI as $elem_ty); ++ let ph = $id::splat(PI as $elem_ty / 2.); ++ let o_r = $id::splat((PI as $elem_ty / 2.).sin()); ++ let z_r = $id::splat((PI as $elem_ty).sin()); ++ ++ assert_eq!(z, z.sin()); ++ assert_eq!(o_r, ph.sin()); ++ assert_eq!(z_r, p.sin()); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrt.rs b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs +new file mode 100644 +index 000000000000..ae624122d0e2 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs +@@ -0,0 +1,35 @@ ++//! Implements vertical (lane-wise) floating-point `sqrt`. ++ ++macro_rules! impl_math_float_sqrt { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ #[inline] ++ pub fn sqrt(self) -> Self { ++ use crate::codegen::math::float::sqrt::Sqrt; ++ Sqrt::sqrt(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _math_sqrt>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn sqrt() { ++ use crate::$elem_ty::consts::SQRT_2; ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ assert_eq!(z, z.sqrt()); ++ assert_eq!(o, o.sqrt()); ++ ++ let t = $id::splat(2 as $elem_ty); ++ let e = $id::splat(SQRT_2); ++ assert_eq!(e, t.sqrt()); ++ ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrte.rs b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs +new file mode 100644 +index 000000000000..f7ffad748d9c +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs +@@ -0,0 +1,44 @@ ++//! Implements vertical (lane-wise) floating-point `sqrte`. ++ ++macro_rules! impl_math_float_sqrte { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Square-root estimate. ++ /// ++ /// FIXME: The precision of the estimate is currently unspecified. ++ #[inline] ++ pub fn sqrte(self) -> Self { ++ use crate::codegen::math::float::sqrte::Sqrte; ++ Sqrte::sqrte(self) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{
++                pub mod [<$id _math_sqrte>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn sqrte() {
++                        use crate::$elem_ty::consts::SQRT_2;
++                        let tol = $id::splat(2.4e-4 as $elem_ty);
++
++                        let z = $id::splat(0 as $elem_ty);
++                        let error = (z - z.sqrte()).abs();
++                        assert!(error.le(tol).all());
++
++                        let o = $id::splat(1 as $elem_ty);
++                        let error = (o - o.sqrte()).abs();
++                        assert!(error.le(tol).all());
++
++                        let t = $id::splat(2 as $elem_ty);
++                        let e = $id::splat(SQRT_2 as $elem_ty);
++                        let error = (e - t.sqrte()).abs();
++
++                        assert!(error.le(tol).all());
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/minimal.rs b/third_party/rust/packed_simd/src/api/minimal.rs
+new file mode 100644
+index 000000000000..840d9e32585d
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/minimal.rs
+@@ -0,0 +1,6 @@
++#[macro_use]
++mod iuf;
++#[macro_use]
++mod mask;
++#[macro_use]
++mod ptr;
+diff --git a/third_party/rust/packed_simd/src/api/minimal/iuf.rs b/third_party/rust/packed_simd/src/api/minimal/iuf.rs
+new file mode 100644
+index 000000000000..58ffabab994f
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs
+@@ -0,0 +1,167 @@
++//! Minimal API of signed integer, unsigned integer, and floating-point
++//! vectors.
++
++macro_rules! impl_minimal_iuf {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident |
++     $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
++
++        $(#[$doc])*
++        pub type $id = Simd<[$elem_ty; $elem_count]>;
++
++        impl sealed::Simd for $id {
++            type Element = $elem_ty;
++            const LANES: usize = $elem_count;
++            type LanesType = [u32; $elem_count];
++        }
++
++        impl $id {
++            /// Creates a new instance with each vector element initialized
++            /// with the provided values.
++            #[inline]
++            #[allow(clippy::too_many_arguments)]
++            pub const fn new($($elem_name: $elem_ty),*) -> Self {
++                Simd(codegen::$id($($elem_name as $ielem_ty),*))
++            }
++
++            /// Returns the number of vector lanes.
++            #[inline]
++            pub const fn lanes() -> usize {
++                $elem_count
++            }
++
++            /// Constructs a new instance with each element initialized to
++            /// `value`.
++            #[inline]
++            pub const fn splat(value: $elem_ty) -> Self {
++                Simd(codegen::$id($({
++                    #[allow(non_camel_case_types, dead_code)]
++                    struct $elem_name;
++                    value as $ielem_ty
++                }),*))
++            }
++
++            /// Extracts the value at `index`.
++            ///
++            /// # Panics
++            ///
++            /// If `index >= Self::lanes()`.
++            #[inline]
++            pub fn extract(self, index: usize) -> $elem_ty {
++                assert!(index < $elem_count);
++                unsafe { self.extract_unchecked(index) }
++            }
++
++            /// Extracts the value at `index`.
++            ///
++            /// # Precondition
++            ///
++            /// If `index >= Self::lanes()` the behavior is undefined.
++            #[inline]
++            pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
++                use crate::llvm::simd_extract;
++                let e: $ielem_ty = simd_extract(self.0, index as u32);
++                e as $elem_ty
++            }
++
++            /// Returns a new vector where the value at `index` is replaced by `new_value`.
++            ///
++            /// # Panics
++            ///
++            /// If `index >= Self::lanes()`.
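++            ///
++            /// # Example
++            ///
++            /// A minimal sketch (assuming the portable `i32x4` type defined
++            /// by this crate):
++            ///
++            /// ```rust,ignore
++            /// use packed_simd::i32x4;
++            /// let v = i32x4::new(0, 1, 2, 3);
++            /// let w = v.replace(2, 42);        // returns a new vector
++            /// assert_eq!(w, i32x4::new(0, 1, 42, 3));
++            /// assert_eq!(v.extract(2), 2);     // `v` itself is unchanged
++            /// ```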
++            #[inline]
++            #[must_use = "replace does not modify the original value - \
++                          it returns a new vector with the value at `index` \
++                          replaced by `new_value`"
++            ]
++            pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
++                assert!(index < $elem_count);
++                unsafe { self.replace_unchecked(index, new_value) }
++            }
++
++            /// Returns a new vector where the value at `index` is replaced by `new_value`.
++            ///
++            /// # Precondition
++            ///
++            /// If `index >= Self::lanes()` the behavior is undefined.
++            #[inline]
++            #[must_use = "replace_unchecked does not modify the original value - \
++                          it returns a new vector with the value at `index` \
++                          replaced by `new_value`"
++            ]
++            pub unsafe fn replace_unchecked(
++                self,
++                index: usize,
++                new_value: $elem_ty,
++            ) -> Self {
++                use crate::llvm::simd_insert;
++                Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty))
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _minimal>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn minimal() {
++                        // lanes:
++                        assert_eq!($elem_count, $id::lanes());
++
++                        // splat and extract / extract_unchecked:
++                        const VAL: $elem_ty = 7 as $elem_ty;
++                        const VEC: $id = $id::splat(VAL);
++                        for i in 0..$id::lanes() {
++                            assert_eq!(VAL, VEC.extract(i));
++                            assert_eq!(
++                                VAL, unsafe { VEC.extract_unchecked(i) }
++                            );
++                        }
++
++                        // replace / replace_unchecked
++                        let new_vec = VEC.replace(0, 42 as $elem_ty);
++                        for i in 0..$id::lanes() {
++                            if i == 0 {
++                                assert_eq!(42 as $elem_ty, new_vec.extract(i));
++                            } else {
++                                assert_eq!(VAL, new_vec.extract(i));
++                            }
++                        }
++                        let new_vec = unsafe {
++                            VEC.replace_unchecked(0, 42 as $elem_ty)
++                        };
++                        for i in 0..$id::lanes() {
++                            if i == 0 {
++                                assert_eq!(42 as $elem_ty, new_vec.extract(i));
++                            } else {
++                                assert_eq!(VAL, new_vec.extract(i));
++                            }
++                        }
++                    }
++
++                    // FIXME: wasm-bindgen-test does not support #[should_panic]
++                    // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    #[cfg(not(target_arch = "wasm32"))]
++                    #[test]
++                    #[should_panic]
++                    fn extract_panic_oob() {
++                        const VAL: $elem_ty = 7 as $elem_ty;
++                        const VEC: $id = $id::splat(VAL);
++                        let _ = VEC.extract($id::lanes());
++                    }
++                    // FIXME: wasm-bindgen-test does not support #[should_panic]
++                    // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    #[cfg(not(target_arch = "wasm32"))]
++                    #[test]
++                    #[should_panic]
++                    fn replace_panic_oob() {
++                        const VAL: $elem_ty = 7 as $elem_ty;
++                        const VEC: $id = $id::splat(VAL);
++                        let _ = VEC.replace($id::lanes(), 42 as $elem_ty);
++                    }
++                }
++            }
++        }
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/api/minimal/mask.rs b/third_party/rust/packed_simd/src/api/minimal/mask.rs
+new file mode 100644
+index 000000000000..e65be95db12c
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs
+@@ -0,0 +1,174 @@
++//! Minimal API of mask vectors.
++
++macro_rules! impl_minimal_mask {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
++     | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
++        $(#[$doc])*
++        pub type $id = Simd<[$elem_ty; $elem_count]>;
++
++        impl sealed::Simd for $id {
++            type Element = $elem_ty;
++            const LANES: usize = $elem_count;
++            type LanesType = [u32; $elem_count];
++        }
++
++        impl $id {
++            /// Creates a new instance with each vector element initialized
++            /// with the provided values.
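++            ///
++            /// A minimal sketch (assuming the portable `m8x2` mask type
++            /// defined by this crate):
++            ///
++            /// ```rust,ignore
++            /// use packed_simd::m8x2;
++            /// let m = m8x2::new(true, false);
++            /// assert!(m.extract(0));
++            /// assert!(!m.extract(1));
++            /// ```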
++            #[inline]
++            #[allow(clippy::too_many_arguments)]
++            pub const fn new($($elem_name: bool),*) -> Self {
++                Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*))
++            }
++
++            /// Converts a boolean type into the type of the vector lanes.
++            #[inline]
++            #[allow(clippy::indexing_slicing)]
++            const fn bool_to_internal(x: bool) -> $ielem_ty {
++                [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize]
++            }
++
++            /// Returns the number of vector lanes.
++            #[inline]
++            pub const fn lanes() -> usize {
++                $elem_count
++            }
++
++            /// Constructs a new instance with each element initialized to
++            /// `value`.
++            #[inline]
++            pub const fn splat(value: bool) -> Self {
++                Simd(codegen::$id($({
++                    #[allow(non_camel_case_types, dead_code)]
++                    struct $elem_name;
++                    Self::bool_to_internal(value)
++                }),*))
++            }
++
++            /// Extracts the value at `index`.
++            ///
++            /// # Panics
++            ///
++            /// If `index >= Self::lanes()`.
++            #[inline]
++            pub fn extract(self, index: usize) -> bool {
++                assert!(index < $elem_count);
++                unsafe { self.extract_unchecked(index) }
++            }
++
++            /// Extracts the value at `index`.
++            ///
++            /// # Precondition
++            ///
++            /// If `index >= Self::lanes()` the behavior is undefined.
++            #[inline]
++            pub unsafe fn extract_unchecked(self, index: usize) -> bool {
++                use crate::llvm::simd_extract;
++                let x: $ielem_ty = simd_extract(self.0, index as u32);
++                x != 0
++            }
++
++            /// Returns a new vector where the value at `index` is replaced by
++            /// `new_value`.
++            ///
++            /// # Panics
++            ///
++            /// If `index >= Self::lanes()`.
++            #[inline]
++            #[must_use = "replace does not modify the original value - \
++                          it returns a new vector with the value at `index` \
++                          replaced by `new_value`"
++            ]
++            pub fn replace(self, index: usize, new_value: bool) -> Self {
++                assert!(index < $elem_count);
++                unsafe { self.replace_unchecked(index, new_value) }
++            }
++
++            /// Returns a new vector where the value at `index` is replaced by
++            /// `new_value`.
++            ///
++            /// # Precondition
++            ///
++            /// If `index >= Self::lanes()` the behavior is undefined.
++            #[inline]
++            #[must_use = "replace_unchecked does not modify the original value - \
++                          it returns a new vector with the value at `index` \
++                          replaced by `new_value`"
++            ]
++            pub unsafe fn replace_unchecked(
++                self,
++                index: usize,
++                new_value: bool,
++            ) -> Self {
++                use crate::llvm::simd_insert;
++                Simd(simd_insert(self.0, index as u32,
++                                 Self::bool_to_internal(new_value)))
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item!
{
++                pub mod [<$id _minimal>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn minimal() {
++                        // TODO: test new
++
++                        // lanes:
++                        assert_eq!($elem_count, $id::lanes());
++
++                        // splat and extract / extract_unchecked:
++                        let vec = $id::splat(true);
++                        for i in 0..$id::lanes() {
++                            assert_eq!(true, vec.extract(i));
++                            assert_eq!(true,
++                                       unsafe { vec.extract_unchecked(i) }
++                            );
++                        }
++
++                        // replace / replace_unchecked
++                        let new_vec = vec.replace(0, false);
++                        for i in 0..$id::lanes() {
++                            if i == 0 {
++                                assert_eq!(false, new_vec.extract(i));
++                            } else {
++                                assert_eq!(true, new_vec.extract(i));
++                            }
++                        }
++                        let new_vec = unsafe {
++                            vec.replace_unchecked(0, false)
++                        };
++                        for i in 0..$id::lanes() {
++                            if i == 0 {
++                                assert_eq!(false, new_vec.extract(i));
++                            } else {
++                                assert_eq!(true, new_vec.extract(i));
++                            }
++                        }
++                    }
++
++                    // FIXME: wasm-bindgen-test does not support #[should_panic]
++                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    #[cfg(not(target_arch = "wasm32"))]
++                    #[test]
++                    #[should_panic]
++                    fn extract_panic_oob() {
++                        let vec = $id::splat(false);
++                        let _ = vec.extract($id::lanes());
++                    }
++                    // FIXME: wasm-bindgen-test does not support #[should_panic]
++                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    #[cfg(not(target_arch = "wasm32"))]
++                    #[test]
++                    #[should_panic]
++                    fn replace_panic_oob() {
++                        let vec = $id::splat(false);
++                        let _ = vec.replace($id::lanes(), true);
++                    }
++                }
++            }
++        }
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/api/minimal/ptr.rs b/third_party/rust/packed_simd/src/api/minimal/ptr.rs
+new file mode 100644
+index 000000000000..75e5aad5c065
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs
+@@ -0,0 +1,1385 @@
++//! Minimal API of pointer vectors.
++
++macro_rules! impl_minimal_p {
++    ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident,
++     $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt
++     | $($elem_name:ident),+ | ($true:expr, $false:expr) |
++     $(#[$doc:meta])*) => {
++
++        $(#[$doc])*
++        pub type $id<T> = Simd<[$elem_ty; $elem_count]>;
++
++        impl<T> sealed::Simd for $id<T> {
++            type Element = $elem_ty;
++            const LANES: usize = $elem_count;
++            type LanesType = [u32; $elem_count];
++        }
++
++        impl<T> $id<T> {
++            /// Creates a new instance with each vector element initialized
++            /// with the provided values.
++            #[inline]
++            #[allow(clippy::too_many_arguments)]
++            pub const fn new($($elem_name: $elem_ty),*) -> Self {
++                Simd(codegen::$id($($elem_name),*))
++            }
++
++            /// Returns the number of vector lanes.
++            #[inline]
++            pub const fn lanes() -> usize {
++                $elem_count
++            }
++
++            /// Constructs a new instance with each element initialized to
++            /// `value`.
++            #[inline]
++            pub const fn splat(value: $elem_ty) -> Self {
++                Simd(codegen::$id($({
++                    #[allow(non_camel_case_types, dead_code)]
++                    struct $elem_name;
++                    value
++                }),*))
++            }
++
++            /// Constructs a new instance with each element initialized to
++            /// `null`.
++            #[inline]
++            pub const fn null() -> Self {
++                Self::splat(crate::ptr::null_mut() as $elem_ty)
++            }
++
++            /// Returns a mask that selects those lanes that contain `null`
++            /// pointers.
++            #[inline]
++            pub fn is_null(self) -> $mask_ty {
++                self.eq(Self::null())
++            }
++
++            /// Extracts the value at `index`.
++            ///
++            /// # Panics
++            ///
++            /// If `index >= Self::lanes()`.
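++            ///
++            /// # Example
++            ///
++            /// A minimal sketch (assuming the `cptrx2` pointer vector type
++            /// defined by this crate):
++            ///
++            /// ```rust,ignore
++            /// use packed_simd::cptrx2;
++            /// let x = 42_i32;
++            /// let v = cptrx2::new(&x as *const i32, core::ptr::null());
++            /// assert_eq!(v.extract(0), &x as *const i32);
++            /// assert!(v.extract(1).is_null());
++            /// ```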
++            #[inline]
++            pub fn extract(self, index: usize) -> $elem_ty {
++                assert!(index < $elem_count);
++                unsafe { self.extract_unchecked(index) }
++            }
++
++            /// Extracts the value at `index`.
++            ///
++            /// # Precondition
++            ///
++            /// If `index >= Self::lanes()` the behavior is undefined.
++            #[inline]
++            pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
++                use crate::llvm::simd_extract;
++                simd_extract(self.0, index as u32)
++            }
++
++            /// Returns a new vector where the value at `index` is replaced by
++            /// `new_value`.
++            ///
++            /// # Panics
++            ///
++            /// If `index >= Self::lanes()`.
++            #[inline]
++            #[must_use = "replace does not modify the original value - \
++                          it returns a new vector with the value at `index` \
++                          replaced by `new_value`"
++            ]
++            #[allow(clippy::not_unsafe_ptr_arg_deref)]
++            pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
++                assert!(index < $elem_count);
++                unsafe { self.replace_unchecked(index, new_value) }
++            }
++
++            /// Returns a new vector where the value at `index` is replaced by `new_value`.
++            ///
++            /// # Precondition
++            ///
++            /// If `index >= Self::lanes()` the behavior is undefined.
++            #[inline]
++            #[must_use = "replace_unchecked does not modify the original value - \
++                          it returns a new vector with the value at `index` \
++                          replaced by `new_value`"
++            ]
++            pub unsafe fn replace_unchecked(
++                self,
++                index: usize,
++                new_value: $elem_ty,
++            ) -> Self {
++                use crate::llvm::simd_insert;
++                Simd(simd_insert(self.0, index as u32, new_value))
++            }
++        }
++
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _minimal>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn minimal() {
++                        // lanes:
++                        assert_eq!($elem_count, $id::<i32>::lanes());
++
++                        // splat and extract / extract_unchecked:
++                        let VAL7: <$id<i32> as sealed::Simd>::Element
++                            = $ref!(7);
++                        let VAL42: <$id<i32> as sealed::Simd>::Element
++                            = $ref!(42);
++                        let VEC: $id<i32> = $id::splat(VAL7);
++                        for i in 0..$id::<i32>::lanes() {
++                            assert_eq!(VAL7, VEC.extract(i));
++                            assert_eq!(
++                                VAL7, unsafe { VEC.extract_unchecked(i) }
++                            );
++                        }
++
++                        // replace / replace_unchecked
++                        let new_vec = VEC.replace(0, VAL42);
++                        for i in 0..$id::<i32>::lanes() {
++                            if i == 0 {
++                                assert_eq!(VAL42, new_vec.extract(i));
++                            } else {
++                                assert_eq!(VAL7, new_vec.extract(i));
++                            }
++                        }
++                        let new_vec = unsafe {
++                            VEC.replace_unchecked(0, VAL42)
++                        };
++                        for i in 0..$id::<i32>::lanes() {
++                            if i == 0 {
++                                assert_eq!(VAL42, new_vec.extract(i));
++                            } else {
++                                assert_eq!(VAL7, new_vec.extract(i));
++                            }
++                        }
++
++                        let mut n = $id::<i32>::null();
++                        assert_eq!(
++                            n,
++                            $id::<i32>::splat(unsafe { crate::mem::zeroed() })
++                        );
++                        assert!(n.is_null().all());
++                        n = n.replace(
++                            0, unsafe { crate::mem::transmute(1_isize) }
++                        );
++                        assert!(!n.is_null().all());
++                        if $id::<i32>::lanes() > 1 {
++                            assert!(n.is_null().any());
++                        } else {
++                            assert!(!n.is_null().any());
++                        }
++                    }
++
++                    // FIXME: wasm-bindgen-test does not support #[should_panic]
++                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    #[cfg(not(target_arch = "wasm32"))]
++                    #[test]
++                    #[should_panic]
++                    fn extract_panic_oob() {
++                        let VAL: <$id<i32> as sealed::Simd>::Element
++                            = $ref!(7);
++                        let VEC: $id<i32> = $id::splat(VAL);
++                        let _ = VEC.extract($id::<i32>::lanes());
++                    }
++
++                    // FIXME: wasm-bindgen-test does not support #[should_panic]
++                    // #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    #[cfg(not(target_arch = "wasm32"))]
++                    #[test]
++                    #[should_panic]
++                    fn replace_panic_oob() {
++                        let VAL: <$id<i32> as sealed::Simd>::Element
++                            = $ref!(7);
++                        let VAL42: <$id<i32> as sealed::Simd>::Element
++                            = $ref!(42);
++                        let VEC: $id<i32> = $id::splat(VAL);
++                        let _ = VEC.replace($id::<i32>::lanes(), VAL42);
++                    }
++                }
++            }
++        }
++
++        impl<T> crate::fmt::Debug for $id<T> {
++            #[allow(clippy::missing_inline_in_public_items)]
++            fn fmt(&self, f: &mut crate::fmt::Formatter<'_>)
++                   -> crate::fmt::Result {
++                write!(
++                    f,
++                    "{}<{}>(",
++                    stringify!($id),
++                    unsafe { crate::intrinsics::type_name::<T>() }
++                )?;
++                for i in 0..$elem_count {
++                    if i > 0 {
++                        write!(f, ", ")?;
++                    }
++                    self.extract(i).fmt(f)?;
++                }
++                write!(f, ")")
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _fmt_debug>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn debug() {
++                        use arrayvec::{ArrayString,ArrayVec};
++                        type TinyString = ArrayString<[u8; 512]>;
++
++                        use crate::fmt::Write;
++                        let v = $id::<i32>::default();
++                        let mut s = TinyString::new();
++                        write!(&mut s, "{:?}", v).unwrap();
++
++                        let mut beg = TinyString::new();
++                        write!(&mut beg, "{}(", stringify!($id)).unwrap();
++                        assert!(
++                            s.starts_with(beg.as_str()),
++                            "s = {} (should start with = {})", s, beg
++                        );
++                        assert!(s.ends_with(")"));
++                        let s: ArrayVec<[TinyString; 64]>
++                            = s.replace(beg.as_str(), "")
++                                .replace(")", "").split(",")
++                                .map(|v| TinyString::from(v.trim()).unwrap())
++                                .collect();
++                        assert_eq!(s.len(), $id::<i32>::lanes());
++                        for (index, ss) in s.into_iter().enumerate() {
++                            let mut e = TinyString::new();
++                            write!(&mut e, "{:?}", v.extract(index)).unwrap();
++                            assert_eq!(ss, e);
++                        }
++                    }
++                }
++            }
++        }
++
++        impl<T> Default for $id<T> {
++            #[inline]
++            fn default() -> Self {
++                // FIXME: ptrs do not implement default
++                Self::null()
++            }
++        }
++
++        test_if!{
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _default>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn default() {
++                        let a = $id::<i32>::default();
++                        for i in 0..$id::<i32>::lanes() {
++                            assert_eq!(
++                                a.extract(i), unsafe { crate::mem::zeroed() }
++                            );
++                        }
++                    }
++                }
++            }
++        }
++
++        impl<T> $id<T> {
++            /// Lane-wise equality comparison.
++            #[inline]
++            pub fn eq(self, other: Self) -> $mask_ty {
++                unsafe {
++                    use crate::llvm::simd_eq;
++                    let a: $usize_ty = crate::mem::transmute(self);
++                    let b: $usize_ty = crate::mem::transmute(other);
++                    Simd(simd_eq(a.0, b.0))
++                }
++            }
++
++            /// Lane-wise inequality comparison.
++            #[inline]
++            pub fn ne(self, other: Self) -> $mask_ty {
++                unsafe {
++                    use crate::llvm::simd_ne;
++                    let a: $usize_ty = crate::mem::transmute(self);
++                    let b: $usize_ty = crate::mem::transmute(other);
++                    Simd(simd_ne(a.0, b.0))
++                }
++            }
++
++            /// Lane-wise less-than comparison.
++            #[inline]
++            pub fn lt(self, other: Self) -> $mask_ty {
++                unsafe {
++                    use crate::llvm::simd_lt;
++                    let a: $usize_ty = crate::mem::transmute(self);
++                    let b: $usize_ty = crate::mem::transmute(other);
++                    Simd(simd_lt(a.0, b.0))
++                }
++            }
++
++            /// Lane-wise less-than-or-equals comparison.
++            #[inline]
++            pub fn le(self, other: Self) -> $mask_ty {
++                unsafe {
++                    use crate::llvm::simd_le;
++                    let a: $usize_ty = crate::mem::transmute(self);
++                    let b: $usize_ty = crate::mem::transmute(other);
++                    Simd(simd_le(a.0, b.0))
++                }
++            }
++
++            /// Lane-wise greater-than comparison.
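++            ///
++            /// A minimal sketch (assuming the `cptrx2` pointer vector type
++            /// defined by this crate); pointers compare lane-wise by address:
++            ///
++            /// ```rust,ignore
++            /// use packed_simd::cptrx2;
++            /// let a = [0_i32, 1];
++            /// let v = cptrx2::new(&a[1] as *const i32, &a[0] as *const i32);
++            /// let w = cptrx2::new(&a[0] as *const i32, &a[0] as *const i32);
++            /// let m = v.gt(w);                 // per-lane `>` on the addresses
++            /// assert_eq!(m.extract(0), true);  // &a[1] > &a[0]
++            /// assert_eq!(m.extract(1), false); // equal addresses
++            /// ```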
++ #[inline] ++ pub fn gt(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_gt; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_gt(a.0, b.0)) ++ } ++ } ++ ++ /// Lane-wise greater-than-or-equals comparison. ++ #[inline] ++ pub fn ge(self, other: Self) -> $mask_ty { ++ unsafe { ++ use crate::llvm::simd_ge; ++ let a: $usize_ty = crate::mem::transmute(self); ++ let b: $usize_ty = crate::mem::transmute(other); ++ Simd(simd_ge(a.0, b.0)) ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_vertical>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn cmp() { ++ let a = $id::::null(); ++ let b = $id::::splat(unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ ++ let r = a.lt(b); ++ let e = $mask_ty::splat(true); ++ assert!(r == e); ++ let r = a.le(b); ++ assert!(r == e); ++ ++ let e = $mask_ty::splat(false); ++ let r = a.gt(b); ++ assert!(r == e); ++ let r = a.ge(b); ++ assert!(r == e); ++ let r = a.eq(b); ++ assert!(r == e); ++ ++ let mut a = a; ++ let mut b = b; ++ let mut e = e; ++ for i in 0..$id::::lanes() { ++ if i % 2 == 0 { ++ a = a.replace( ++ i, ++ unsafe { crate::mem::transmute(0_isize) } ++ ); ++ b = b.replace( ++ i, ++ unsafe { crate::mem::transmute(1_isize) } ++ ); ++ e = e.replace(i, true); ++ } else { ++ a = a.replace( ++ i, ++ unsafe { crate::mem::transmute(1_isize) } ++ ); ++ b = b.replace( ++ i, ++ unsafe { crate::mem::transmute(0_isize) } ++ ); ++ e = e.replace(i, false); ++ } ++ } ++ let r = a.lt(b); ++ assert!(r == e); ++ } ++ } ++ } ++ } ++ ++ #[allow(clippy::partialeq_ne_impl)] ++ impl crate::cmp::PartialEq<$id> for $id { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ $id::::eq(*self, *other).all() ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ $id::::ne(*self, *other).any() ++ } ++ } ++ ++ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 ++ #[allow(clippy::partialeq_ne_impl)] ++ impl crate::cmp::PartialEq>> ++ for LexicographicallyOrdered<$id> ++ { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ self.0 == other.0 ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ self.0 != other.0 ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _cmp_PartialEq>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn partial_eq() { ++ let a = $id::::null(); ++ let b = $id::::splat(unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ ++ assert!(a != b); ++ assert!(!(a == b)); ++ assert!(a == a); ++ assert!(!(a != a)); ++ ++ if $id::::lanes() > 1 { ++ let a = $id::::null().replace(0, unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ let b = $id::::splat(unsafe { ++ crate::mem::transmute(1_isize) ++ }); ++ ++ assert!(a != b); ++ assert!(!(a == b)); ++ assert!(a == a); ++ assert!(!(a != a)); ++ } ++ } ++ } ++ } ++ } ++ ++ impl crate::cmp::Eq for $id {} ++ impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
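++            // NOTE (added, illustrative): `==` holds only when *all* lanes
++            // are equal, while `!=` holds when *any* lane differs, e.g.,
++            // assuming this crate's `cptrx2` pointer vector:
++            //
++            //     let x = 0_i32;
++            //     let p = cptrx2::splat(&x as *const i32);
++            //     assert!(p != p.replace(0, core::ptr::null()));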
{ ++ pub mod [<$id _cmp_eq>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn eq() { ++ fn foo(_: E) {} ++ let a = $id::::null(); ++ foo(a); ++ } ++ } ++ } ++ } ++ ++ impl From<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn from(array: [$elem_ty; $elem_count]) -> Self { ++ unsafe { ++ // FIXME: unnecessary zeroing; better than UB. ++ let mut u: Self = crate::mem::zeroed(); ++ crate::ptr::copy_nonoverlapping( ++ &array as *const [$elem_ty; $elem_count] as *const u8, ++ &mut u as *mut Self as *mut u8, ++ crate::mem::size_of::() ++ ); ++ u ++ } ++ } ++ } ++ impl Into<[$elem_ty; $elem_count]> for $id { ++ #[inline] ++ fn into(self) -> [$elem_ty; $elem_count] { ++ unsafe { ++ // FIXME: unnecessary zeroing; better than UB. ++ let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed(); ++ crate::ptr::copy_nonoverlapping( ++ &self as *const $id as *const u8, ++ &mut u as *mut [$elem_ty; $elem_count] as *mut u8, ++ crate::mem::size_of::() ++ ); ++ u ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _from>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn array() { ++ let values = [1_i32; $elem_count]; ++ ++ let mut vec: $id = Default::default(); ++ let mut array = [ ++ $id::::null().extract(0); $elem_count ++ ]; ++ ++ for i in 0..$elem_count { ++ let ptr = unsafe { ++ crate::mem::transmute( ++ &values[i] as *const i32 ++ ) ++ }; ++ vec = vec.replace(i, ptr); ++ array[i] = ptr; ++ } ++ ++ // FIXME: there is no impl of From<$id> for [$elem_ty; N] ++ // let a0 = From::from(vec); ++ // assert_eq!(a0, array); ++ #[allow(unused_assignments)] ++ let mut a1 = array; ++ a1 = vec.into(); ++ assert_eq!(a1, array); ++ ++ let v0: $id = From::from(array); ++ assert_eq!(v0, vec); ++ let v1: $id = array.into(); ++ assert_eq!(v1, vec); ++ } ++ } ++ } ++ } ++ ++ impl $id { ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary. ++ #[inline] ++ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; ++ assert!( ++ target_ptr.align_offset(crate::mem::align_of::()) ++ == 0 ++ ); ++ Self::from_slice_aligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. ++ #[inline] ++ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ Self::from_slice_unaligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary, the behavior is undefined. ++ #[inline] ++ pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty]) ++ -> Self { ++ #[allow(clippy::cast_ptr_alignment)] ++ *(slice.get_unchecked(0) as *const $elem_ty as *const Self) ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. 
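++            ///
++            /// # Example
++            ///
++            /// An illustrative sketch (added here; not from the upstream
++            /// docs), assuming this crate's `cptrx2` pointer vector; the
++            /// caller must guarantee the slice holds at least `lanes()`
++            /// elements:
++            ///
++            /// ```ignore
++            /// let x = 0_i32;
++            /// let ptrs = [&x as *const i32; 2];
++            /// let v = unsafe {
++            ///     cptrx2::from_slice_unaligned_unchecked(&ptrs)
++            /// };
++            /// assert_eq!(v.extract(0), &x as *const i32);
++            /// ```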
++ #[inline] ++ pub unsafe fn from_slice_unaligned_unchecked( ++ slice: &[$elem_ty], ++ ) -> Self { ++ use crate::mem::size_of; ++ let target_ptr = ++ slice.get_unchecked(0) as *const $elem_ty as *const u8; ++ let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty); ++ let self_ptr = &mut x as *mut Self as *mut u8; ++ crate::ptr::copy_nonoverlapping( ++ target_ptr, ++ self_ptr, ++ size_of::(), ++ ); ++ x ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _slice_from_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_unaligned() { ++ let (null, non_null) = ptr_vals!($id); ++ ++ let mut unaligned = [ ++ non_null; $id::::lanes() + 1 ++ ]; ++ unaligned[0] = null; ++ let vec = $id::::from_slice_unaligned( ++ &unaligned[1..] ++ ); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_unaligned_fail() { ++ let (_null, non_null) = ptr_vals!($id); ++ let unaligned = [non_null; $id::::lanes() + 1]; ++ // the slice is not large enough => panic ++ let _vec = $id::::from_slice_unaligned( ++ &unaligned[2..] ++ ); ++ } ++ ++ union A { ++ data: [<$id as sealed::Simd>::Element; ++ 2 * $id::::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_aligned() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ for i in ++ $id::::lanes()..(2 * $id::::lanes()) { ++ unsafe { ++ aligned.data[i] = non_null; ++ } ++ } ++ ++ let vec = unsafe { ++ $id::::from_slice_aligned( ++ &aligned.data[$id::::lanes()..] ++ ) ++ }; ++ for (index, &b) in unsafe { ++ aligned.data.iter().enumerate() ++ } { ++ if index < $id::::lanes() { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!( ++ b, vec.extract(index - $id::::lanes()) ++ ); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_lanes() { ++ let (_null, non_null) = ptr_vals!($id); ++ let aligned = A { ++ data: [non_null; 2 * $id::::lanes()], ++ }; ++ // the slice is not large enough => panic ++ let _vec = unsafe { ++ $id::::from_slice_aligned( ++ &aligned.data[2 * $id::::lanes()..] 
++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_align() { ++ unsafe { ++ let (null, _non_null) = ptr_vals!($id); ++ let aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr = aligned.data.as_ptr(); ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset( ++ crate::mem::align_of::<$id>() ++ ) == 0 { ++ // the pointer is properly aligned, so ++ // from_slice_aligned won't fail here (e.g. this ++ // can happen for i128x1). So we panic to make ++ // the "should_fail" test pass: ++ panic!("ok"); ++ } ++ ++ // create a slice - this is safe, because the ++ // elements of the slice exist, are properly ++ // initialized, and properly aligned: ++ let s = slice::from_raw_parts( ++ ptr, $id::::lanes() ++ ); ++ // this should always panic because the slice ++ // alignment does not match the alignment ++ // requirements for the vector type: ++ let _vec = $id::::from_slice_aligned(s); ++ } ++ } ++ } ++ } ++ } ++ ++ impl $id { ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::()` boundary. ++ #[inline] ++ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty; ++ assert!( ++ target_ptr.align_offset(crate::mem::align_of::()) ++ == 0 ++ ); ++ self.write_to_slice_aligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. ++ #[inline] ++ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ self.write_to_slice_unaligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::()` boundary, the behavior is ++ /// undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_aligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ #[allow(clippy::cast_ptr_alignment)] ++ *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = ++ self; ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_unaligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; ++ let self_ptr = &self as *const Self as *const u8; ++ crate::ptr::copy_nonoverlapping( ++ self_ptr, ++ target_ptr, ++ crate::mem::size_of::(), ++ ); ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _slice_write_to_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_unaligned() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut unaligned = [null; $id::::lanes() + 1]; ++ let vec = $id::::splat(non_null); ++ vec.write_to_slice_unaligned(&mut unaligned[1..]); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_unaligned_fail() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut unaligned = [null; $id::::lanes() + 1]; ++ let vec = $id::::splat(non_null); ++ // the slice is not large enough => panic ++ vec.write_to_slice_unaligned(&mut unaligned[2..]); ++ } ++ ++ union A { ++ data: [<$id as sealed::Simd>::Element; ++ 2 * $id::::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_aligned() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ let vec = $id::::splat(non_null); ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[$id::::lanes()..] ++ ) ++ }; ++ for (index, &b) in ++ unsafe { aligned.data.iter().enumerate() } { ++ if index < $id::::lanes() { ++ assert_eq!(b, null); ++ } else { ++ assert_eq!(b, non_null); ++ assert_eq!( ++ b, vec.extract(index - $id::::lanes()) ++ ); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_lanes() { ++ let (null, non_null) = ptr_vals!($id); ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ let vec = $id::::splat(non_null); ++ // the slice is not large enough => panic ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[2 * $id::::lanes()..] ++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_align() { ++ let (null, non_null) = ptr_vals!($id); ++ unsafe { ++ let mut aligned = A { ++ data: [null; 2 * $id::::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr = aligned.data.as_mut_ptr(); ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset( ++ crate::mem::align_of::<$id>() ++ ) == 0 { ++ // the pointer is properly aligned, so ++ // write_to_slice_aligned won't fail here (e.g. ++ // this can happen for i128x1). 
So we panic to
++                                // make the "should_fail" test pass:
++                                panic!("ok");
++                            }
++
++                            // create a slice - this is safe, because the
++                            // elements of the slice exist, are properly
++                            // initialized, and properly aligned:
++                            let s = slice::from_raw_parts_mut(
++                                ptr, $id::<i32>::lanes()
++                            );
++                            // this should always panic because the slice
++                            // alignment does not match the alignment
++                            // requirements for the vector type:
++                            let vec = $id::<i32>::splat(non_null);
++                            vec.write_to_slice_aligned(s);
++                        }
++                    }
++                }
++            }
++        }
++
++        impl<T> crate::hash::Hash for $id<T> {
++            #[inline]
++            fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
++                let s: $usize_ty = unsafe { crate::mem::transmute(*self) };
++                s.hash(state)
++            }
++        }
++
++        test_if! {
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _hash>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn hash() {
++                        use crate::hash::{Hash, Hasher};
++                        #[allow(deprecated)]
++                        use crate::hash::{SipHasher13};
++
++                        let values = [1_i32; $elem_count];
++
++                        let mut vec: $id<i32> = Default::default();
++                        let mut array = [
++                            $id::<i32>::null().extract(0);
++                            $elem_count
++                        ];
++
++                        for i in 0..$elem_count {
++                            let ptr = unsafe {
++                                crate::mem::transmute(
++                                    &values[i] as *const i32
++                                )
++                            };
++                            vec = vec.replace(i, ptr);
++                            array[i] = ptr;
++                        }
++
++                        #[allow(deprecated)]
++                        let mut a_hash = SipHasher13::new();
++                        let mut v_hash = a_hash.clone();
++                        array.hash(&mut a_hash);
++                        vec.hash(&mut v_hash);
++                        assert_eq!(a_hash.finish(), v_hash.finish());
++                    }
++                }
++            }
++        }
++
++        impl<T> $id<T> {
++            /// Calculates the offset from a pointer.
++            ///
++            /// `count` is in units of `T`; e.g. a count of `3` represents a
++            /// pointer offset of `3 * size_of::<T>()` bytes.
++            ///
++            /// # Safety
++            ///
++            /// If any of the following conditions are violated, the result is
++            /// Undefined Behavior:
++            ///
++            /// * Both the starting and resulting pointer must be either in
++            ///   bounds or one byte past the end of an allocated object.
++            ///
++            /// * The computed offset, in bytes, cannot overflow an `isize`.
++            ///
++            /// * The offset being in bounds cannot rely on "wrapping around"
++            ///   the address space. That is, the infinite-precision sum, in
++            ///   bytes, must fit in a `usize`.
++            ///
++            /// The compiler and standard library generally try to ensure
++            /// allocations never reach a size where an offset is a concern. For
++            /// instance, `Vec` and `Box` ensure they never allocate more than
++            /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)`
++            /// is always safe.
++            ///
++            /// Most platforms fundamentally can't even construct such an
++            /// allocation. For instance, no known 64-bit platform can ever
++            /// serve a request for 2^63 bytes due to page-table limitations or
++            /// splitting the address space. However, some 32-bit and 16-bit
++            /// platforms may successfully serve a request for more than
++            /// `isize::MAX` bytes with things like Physical Address Extension.
++            /// As such, memory acquired directly from allocators or memory
++            /// mapped files may be too large to handle with this function.
++            ///
++            /// Consider using `wrapping_offset` instead if these constraints
++            /// are difficult to satisfy. The only advantage of this method is
++            /// that it enables more aggressive compiler optimizations.
++            #[inline]
++            pub unsafe fn offset(self, count: $isize_ty) -> Self {
++                // FIXME: should use LLVM's `add nsw nuw`
++                self.wrapping_offset(count)
++            }
++
++            /// Calculates the offset from a pointer using wrapping arithmetic.
++            ///
++            /// `count` is in units of `T`; e.g. a count of `3` represents a
++            /// pointer offset of `3 * size_of::<T>()` bytes.
++            ///
++            /// # Safety
++            ///
++            /// The resulting pointer does not need to be in bounds, but it is
++            /// potentially hazardous to dereference (which requires `unsafe`).
++            ///
++            /// Always use `.offset(count)` instead when possible, because
++            /// `offset` allows the compiler to optimize better.
++            #[inline]
++            pub fn wrapping_offset(self, count: $isize_ty) -> Self {
++                unsafe {
++                    let x: $isize_ty = crate::mem::transmute(self);
++                    // note: {+,*} currently performs a `wrapping_{add, mul}`
++                    crate::mem::transmute(
++                        x + (count * crate::mem::size_of::<T>() as isize)
++                    )
++                }
++            }
++
++            /// Calculates the distance between two pointers.
++            ///
++            /// The returned value is in units of `T`: the distance in bytes is
++            /// divided by `mem::size_of::<T>()`.
++            ///
++            /// This function is the inverse of `offset`.
++            ///
++            /// # Safety
++            ///
++            /// If any of the following conditions are violated, the result is
++            /// Undefined Behavior:
++            ///
++            /// * Both the starting and other pointer must be either in bounds
++            ///   or one byte past the end of the same allocated object.
++            ///
++            /// * The distance between the pointers, in bytes, cannot overflow
++            ///   an `isize`.
++            ///
++            /// * The distance between the pointers, in bytes, must be an exact
++            ///   multiple of the size of `T`.
++            ///
++            /// * The distance being in bounds cannot rely on "wrapping around"
++            ///   the address space.
++            ///
++            /// The compiler and standard library generally try to ensure
++            /// allocations never reach a size where an offset is a concern. For
++            /// instance, `Vec` and `Box` ensure they never allocate more than
++            /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())`
++            /// is always safe.
++            ///
++            /// Most platforms fundamentally can't even construct such an
++            /// allocation. For instance, no known 64-bit platform can ever
++            /// serve a request for 2^63 bytes due to page-table limitations or
++            /// splitting the address space. However, some 32-bit and 16-bit
++            /// platforms may successfully serve a request for more than
++            /// `isize::MAX` bytes with things like Physical Address Extension.
++            /// As such, memory acquired directly from allocators or memory
++            /// mapped files may be too large to handle with this function.
++            ///
++            /// Consider using `wrapping_offset_from` instead if these constraints
++            /// are difficult to satisfy. The only advantage of this method is
++            /// that it enables more aggressive compiler optimizations.
++            #[inline]
++            pub unsafe fn offset_from(self, origin: Self) -> $isize_ty {
++                // FIXME: should use LLVM's `sub nsw nuw`.
++                self.wrapping_offset_from(origin)
++            }
++
++            /// Calculates the distance between two pointers.
++            ///
++            /// The returned value is in units of `T`: the distance in bytes is
++            /// divided by `mem::size_of::<T>()`.
++            ///
++            /// If the address difference between the two pointers is not a
++            /// multiple of `mem::size_of::<T>()` then the result of the
++            /// division is rounded towards zero.
++            ///
++            /// Though this method is safe for any two pointers, note that its
++            /// result will be mostly useless if the two pointers aren't into
++            /// the same allocated object, for example if they point to two
++            /// different local variables.
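++            ///
++            /// # Example
++            ///
++            /// An illustrative sketch (added here; not from the upstream
++            /// docs), assuming this crate's `cptrx2` and `isizex2` types:
++            ///
++            /// ```ignore
++            /// let a = [0_i32; 2];
++            /// let p = cptrx2::splat(&a[0] as *const i32);
++            /// // the distance from any pointer to itself is zero:
++            /// assert_eq!(p.wrapping_offset_from(p), isizex2::splat(0));
++            /// ```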
++            #[inline]
++            pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty {
++                let x: $isize_ty = unsafe { crate::mem::transmute(self) };
++                let y: $isize_ty = unsafe { crate::mem::transmute(origin) };
++                // note: {-,/} currently perform wrapping_{sub, div}
++                (y - x) / (crate::mem::size_of::<T>() as isize)
++            }
++
++            /// Calculates the offset from a pointer (convenience for
++            /// `.offset(count as isize)`).
++            ///
++            /// `count` is in units of `T`; e.g. a count of 3 represents a
++            /// pointer offset of `3 * size_of::<T>()` bytes.
++            ///
++            /// # Safety
++            ///
++            /// If any of the following conditions are violated, the result is
++            /// Undefined Behavior:
++            ///
++            /// * Both the starting and resulting pointer must be either in
++            ///   bounds or one byte past the end of an allocated object.
++            ///
++            /// * The computed offset, in bytes, cannot overflow an `isize`.
++            ///
++            /// * The offset being in bounds cannot rely on "wrapping around"
++            ///   the address space. That is, the infinite-precision sum must
++            ///   fit in a `usize`.
++            ///
++            /// The compiler and standard library generally try to ensure
++            /// allocations never reach a size where an offset is a concern. For
++            /// instance, `Vec` and `Box` ensure they never allocate more than
++            /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always
++            /// safe.
++            ///
++            /// Most platforms fundamentally can't even construct such an
++            /// allocation. For instance, no known 64-bit platform can ever
++            /// serve a request for 2^63 bytes due to page-table limitations or
++            /// splitting the address space. However, some 32-bit and 16-bit
++            /// platforms may successfully serve a request for more than
++            /// `isize::MAX` bytes with things like Physical Address Extension.
++            /// As such, memory acquired directly from allocators or memory
++            /// mapped files may be too large to handle with this function.
++            ///
++            /// Consider using `wrapping_offset` instead if these constraints
++            /// are difficult to satisfy. The only advantage of this method is
++            /// that it enables more aggressive compiler optimizations.
++            #[inline]
++            #[allow(clippy::should_implement_trait)]
++            pub unsafe fn add(self, count: $usize_ty) -> Self {
++                self.offset(count.cast())
++            }
++
++            /// Calculates the offset from a pointer (convenience for
++            /// `.offset((count as isize).wrapping_neg())`).
++            ///
++            /// `count` is in units of `T`; e.g. a `count` of 3 represents a
++            /// pointer offset of `3 * size_of::<T>()` bytes.
++            ///
++            /// # Safety
++            ///
++            /// If any of the following conditions are violated, the result is
++            /// Undefined Behavior:
++            ///
++            /// * Both the starting and resulting pointer must be either in
++            ///   bounds or one byte past the end of an allocated object.
++            ///
++            /// * The computed offset cannot exceed `isize::MAX` **bytes**.
++            ///
++            /// * The offset being in bounds cannot rely on "wrapping around"
++            ///   the address space. That is, the infinite-precision sum must
++            ///   fit in a `usize`.
++            ///
++            /// The compiler and standard library generally try to ensure
++            /// allocations never reach a size where an offset is a concern. For
++            /// instance, `Vec` and `Box` ensure they never allocate more than
++            /// `isize::MAX` bytes, so
++            /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe.
++            ///
++            /// Most platforms fundamentally can't even construct such an
++            /// allocation. For instance, no known 64-bit platform can ever
++            /// serve a request for 2^63 bytes due to page-table
++            /// limitations or splitting the address space. However, some 32-bit
++            /// and 16-bit platforms may successfully serve a request for more
++            /// than `isize::MAX` bytes with things like Physical Address
++            /// Extension. As such, memory acquired directly from allocators or
++            /// memory mapped files *may* be too large to handle with this
++            /// function.
++            ///
++            /// Consider using `wrapping_offset` instead if these constraints
++            /// are difficult to satisfy. The only advantage of this method is
++            /// that it enables more aggressive compiler optimizations.
++            #[inline]
++            #[allow(clippy::should_implement_trait)]
++            pub unsafe fn sub(self, count: $usize_ty) -> Self {
++                let x: $isize_ty = count.cast();
++                // note: - is currently wrapping_neg
++                self.offset(-x)
++            }
++
++            /// Calculates the offset from a pointer using wrapping arithmetic.
++            /// (convenience for `.wrapping_offset(count as isize)`)
++            ///
++            /// `count` is in units of `T`; e.g. a `count` of 3 represents a
++            /// pointer offset of `3 * size_of::<T>()` bytes.
++            ///
++            /// # Safety
++            ///
++            /// The resulting pointer does not need to be in bounds, but it is
++            /// potentially hazardous to dereference (which requires `unsafe`).
++            ///
++            /// Always use `.add(count)` instead when possible, because `add`
++            /// allows the compiler to optimize better.
++            #[inline]
++            pub fn wrapping_add(self, count: $usize_ty) -> Self {
++                self.wrapping_offset(count.cast())
++            }
++
++            /// Calculates the offset from a pointer using wrapping arithmetic.
++            /// (convenience for `.wrapping_offset((count as
++            /// isize).wrapping_neg())`)
++            ///
++            /// `count` is in units of `T`; e.g. a `count` of 3 represents a
++            /// pointer offset of `3 * size_of::<T>()` bytes.
++            ///
++            /// # Safety
++            ///
++            /// The resulting pointer does not need to be in bounds, but it is
++            /// potentially hazardous to dereference (which requires `unsafe`).
++            ///
++            /// Always use `.sub(count)` instead when possible, because `sub`
++            /// allows the compiler to optimize better.
++            #[inline]
++            pub fn wrapping_sub(self, count: $usize_ty) -> Self {
++                let x: $isize_ty = count.cast();
++                self.wrapping_offset(-1 * x)
++            }
++        }
++
++        impl<T> $id<T> {
++            /// Shuffle vector elements according to `indices`.
++            #[inline]
++            pub fn shuffle1_dyn<I>(self, indices: I) -> Self
++            where
++                Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>,
++            {
++                codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices)
++            }
++        }
++
++        test_if! {
++            $test_tt:
++            paste::item! {
++                pub mod [<$id _shuffle1_dyn>] {
++                    use super::*;
++                    #[cfg_attr(not(target_arch = "wasm32"), test)]
++                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
++                    fn shuffle1_dyn() {
++                        let (null, non_null) = ptr_vals!($id);
++
++                        // alternating = [non_null, null, non_null, null, ...]
++                        let mut alternating = $id::<i32>::splat(null);
++                        for i in 0..$id::<i32>::lanes() {
++                            if i % 2 == 0 {
++                                alternating = alternating.replace(i, non_null);
++                            }
++                        }
++
++                        type Indices = <$id<i32>
++                            as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices;
++                        // even = [0, 0, 2, 2, 4, 4, ..]
++                        let even = {
++                            let mut v = Indices::splat(0);
++                            for i in 0..$id::<i32>::lanes() {
++                                if i % 2 == 0 {
++                                    v = v.replace(i, (i as u8).into());
++                                } else {
++                                    v = v.replace(i, (i as u8 - 1).into());
++                                }
++                            }
++                            v
++                        };
++                        // odd = [1, 1, 3, 3, 5, 5, ...]
++                        let odd = {
++                            let mut v = Indices::splat(0);
++                            for i in 0..$id::<i32>::lanes() {
++                                if i % 2 != 0 {
++                                    v = v.replace(i, (i as u8).into());
++                                } else {
++                                    v = v.replace(i, (i as u8 + 1).into());
++                                }
++                            }
++                            v
++                        };
++
++                        assert_eq!(
++                            alternating.shuffle1_dyn(even),
++                            $id::<i32>::splat(non_null)
++                        );
++                        if $id::<i32>::lanes() > 1 {
++                            assert_eq!(
++                                alternating.shuffle1_dyn(odd),
++                                $id::<i32>::splat(null)
++                            );
++                        }
++                    }
++                }
++            }
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/api/ops.rs b/third_party/rust/packed_simd/src/api/ops.rs
+new file mode 100644
+index 000000000000..f71c98795da3
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/ops.rs
+@@ -0,0 +1,32 @@
++//! Implementation of the `ops` traits
++#[macro_use]
++mod vector_mask_bitwise;
++#[macro_use]
++mod scalar_mask_bitwise;
++
++#[macro_use]
++mod vector_arithmetic;
++#[macro_use]
++mod scalar_arithmetic;
++
++#[macro_use]
++mod vector_bitwise;
++#[macro_use]
++mod scalar_bitwise;
++
++#[macro_use]
++mod vector_shifts;
++#[macro_use]
++mod scalar_shifts;
++
++#[macro_use]
++mod vector_rotates;
++
++#[macro_use]
++mod vector_neg;
++
++#[macro_use]
++mod vector_int_min_max;
++
++#[macro_use]
++mod vector_float_min_max;
+diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs
+new file mode 100644
+index 000000000000..da1a2037eaaf
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs
+@@ -0,0 +1,203 @@
++//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations.
++
++macro_rules! impl_ops_scalar_arithmetic {
++    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
++        impl crate::ops::Add<$elem_ty> for $id {
++            type Output = Self;
++            #[inline]
++            fn add(self, other: $elem_ty) -> Self {
++                self + $id::splat(other)
++            }
++        }
++        impl crate::ops::Add<$id> for $elem_ty {
++            type Output = $id;
++            #[inline]
++            fn add(self, other: $id) -> $id {
++                $id::splat(self) + other
++            }
++        }
++
++        impl crate::ops::Sub<$elem_ty> for $id {
++            type Output = Self;
++            #[inline]
++            fn sub(self, other: $elem_ty) -> Self {
++                self - $id::splat(other)
++            }
++        }
++        impl crate::ops::Sub<$id> for $elem_ty {
++            type Output = $id;
++            #[inline]
++            fn sub(self, other: $id) -> $id {
++                $id::splat(self) - other
++            }
++        }
++
++        impl crate::ops::Mul<$elem_ty> for $id {
++            type Output = Self;
++            #[inline]
++            fn mul(self, other: $elem_ty) -> Self {
++                self * $id::splat(other)
++            }
++        }
++        impl crate::ops::Mul<$id> for $elem_ty {
++            type Output = $id;
++            #[inline]
++            fn mul(self, other: $id) -> $id {
++                $id::splat(self) * other
++            }
++        }
++
++        impl crate::ops::Div<$elem_ty> for $id {
++            type Output = Self;
++            #[inline]
++            fn div(self, other: $elem_ty) -> Self {
++                self / $id::splat(other)
++            }
++        }
++        impl crate::ops::Div<$id> for $elem_ty {
++            type Output = $id;
++            #[inline]
++            fn div(self, other: $id) -> $id {
++                $id::splat(self) / other
++            }
++        }
++
++        impl crate::ops::Rem<$elem_ty> for $id {
++            type Output = Self;
++            #[inline]
++            fn rem(self, other: $elem_ty) -> Self {
++                self % $id::splat(other)
++            }
++        }
++        impl crate::ops::Rem<$id> for $elem_ty {
++            type Output = $id;
++            #[inline]
++            fn rem(self, other: $id) -> $id {
++                $id::splat(self) % other
++            }
++        }
++
++        impl crate::ops::AddAssign<$elem_ty> for $id {
++            #[inline]
++            fn add_assign(&mut self, other: $elem_ty) {
++                *self = *self + other;
++            }
++        }
++
++        impl crate::ops::SubAssign<$elem_ty> for $id {
++            #[inline]
++            fn sub_assign(&mut self, other: $elem_ty) {
++ *self = *self - other; ++ } ++ } ++ ++ impl crate::ops::MulAssign<$elem_ty> for $id { ++ #[inline] ++ fn mul_assign(&mut self, other: $elem_ty) { ++ *self = *self * other; ++ } ++ } ++ ++ impl crate::ops::DivAssign<$elem_ty> for $id { ++ #[inline] ++ fn div_assign(&mut self, other: $elem_ty) { ++ *self = *self / other; ++ } ++ } ++ ++ impl crate::ops::RemAssign<$elem_ty> for $id { ++ #[inline] ++ fn rem_assign(&mut self, other: $elem_ty) { ++ *self = *self % other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_scalar_arith>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_scalar_arithmetic() { ++ let zi = 0 as $elem_ty; ++ let oi = 1 as $elem_ty; ++ let ti = 2 as $elem_ty; ++ let fi = 4 as $elem_ty; ++ let z = $id::splat(zi); ++ let o = $id::splat(oi); ++ let t = $id::splat(ti); ++ let f = $id::splat(fi); ++ ++ // add ++ assert_eq!(zi + z, z); ++ assert_eq!(z + zi, z); ++ assert_eq!(oi + z, o); ++ assert_eq!(o + zi, o); ++ assert_eq!(ti + z, t); ++ assert_eq!(t + zi, t); ++ assert_eq!(ti + t, f); ++ assert_eq!(t + ti, f); ++ // sub ++ assert_eq!(zi - z, z); ++ assert_eq!(z - zi, z); ++ assert_eq!(oi - z, o); ++ assert_eq!(o - zi, o); ++ assert_eq!(ti - z, t); ++ assert_eq!(t - zi, t); ++ assert_eq!(fi - t, t); ++ assert_eq!(f - ti, t); ++ assert_eq!(f - o - o, t); ++ assert_eq!(f - oi - oi, t); ++ // mul ++ assert_eq!(zi * z, z); ++ assert_eq!(z * zi, z); ++ assert_eq!(zi * o, z); ++ assert_eq!(z * oi, z); ++ assert_eq!(zi * t, z); ++ assert_eq!(z * ti, z); ++ assert_eq!(oi * t, t); ++ assert_eq!(o * ti, t); ++ assert_eq!(ti * t, f); ++ assert_eq!(t * ti, f); ++ // div ++ assert_eq!(zi / o, z); ++ assert_eq!(z / oi, z); ++ assert_eq!(ti / o, t); ++ assert_eq!(t / oi, t); ++ assert_eq!(fi / o, f); ++ assert_eq!(f / oi, f); ++ assert_eq!(ti / t, o); ++ assert_eq!(t / ti, o); ++ assert_eq!(fi / t, t); ++ assert_eq!(f / ti, t); ++ // rem ++ assert_eq!(oi % o, z); ++ assert_eq!(o % oi, z); ++ assert_eq!(fi % t, z); ++ assert_eq!(f % ti, z); ++ ++ { ++ let mut v = z; ++ assert_eq!(v, z); ++ v += oi; // add_assign ++ assert_eq!(v, o); ++ v -= oi; // sub_assign ++ assert_eq!(v, z); ++ v = t; ++ v *= oi; // mul_assign ++ assert_eq!(v, t); ++ v *= ti; ++ assert_eq!(v, f); ++ v /= oi; // div_assign ++ assert_eq!(v, f); ++ v /= ti; ++ assert_eq!(v, t); ++ v %= ti; // rem_assign ++ assert_eq!(v, z); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs +new file mode 100644 +index 000000000000..88216769aec4 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs +@@ -0,0 +1,162 @@ ++//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations. ++ ++macro_rules! 
impl_ops_scalar_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::BitXor<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: $elem_ty) -> Self { ++ self ^ $id::splat(other) ++ } ++ } ++ impl crate::ops::BitXor<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn bitxor(self, other: $id) -> $id { ++ $id::splat(self) ^ other ++ } ++ } ++ ++ impl crate::ops::BitAnd<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: $elem_ty) -> Self { ++ self & $id::splat(other) ++ } ++ } ++ impl crate::ops::BitAnd<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn bitand(self, other: $id) -> $id { ++ $id::splat(self) & other ++ } ++ } ++ ++ impl crate::ops::BitOr<$elem_ty> for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: $elem_ty) -> Self { ++ self | $id::splat(other) ++ } ++ } ++ impl crate::ops::BitOr<$id> for $elem_ty { ++ type Output = $id; ++ #[inline] ++ fn bitor(self, other: $id) -> $id { ++ $id::splat(self) | other ++ } ++ } ++ ++ impl crate::ops::BitAndAssign<$elem_ty> for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: $elem_ty) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign<$elem_ty> for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: $elem_ty) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign<$elem_ty> for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: $elem_ty) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_scalar_bitwise>] { ++ use super::*; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_scalar_bitwise() { ++ let zi = 0 as $elem_ty; ++ let oi = 1 as $elem_ty; ++ let ti = 2 as $elem_ty; ++ let z = $id::splat(zi); ++ let o = $id::splat(oi); ++ let t = $id::splat(ti); ++ ++ // BitAnd: ++ assert_eq!(oi & o, o); ++ assert_eq!(o & oi, o); ++ assert_eq!(oi & z, z); ++ assert_eq!(o & zi, z); ++ assert_eq!(zi & o, z); ++ assert_eq!(z & oi, z); ++ assert_eq!(zi & z, z); ++ assert_eq!(z & zi, z); ++ ++ assert_eq!(ti & t, t); ++ assert_eq!(t & ti, t); ++ assert_eq!(ti & o, z); ++ assert_eq!(t & oi, z); ++ assert_eq!(oi & t, z); ++ assert_eq!(o & ti, z); ++ ++ // BitOr: ++ assert_eq!(oi | o, o); ++ assert_eq!(o | oi, o); ++ assert_eq!(oi | z, o); ++ assert_eq!(o | zi, o); ++ assert_eq!(zi | o, o); ++ assert_eq!(z | oi, o); ++ assert_eq!(zi | z, z); ++ assert_eq!(z | zi, z); ++ ++ assert_eq!(ti | t, t); ++ assert_eq!(t | ti, t); ++ assert_eq!(zi | t, t); ++ assert_eq!(z | ti, t); ++ assert_eq!(ti | z, t); ++ assert_eq!(t | zi, t); ++ ++ // BitXOR: ++ assert_eq!(oi ^ o, z); ++ assert_eq!(o ^ oi, z); ++ assert_eq!(zi ^ z, z); ++ assert_eq!(z ^ zi, z); ++ assert_eq!(zi ^ o, o); ++ assert_eq!(z ^ oi, o); ++ assert_eq!(oi ^ z, o); ++ assert_eq!(o ^ zi, o); ++ ++ assert_eq!(ti ^ t, z); ++ assert_eq!(t ^ ti, z); ++ assert_eq!(ti ^ z, t); ++ assert_eq!(t ^ zi, t); ++ assert_eq!(zi ^ t, t); ++ assert_eq!(z ^ ti, t); ++ ++ { ++ // AndAssign: ++ let mut v = o; ++ v &= ti; ++ assert_eq!(v, z); ++ } ++ { ++ // OrAssign: ++ let mut v = z; ++ v |= oi; ++ assert_eq!(v, o); ++ } ++ { ++ // XORAssign: ++ let mut v = z; ++ v ^= oi; ++ assert_eq!(v, o); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs +new file 
mode 100644 +index 000000000000..523a85207b6b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs +@@ -0,0 +1,140 @@ ++//! Vertical (lane-wise) vector-vector bitwise operations. ++ ++macro_rules! impl_ops_scalar_mask_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::BitXor for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: bool) -> Self { ++ self ^ $id::splat(other) ++ } ++ } ++ impl crate::ops::BitXor<$id> for bool { ++ type Output = $id; ++ #[inline] ++ fn bitxor(self, other: $id) -> $id { ++ $id::splat(self) ^ other ++ } ++ } ++ ++ impl crate::ops::BitAnd for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: bool) -> Self { ++ self & $id::splat(other) ++ } ++ } ++ impl crate::ops::BitAnd<$id> for bool { ++ type Output = $id; ++ #[inline] ++ fn bitand(self, other: $id) -> $id { ++ $id::splat(self) & other ++ } ++ } ++ ++ impl crate::ops::BitOr for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: bool) -> Self { ++ self | $id::splat(other) ++ } ++ } ++ impl crate::ops::BitOr<$id> for bool { ++ type Output = $id; ++ #[inline] ++ fn bitor(self, other: $id) -> $id { ++ $id::splat(self) | other ++ } ++ } ++ ++ impl crate::ops::BitAndAssign for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: bool) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: bool) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: bool) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_scalar_mask_bitwise>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_scalar_mask_bitwise() { ++ let ti = true; ++ let fi = false; ++ let t = $id::splat(ti); ++ let f = $id::splat(fi); ++ assert!(t != f); ++ assert!(!(t == f)); ++ ++ // BitAnd: ++ assert_eq!(ti & f, f); ++ assert_eq!(t & fi, f); ++ assert_eq!(fi & t, f); ++ assert_eq!(f & ti, f); ++ assert_eq!(ti & t, t); ++ assert_eq!(t & ti, t); ++ assert_eq!(fi & f, f); ++ assert_eq!(f & fi, f); ++ ++ // BitOr: ++ assert_eq!(ti | f, t); ++ assert_eq!(t | fi, t); ++ assert_eq!(fi | t, t); ++ assert_eq!(f | ti, t); ++ assert_eq!(ti | t, t); ++ assert_eq!(t | ti, t); ++ assert_eq!(fi | f, f); ++ assert_eq!(f | fi, f); ++ ++ // BitXOR: ++ assert_eq!(ti ^ f, t); ++ assert_eq!(t ^ fi, t); ++ assert_eq!(fi ^ t, t); ++ assert_eq!(f ^ ti, t); ++ assert_eq!(ti ^ t, f); ++ assert_eq!(t ^ ti, f); ++ assert_eq!(fi ^ f, f); ++ assert_eq!(f ^ fi, f); ++ ++ { ++ // AndAssign: ++ let mut v = f; ++ v &= ti; ++ assert_eq!(v, f); ++ } ++ { ++ // OrAssign: ++ let mut v = f; ++ v |= ti; ++ assert_eq!(v, t); ++ } ++ { ++ // XORAssign: ++ let mut v = f; ++ v ^= ti; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs +new file mode 100644 +index 000000000000..9c164ad56c0b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs +@@ -0,0 +1,107 @@ ++//! Vertical (lane-wise) vector-scalar shifts operations. ++ ++macro_rules! 
impl_ops_scalar_shifts { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Shl for $id { ++ type Output = Self; ++ #[inline] ++ fn shl(self, other: u32) -> Self { ++ self << $id::splat(other as $elem_ty) ++ } ++ } ++ impl crate::ops::Shr for $id { ++ type Output = Self; ++ #[inline] ++ fn shr(self, other: u32) -> Self { ++ self >> $id::splat(other as $elem_ty) ++ } ++ } ++ ++ impl crate::ops::ShlAssign for $id { ++ #[inline] ++ fn shl_assign(&mut self, other: u32) { ++ *self = *self << other; ++ } ++ } ++ impl crate::ops::ShrAssign for $id { ++ #[inline] ++ fn shr_assign(&mut self, other: u32) { ++ *self = *self >> other; ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_scalar_shifts>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), ++ allow(unreachable_code, ++ unused_variables, ++ unused_mut) ++ )] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn ops_scalar_shifts() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ { ++ let zi = 0 as u32; ++ let oi = 1 as u32; ++ let ti = 2 as u32; ++ let maxi ++ = (mem::size_of::<$elem_ty>() * 8 - 1) as u32; ++ ++ // shr ++ assert_eq!(z >> zi, z); ++ assert_eq!(z >> oi, z); ++ assert_eq!(z >> ti, z); ++ assert_eq!(z >> ti, z); ++ ++ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13 ++ return; ++ } ++ ++ assert_eq!(o >> zi, o); ++ assert_eq!(t >> zi, t); ++ assert_eq!(f >> zi, f); ++ assert_eq!(f >> maxi, z); ++ ++ assert_eq!(o >> oi, z); ++ assert_eq!(t >> oi, o); ++ assert_eq!(t >> ti, z); ++ assert_eq!(f >> oi, t); ++ assert_eq!(f >> ti, o); ++ assert_eq!(f >> maxi, z); ++ ++ // shl ++ assert_eq!(z << zi, z); ++ assert_eq!(o << zi, o); ++ assert_eq!(t << zi, t); ++ assert_eq!(f << zi, f); ++ assert_eq!(f << maxi, z); ++ ++ assert_eq!(o << oi, t); ++ assert_eq!(o << ti, f); ++ assert_eq!(t << oi, f); ++ ++ { // shr_assign ++ let mut v = o; ++ v >>= oi; ++ assert_eq!(v, z); ++ } ++ { // shl_assign ++ let mut v = o; ++ v <<= oi; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs +new file mode 100644 +index 000000000000..7057f52d0317 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs +@@ -0,0 +1,148 @@ ++//! Vertical (lane-wise) vector-vector arithmetic operations. ++ ++macro_rules! 
impl_ops_vector_arithmetic { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Add for $id { ++ type Output = Self; ++ #[inline] ++ fn add(self, other: Self) -> Self { ++ use crate::llvm::simd_add; ++ unsafe { Simd(simd_add(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Sub for $id { ++ type Output = Self; ++ #[inline] ++ fn sub(self, other: Self) -> Self { ++ use crate::llvm::simd_sub; ++ unsafe { Simd(simd_sub(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Mul for $id { ++ type Output = Self; ++ #[inline] ++ fn mul(self, other: Self) -> Self { ++ use crate::llvm::simd_mul; ++ unsafe { Simd(simd_mul(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Div for $id { ++ type Output = Self; ++ #[inline] ++ fn div(self, other: Self) -> Self { ++ use crate::llvm::simd_div; ++ unsafe { Simd(simd_div(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::Rem for $id { ++ type Output = Self; ++ #[inline] ++ fn rem(self, other: Self) -> Self { ++ use crate::llvm::simd_rem; ++ unsafe { Simd(simd_rem(self.0, other.0)) } ++ } ++ } ++ ++ impl crate::ops::AddAssign for $id { ++ #[inline] ++ fn add_assign(&mut self, other: Self) { ++ *self = *self + other; ++ } ++ } ++ ++ impl crate::ops::SubAssign for $id { ++ #[inline] ++ fn sub_assign(&mut self, other: Self) { ++ *self = *self - other; ++ } ++ } ++ ++ impl crate::ops::MulAssign for $id { ++ #[inline] ++ fn mul_assign(&mut self, other: Self) { ++ *self = *self * other; ++ } ++ } ++ ++ impl crate::ops::DivAssign for $id { ++ #[inline] ++ fn div_assign(&mut self, other: Self) { ++ *self = *self / other; ++ } ++ } ++ ++ impl crate::ops::RemAssign for $id { ++ #[inline] ++ fn rem_assign(&mut self, other: Self) { ++ *self = *self % other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_arith>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_vector_arithmetic() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ // add ++ assert_eq!(z + z, z); ++ assert_eq!(o + z, o); ++ assert_eq!(t + z, t); ++ assert_eq!(t + t, f); ++ // sub ++ assert_eq!(z - z, z); ++ assert_eq!(o - z, o); ++ assert_eq!(t - z, t); ++ assert_eq!(f - t, t); ++ assert_eq!(f - o - o, t); ++ // mul ++ assert_eq!(z * z, z); ++ assert_eq!(z * o, z); ++ assert_eq!(z * t, z); ++ assert_eq!(o * t, t); ++ assert_eq!(t * t, f); ++ // div ++ assert_eq!(z / o, z); ++ assert_eq!(t / o, t); ++ assert_eq!(f / o, f); ++ assert_eq!(t / t, o); ++ assert_eq!(f / t, t); ++ // rem ++ assert_eq!(o % o, z); ++ assert_eq!(f % t, z); ++ ++ { ++ let mut v = z; ++ assert_eq!(v, z); ++ v += o; // add_assign ++ assert_eq!(v, o); ++ v -= o; // sub_assign ++ assert_eq!(v, z); ++ v = t; ++ v *= o; // mul_assign ++ assert_eq!(v, t); ++ v *= t; ++ assert_eq!(v, f); ++ v /= o; // div_assign ++ assert_eq!(v, f); ++ v /= t; ++ assert_eq!(v, t); ++ v %= t; // rem_assign ++ assert_eq!(v, z); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs +new file mode 100644 +index 000000000000..7be9603fa261 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs +@@ -0,0 +1,129 @@ ++//! Vertical (lane-wise) vector-vector bitwise operations. ++ ++macro_rules! 
impl_ops_vector_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::Not for $id { ++ type Output = Self; ++ #[inline] ++ fn not(self) -> Self { ++ Self::splat($true) ^ self ++ } ++ } ++ impl crate::ops::BitXor for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: Self) -> Self { ++ use crate::llvm::simd_xor; ++ unsafe { Simd(simd_xor(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAnd for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: Self) -> Self { ++ use crate::llvm::simd_and; ++ unsafe { Simd(simd_and(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitOr for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: Self) -> Self { ++ use crate::llvm::simd_or; ++ unsafe { Simd(simd_or(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAndAssign for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: Self) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: Self) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: Self) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_bitwise>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_vector_bitwise() { ++ ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let m = $id::splat(!z.extract(0)); ++ ++ // Not: ++ assert_eq!(!z, m); ++ assert_eq!(!m, z); ++ ++ // BitAnd: ++ assert_eq!(o & o, o); ++ assert_eq!(o & z, z); ++ assert_eq!(z & o, z); ++ assert_eq!(z & z, z); ++ ++ assert_eq!(t & t, t); ++ assert_eq!(t & o, z); ++ assert_eq!(o & t, z); ++ ++ // BitOr: ++ assert_eq!(o | o, o); ++ assert_eq!(o | z, o); ++ assert_eq!(z | o, o); ++ assert_eq!(z | z, z); ++ ++ assert_eq!(t | t, t); ++ assert_eq!(z | t, t); ++ assert_eq!(t | z, t); ++ ++ // BitXOR: ++ assert_eq!(o ^ o, z); ++ assert_eq!(z ^ z, z); ++ assert_eq!(z ^ o, o); ++ assert_eq!(o ^ z, o); ++ ++ assert_eq!(t ^ t, z); ++ assert_eq!(t ^ z, t); ++ assert_eq!(z ^ t, t); ++ ++ { ++ // AndAssign: ++ let mut v = o; ++ v &= t; ++ assert_eq!(v, z); ++ } ++ { ++ // OrAssign: ++ let mut v = z; ++ v |= o; ++ assert_eq!(v, o); ++ } ++ { ++ // XORAssign: ++ let mut v = z; ++ v ^= o; ++ assert_eq!(v, o); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs +new file mode 100644 +index 000000000000..4126e87042f5 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs +@@ -0,0 +1,69 @@ ++//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors. ++ ++macro_rules! impl_ops_vector_float_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Minimum of two vectors. ++ /// ++ /// Returns a new vector containing the minimum value of each of ++ /// the input vector lanes. ++ #[inline] ++ pub fn min(self, x: Self) -> Self { ++ use crate::llvm::simd_fmin; ++ unsafe { Simd(simd_fmin(self.0, x.0)) } ++ } ++ ++ /// Maximum of two vectors. ++ /// ++ /// Returns a new vector containing the maximum value of each of ++ /// the input vector lanes. 
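++            ///
++            /// # Example
++            ///
++            /// An illustrative sketch (added here; not from the upstream
++            /// docs), assuming this crate's `f32x4` type:
++            ///
++            /// ```ignore
++            /// let a = f32x4::new(0.0, 1.0, 2.0, 3.0);
++            /// let b = f32x4::splat(1.5);
++            /// assert_eq!(a.max(b), f32x4::new(1.5, 1.5, 2.0, 3.0));
++            /// ```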
++ #[inline] ++ pub fn max(self, x: Self) -> Self { ++ use crate::llvm::simd_fmax; ++ unsafe { Simd(simd_fmax(self.0, x.0)) } ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_min_max>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn min_max() { ++ let n = crate::$elem_ty::NAN; ++ let o = $id::splat(1. as $elem_ty); ++ let t = $id::splat(2. as $elem_ty); ++ ++ let mut m = o; // [1., 2., 1., 2., ...] ++ let mut on = o; ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ m = m.replace(i, 2. as $elem_ty); ++ on = on.replace(i, n); ++ } ++ } ++ ++ assert_eq!(o.min(t), o); ++ assert_eq!(t.min(o), o); ++ assert_eq!(m.min(o), o); ++ assert_eq!(o.min(m), o); ++ assert_eq!(m.min(t), m); ++ assert_eq!(t.min(m), m); ++ ++ assert_eq!(o.max(t), t); ++ assert_eq!(t.max(o), t); ++ assert_eq!(m.max(o), m); ++ assert_eq!(o.max(m), m); ++ assert_eq!(m.max(t), t); ++ assert_eq!(t.max(m), t); ++ ++ assert_eq!(on.min(o), o); ++ assert_eq!(o.min(on), o); ++ assert_eq!(on.max(o), o); ++ assert_eq!(o.max(on), o); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs +new file mode 100644 +index 000000000000..36ea98e6bf32 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs +@@ -0,0 +1,57 @@ ++//! Vertical (lane-wise) vector `min` and `max` for integer vectors. ++ ++macro_rules! impl_ops_vector_int_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Minimum of two vectors. ++ /// ++ /// Returns a new vector containing the minimum value of each of ++ /// the input vector lanes. ++ #[inline] ++ pub fn min(self, x: Self) -> Self { ++ self.lt(x).select(self, x) ++ } ++ ++ /// Maximum of two vectors. ++ /// ++ /// Returns a new vector containing the maximum value of each of ++ /// the input vector lanes. ++ #[inline] ++ pub fn max(self, x: Self) -> Self { ++ self.gt(x).select(self, x) ++ } ++ } ++ test_if!{$test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_min_max>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn min_max() { ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ ++ let mut m = o; ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ m = m.replace(i, 2 as $elem_ty); ++ } ++ } ++ assert_eq!(o.min(t), o); ++ assert_eq!(t.min(o), o); ++ assert_eq!(m.min(o), o); ++ assert_eq!(o.min(m), o); ++ assert_eq!(m.min(t), m); ++ assert_eq!(t.min(m), m); ++ ++ assert_eq!(o.max(t), t); ++ assert_eq!(t.max(o), t); ++ assert_eq!(m.max(o), m); ++ assert_eq!(o.max(m), m); ++ assert_eq!(m.max(t), t); ++ assert_eq!(t.max(m), t); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs +new file mode 100644 +index 000000000000..295fc1ca81c9 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs +@@ -0,0 +1,116 @@ ++//! Vertical (lane-wise) vector-vector bitwise operations. ++ ++macro_rules! 
impl_ops_vector_mask_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $test_tt:tt | ++ ($true:expr, $false:expr) ++ ) => { ++ impl crate::ops::Not for $id { ++ type Output = Self; ++ #[inline] ++ fn not(self) -> Self { ++ Self::splat($true) ^ self ++ } ++ } ++ impl crate::ops::BitXor for $id { ++ type Output = Self; ++ #[inline] ++ fn bitxor(self, other: Self) -> Self { ++ use crate::llvm::simd_xor; ++ unsafe { Simd(simd_xor(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAnd for $id { ++ type Output = Self; ++ #[inline] ++ fn bitand(self, other: Self) -> Self { ++ use crate::llvm::simd_and; ++ unsafe { Simd(simd_and(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitOr for $id { ++ type Output = Self; ++ #[inline] ++ fn bitor(self, other: Self) -> Self { ++ use crate::llvm::simd_or; ++ unsafe { Simd(simd_or(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::BitAndAssign for $id { ++ #[inline] ++ fn bitand_assign(&mut self, other: Self) { ++ *self = *self & other; ++ } ++ } ++ impl crate::ops::BitOrAssign for $id { ++ #[inline] ++ fn bitor_assign(&mut self, other: Self) { ++ *self = *self | other; ++ } ++ } ++ impl crate::ops::BitXorAssign for $id { ++ #[inline] ++ fn bitxor_assign(&mut self, other: Self) { ++ *self = *self ^ other; ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _ops_vector_mask_bitwise>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn ops_vector_mask_bitwise() { ++ let t = $id::splat(true); ++ let f = $id::splat(false); ++ assert!(t != f); ++ assert!(!(t == f)); ++ ++ // Not: ++ assert_eq!(!t, f); ++ assert_eq!(t, !f); ++ ++ // BitAnd: ++ assert_eq!(t & f, f); ++ assert_eq!(f & t, f); ++ assert_eq!(t & t, t); ++ assert_eq!(f & f, f); ++ ++ // BitOr: ++ assert_eq!(t | f, t); ++ assert_eq!(f | t, t); ++ assert_eq!(t | t, t); ++ assert_eq!(f | f, f); ++ ++ // BitXOR: ++ assert_eq!(t ^ f, t); ++ assert_eq!(f ^ t, t); ++ assert_eq!(t ^ t, f); ++ assert_eq!(f ^ f, f); ++ ++ { ++ // AndAssign: ++ let mut v = f; ++ v &= t; ++ assert_eq!(v, f); ++ } ++ { ++ // OrAssign: ++ let mut v = f; ++ v |= t; ++ assert_eq!(v, t); ++ } ++ { ++ // XORAssign: ++ let mut v = f; ++ v ^= t; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_neg.rs b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs +new file mode 100644 +index 000000000000..e2d91fd2fed6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs +@@ -0,0 +1,43 @@ ++//! Vertical (lane-wise) vector `Neg`. ++ ++macro_rules! impl_ops_vector_neg { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Neg for $id { ++ type Output = Self; ++ #[inline] ++ fn neg(self) -> Self { ++ Self::splat(-1 as $elem_ty) * self ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! 
{ ++ pub mod [<$id _ops_vector_neg>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn neg() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ let nz = $id::splat(-(0 as $elem_ty)); ++ let no = $id::splat(-(1 as $elem_ty)); ++ let nt = $id::splat(-(2 as $elem_ty)); ++ let nf = $id::splat(-(4 as $elem_ty)); ++ ++ assert_eq!(-z, nz); ++ assert_eq!(-o, no); ++ assert_eq!(-t, nt); ++ assert_eq!(-f, nf); ++ ++ assert_eq!(z, -nz); ++ assert_eq!(o, -no); ++ assert_eq!(t, -nt); ++ assert_eq!(f, -nf); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs +new file mode 100644 +index 000000000000..6c794ecf4b93 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs +@@ -0,0 +1,90 @@ ++//! Vertical (lane-wise) vector rotate operations. ++#![allow(unused)] ++ ++macro_rules! impl_ops_vector_rotates { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Shifts the bits of each lane to the left by the specified ++ /// amount in the corresponding lane of `n`, wrapping the ++ /// truncated bits to the end of the resulting integer. ++ /// ++ /// Note: this is neither the same operation as `<<` nor equivalent ++ /// to `slice::rotate_left`. ++ #[inline] ++ pub fn rotate_left(self, n: $id) -> $id { ++ const LANE_WIDTH: $elem_ty = ++ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; ++ // Protect against undefined behavior for over-long bit shifts ++ let n = n % LANE_WIDTH; ++ (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH)) ++ } ++ ++ /// Shifts the bits of each lane to the right by the specified ++ /// amount in the corresponding lane of `n`, wrapping the ++ /// truncated bits to the beginning of the resulting integer. ++ /// ++ /// Note: this is neither the same operation as `>>` nor equivalent ++ /// to `slice::rotate_right`. ++ #[inline] ++ pub fn rotate_right(self, n: $id) -> $id { ++ const LANE_WIDTH: $elem_ty = ++ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; ++ // Protect against undefined behavior for over-long bit shifts ++ let n = n % LANE_WIDTH; ++ (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH)) ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item!
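++ // A minimal illustration of the rotate semantics documented above, ++ // as a scalar analogy (not vendored test code): for a u8 lane, ++ // 0b0000_0011u8.rotate_right(1) == 0b1000_0001, and the % LANE_WIDTH ++ // reduction above keeps over-long rotate amounts from turning into ++ // undefined shift behavior.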
{ ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/75 ++ #[cfg(not(any( ++ target_arch = "s390x", ++ target_arch = "sparc64", ++ )))] ++ pub mod [<$id _ops_vector_rotate>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn rotate_ops() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ let max = $id::splat( ++ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty); ++ ++ // rotate_right ++ assert_eq!(z.rotate_right(z), z); ++ assert_eq!(z.rotate_right(o), z); ++ assert_eq!(z.rotate_right(t), z); ++ ++ assert_eq!(o.rotate_right(z), o); ++ assert_eq!(t.rotate_right(z), t); ++ assert_eq!(f.rotate_right(z), f); ++ assert_eq!(f.rotate_right(max), f << 1); ++ ++ assert_eq!(o.rotate_right(o), o << max); ++ assert_eq!(t.rotate_right(o), o); ++ assert_eq!(t.rotate_right(t), o << max); ++ assert_eq!(f.rotate_right(o), t); ++ assert_eq!(f.rotate_right(t), o); ++ ++ // rotate_left ++ assert_eq!(z.rotate_left(z), z); ++ assert_eq!(o.rotate_left(z), o); ++ assert_eq!(t.rotate_left(z), t); ++ assert_eq!(f.rotate_left(z), f); ++ assert_eq!(f.rotate_left(max), t); ++ ++ assert_eq!(o.rotate_left(o), t); ++ assert_eq!(o.rotate_left(t), f); ++ assert_eq!(t.rotate_left(o), f); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs +new file mode 100644 +index 000000000000..22e1fbc0ec76 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs +@@ -0,0 +1,107 @@ ++//! Vertical (lane-wise) vector-vector shifts operations. ++ ++macro_rules! impl_ops_vector_shifts { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl crate::ops::Shl<$id> for $id { ++ type Output = Self; ++ #[inline] ++ fn shl(self, other: Self) -> Self { ++ use crate::llvm::simd_shl; ++ unsafe { Simd(simd_shl(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::Shr<$id> for $id { ++ type Output = Self; ++ #[inline] ++ fn shr(self, other: Self) -> Self { ++ use crate::llvm::simd_shr; ++ unsafe { Simd(simd_shr(self.0, other.0)) } ++ } ++ } ++ impl crate::ops::ShlAssign<$id> for $id { ++ #[inline] ++ fn shl_assign(&mut self, other: Self) { ++ *self = *self << other; ++ } ++ } ++ impl crate::ops::ShrAssign<$id> for $id { ++ #[inline] ++ fn shr_assign(&mut self, other: Self) { ++ *self = *self >> other; ++ } ++ } ++ test_if!{ ++ $test_tt: ++ paste::item! 
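++ // Illustration (hypothetical two-lane values, not vendored test ++ // code): the shift amount is itself a vector, so [4, 4] >> [1, 2] ++ // == [2, 1]; shifting by the lane width or more is undefined, which ++ // is why the rotate implementations above reduce n modulo LANE_WIDTH.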
{ ++ pub mod [<$id _ops_vector_shifts>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), ++ allow(unreachable_code, ++ unused_variables, ++ unused_mut) ++ )] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn ops_vector_shifts() { ++ let z = $id::splat(0 as $elem_ty); ++ let o = $id::splat(1 as $elem_ty); ++ let t = $id::splat(2 as $elem_ty); ++ let f = $id::splat(4 as $elem_ty); ++ ++ let max = $id::splat( ++ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty ++ ); ++ ++ // shr ++ assert_eq!(z >> z, z); ++ assert_eq!(z >> o, z); ++ assert_eq!(z >> t, z); ++ assert_eq!(z >> t, z); ++ ++ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { ++ // FIXME: rust produces bad codegen for shifts: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/13 ++ return; ++ } ++ ++ assert_eq!(o >> z, o); ++ assert_eq!(t >> z, t); ++ assert_eq!(f >> z, f); ++ assert_eq!(f >> max, z); ++ ++ assert_eq!(o >> o, z); ++ assert_eq!(t >> o, o); ++ assert_eq!(t >> t, z); ++ assert_eq!(f >> o, t); ++ assert_eq!(f >> t, o); ++ assert_eq!(f >> max, z); ++ ++ // shl ++ assert_eq!(z << z, z); ++ assert_eq!(o << z, o); ++ assert_eq!(t << z, t); ++ assert_eq!(f << z, f); ++ assert_eq!(f << max, z); ++ ++ assert_eq!(o << o, t); ++ assert_eq!(o << t, f); ++ assert_eq!(t << o, f); ++ ++ { ++ // shr_assign ++ let mut v = o; ++ v >>= o; ++ assert_eq!(v, z); ++ } ++ { ++ // shl_assign ++ let mut v = o; ++ v <<= o; ++ assert_eq!(v, t); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/ptr.rs b/third_party/rust/packed_simd/src/api/ptr.rs +new file mode 100644 +index 000000000000..d2e523a49faf +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ptr.rs +@@ -0,0 +1,4 @@ ++//! Vector of pointers ++ ++#[macro_use] ++mod gather_scatter; +diff --git a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs +new file mode 100644 +index 000000000000..9d8e113bb44f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs +@@ -0,0 +1,241 @@ ++//! Implements masked gathers and scatters for vectors of pointers ++ ++macro_rules! impl_ptr_read { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident ++ | $test_tt:tt) => { ++ impl<T> $id<T> ++ where ++ [T; $elem_count]: sealed::SimdArray, ++ { ++ /// Reads selected vector elements from memory. ++ /// ++ /// Instantiates a new vector by reading the values from `self` for ++ /// those lanes whose `mask` is `true`, and using the elements of ++ /// `value` otherwise. ++ /// ++ /// No memory is accessed for those lanes of `self` whose `mask` is ++ /// `false`. ++ /// ++ /// # Safety ++ /// ++ /// This method is unsafe because it dereferences raw pointers. The ++ /// pointers must be aligned to `mem::align_of::<T>()`. ++ #[inline] ++ pub unsafe fn read<M>( ++ self, mask: Simd<[M; $elem_count]>, ++ value: Simd<[T; $elem_count]>, ++ ) -> Simd<[T; $elem_count]> ++ where ++ M: sealed::Mask, ++ [M; $elem_count]: sealed::SimdArray, ++ { ++ use crate::llvm::simd_gather; ++ Simd(simd_gather(value.0, self.0, mask.0)) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item!
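++ // A sketch of the gather semantics (illustrative values, not ++ // vendored test code): with pointers [p0, p1], mask [true, false] ++ // and value [a, b], read() returns [*p0, b]; the pointer in the ++ // masked-off lane is never dereferenced.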
{ ++ mod [<$id _read>] { ++ use super::*; ++ #[test] ++ fn read() { ++ let mut v = [0_i32; $elem_count]; ++ for i in 0..$elem_count { ++ v[i] = i as i32; ++ } ++ ++ let mut ptr = $id::<i32>::null(); ++ ++ for i in 0..$elem_count { ++ ptr = ptr.replace(i, unsafe { ++ crate::mem::transmute(&v[i] as *const i32) ++ }); ++ } ++ ++ // all mask elements are true: ++ let mask = $mask_ty::splat(true); ++ let def = Simd::<[i32; $elem_count]>::splat(42_i32); ++ let r: Simd<[i32; $elem_count]> = unsafe { ++ ptr.read(mask, def) ++ }; ++ assert_eq!( ++ r, ++ Simd::<[i32; $elem_count]>::from_slice_unaligned( ++ &v ++ ) ++ ); ++ ++ let mut mask = mask; ++ for i in 0..$elem_count { ++ if i % 2 != 0 { ++ mask = mask.replace(i, false); ++ } ++ } ++ ++ // even mask elements are true, odd ones are false: ++ let r: Simd<[i32; $elem_count]> = unsafe { ++ ptr.read(mask, def) ++ }; ++ let mut e = v; ++ for i in 0..$elem_count { ++ if i % 2 != 0 { ++ e[i] = 42; ++ } ++ } ++ assert_eq!( ++ r, ++ Simd::<[i32; $elem_count]>::from_slice_unaligned( ++ &e ++ ) ++ ); ++ ++ // all mask elements are false: ++ let mask = $mask_ty::splat(false); ++ let def = Simd::<[i32; $elem_count]>::splat(42_i32); ++ let r: Simd<[i32; $elem_count]> = unsafe { ++ ptr.read(mask, def) ++ }; ++ assert_eq!(r, def); ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! impl_ptr_write { ++ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident ++ | $test_tt:tt) => { ++ impl<T> $id<T> ++ where ++ [T; $elem_count]: sealed::SimdArray, ++ { ++ /// Writes selected vector elements to memory. ++ /// ++ /// Writes the lanes of `value` for which the mask is `true` to ++ /// their corresponding memory addresses in `self`. ++ /// ++ /// No memory is accessed for those lanes of `self` whose `mask` is ++ /// `false`. ++ /// ++ /// Overlapping memory addresses of `self` are written to in order ++ /// from the least-significant to the most-significant element. ++ /// ++ /// # Safety ++ /// ++ /// This method is unsafe because it dereferences raw pointers. The ++ /// pointers must be aligned to `mem::align_of::<T>()`. ++ #[inline] ++ pub unsafe fn write<M>( ++ self, mask: Simd<[M; $elem_count]>, ++ value: Simd<[T; $elem_count]>, ++ ) where ++ M: sealed::Mask, ++ [M; $elem_count]: sealed::SimdArray, ++ { ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/85 ++ #[cfg(not(target_arch = "mips"))] ++ { ++ use crate::llvm::simd_scatter; ++ simd_scatter(value.0, self.0, mask.0) ++ } ++ #[cfg(target_arch = "mips")] ++ { ++ let m_ptr = ++ &mask as *const Simd<[M; $elem_count]> as *const M; ++ for i in 0..$elem_count { ++ let m = ptr::read(m_ptr.add(i)); ++ if m.test() { ++ let t_ptr = &self ++ as *const Simd<[*mut T; $elem_count]> ++ as *mut *mut T; ++ let v_ptr = &value as *const Simd<[T; $elem_count]> ++ as *const T; ++ ptr::write( ++ ptr::read(t_ptr.add(i)), ++ ptr::read(v_ptr.add(i)), ++ ); ++ } ++ } ++ } ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! { ++ mod [<$id _write>] { ++ use super::*; ++ #[test] ++ fn write() { ++ // fourty_two = [42, 42, 42, ...] ++ let fourty_two ++ = Simd::<[i32; $elem_count]>::splat(42_i32); ++ ++ // This test will write to this array ++ let mut arr = [0_i32; $elem_count]; ++ for i in 0..$elem_count { ++ arr[i] = i as i32; ++ } ++ // arr = [0, 1, 2, ...] ++ ++ let mut ptr = $id::<i32>::null(); ++ for i in 0..$elem_count { ++ ptr = ptr.replace(i, unsafe { ++ crate::mem::transmute(arr.as_ptr().add(i)) ++ }); ++ } ++ // ptr = [&arr[0], &arr[1], ...]
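++ ++ // A sketch of the write-order guarantee documented above (not ++ // part of the vendored tests; it assumes the least- to ++ // most-significant ordering stated in the docs): make every ++ // lane point at arr[0], then the last lane's value wins. ++ { ++ let vals = Simd::<[i32; $elem_count]>::from_slice_unaligned(&arr); ++ let mut overlap = ptr; ++ for i in 1..$elem_count { ++ overlap = overlap.replace(i, overlap.extract(0)); ++ } ++ unsafe { overlap.write($mask_ty::splat(true), vals) }; ++ assert_eq!(arr[0], ($elem_count - 1) as i32); ++ arr[0] = 0; // restore arr = [0, 1, 2, ...] ++ }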
++ ++ // write `fourty_two` to all elements of `v` ++ { ++ let backup = arr; ++ unsafe { ++ ptr.write($mask_ty::splat(true), fourty_two) ++ }; ++ assert_eq!(arr, [42_i32; $elem_count]); ++ arr = backup; // arr = [0, 1, 2, ...] ++ } ++ ++ // write 42 to even elements of arr: ++ { ++ // set odd elements of the mask to false ++ let mut mask = $mask_ty::splat(true); ++ for i in 0..$elem_count { ++ if i % 2 != 0 { ++ mask = mask.replace(i, false); ++ } ++ } ++ // mask = [true, false, true, false, ...] ++ ++ // expected result r = [42, 1, 42, 3, 42, 5, ...] ++ let mut r = arr; ++ for i in 0..$elem_count { ++ if i % 2 == 0 { ++ r[i] = 42; ++ } ++ } ++ ++ let backup = arr; ++ unsafe { ptr.write(mask, fourty_two) }; ++ assert_eq!(arr, r); ++ arr = backup; // arr = [0, 1, 2, 3, ...] ++ } ++ ++ // write 42 to no elements of arr ++ { ++ let backup = arr; ++ unsafe { ++ ptr.write($mask_ty::splat(false), fourty_two) ++ }; ++ assert_eq!(arr, backup); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions.rs b/third_party/rust/packed_simd/src/api/reductions.rs +new file mode 100644 +index 000000000000..54d2f0cc7f08 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions.rs +@@ -0,0 +1,12 @@ ++//! Reductions ++ ++#[macro_use] ++mod float_arithmetic; ++#[macro_use] ++mod integer_arithmetic; ++#[macro_use] ++mod bitwise; ++#[macro_use] ++mod mask; ++#[macro_use] ++mod min_max; +diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs +new file mode 100644 +index 000000000000..5bad4f474b16 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs +@@ -0,0 +1,151 @@ ++//! Implements portable horizontal bitwise vector reductions. ++#![allow(unused)] ++ ++macro_rules! impl_reduction_bitwise { ++ ( ++ [$elem_ty:ident; $elem_count:expr]: ++ $id:ident | $ielem_ty:ident | $test_tt:tt | ++ ($convert:expr) | ++ ($true:expr, $false:expr) ++ ) => { ++ impl $id { ++ /// Lane-wise bitwise `and` of the vector elements. ++ /// ++ /// Note: if the vector has one lane, the first element of the ++ /// vector is returned. ++ #[inline] ++ pub fn and(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_and; ++ let r: $ielem_ty = unsafe { simd_reduce_and(self.0) }; ++ $convert(r) ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on aarch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x &= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ ++ /// Lane-wise bitwise `or` of the vector elements. ++ /// ++ /// Note: if the vector has one lane, the first element of the ++ /// vector is returned. ++ #[inline] ++ pub fn or(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_or; ++ let r: $ielem_ty = unsafe { simd_reduce_or(self.0) }; ++ $convert(r) ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on aarch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x |= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ ++ /// Lane-wise bitwise `xor` of the vector elements. ++ /// ++ /// Note: if the vector has one lane, the first element of the ++ /// vector is returned. 
++ #[inline] ++ pub fn xor(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_xor; ++ let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) }; ++ $convert(r) ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on aarch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x ^= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ } ++ ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _reduction_bitwise>] { ++ use super::*; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn and() { ++ let v = $id::splat($false); ++ assert_eq!(v.and(), $false); ++ let v = $id::splat($true); ++ assert_eq!(v.and(), $true); ++ let v = $id::splat($false); ++ let v = v.replace(0, $true); ++ if $id::lanes() > 1 { ++ assert_eq!(v.and(), $false); ++ } else { ++ assert_eq!(v.and(), $true); ++ } ++ let v = $id::splat($true); ++ let v = v.replace(0, $false); ++ assert_eq!(v.and(), $false); ++ ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn or() { ++ let v = $id::splat($false); ++ assert_eq!(v.or(), $false); ++ let v = $id::splat($true); ++ assert_eq!(v.or(), $true); ++ let v = $id::splat($false); ++ let v = v.replace(0, $true); ++ assert_eq!(v.or(), $true); ++ let v = $id::splat($true); ++ let v = v.replace(0, $false); ++ if $id::lanes() > 1 { ++ assert_eq!(v.or(), $true); ++ } else { ++ assert_eq!(v.or(), $false); ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn xor() { ++ let v = $id::splat($false); ++ assert_eq!(v.xor(), $false); ++ let v = $id::splat($true); ++ if $id::lanes() > 1 { ++ assert_eq!(v.xor(), $false); ++ } else { ++ assert_eq!(v.xor(), $true); ++ } ++ let v = $id::splat($false); ++ let v = v.replace(0, $true); ++ assert_eq!(v.xor(), $true); ++ let v = $id::splat($true); ++ let v = v.replace(0, $false); ++ if $id::lanes() > 1 { ++ assert_eq!(v.xor(), $true); ++ } else { ++ assert_eq!(v.xor(), $false); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs +new file mode 100644 +index 000000000000..dd722ae25fdd +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs +@@ -0,0 +1,312 @@ ++//! Implements portable horizontal float vector arithmetic reductions. ++ ++macro_rules! impl_reduction_float_arithmetic { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Horizontal sum of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) ++ /// ++ /// If one of the vector element is `NaN` the reduction returns ++ /// `NaN`. The resulting `NaN` is not required to be equal to any ++ /// of the `NaN`s in the vector. 
++ #[inline] ++ pub fn sum(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_add_ordered; ++ unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) } ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x += self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ ++ /// Horizontal product of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) ++ /// ++ /// If one of the vector element is `NaN` the reduction returns ++ /// `NaN`. The resulting `NaN` is not required to be equal to any ++ /// of the `NaN`s in the vector. ++ #[inline] ++ pub fn product(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_mul_ordered; ++ unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) } ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x *= self.extract(i) as $elem_ty; ++ } ++ x ++ } ++ } ++ } ++ ++ impl crate::iter::Sum for $id { ++ #[inline] ++ fn sum<I: Iterator<Item = $id>>(iter: I) -> $id { ++ iter.fold($id::splat(0.), crate::ops::Add::add) ++ } ++ } ++ ++ impl crate::iter::Product for $id { ++ #[inline] ++ fn product<I: Iterator<Item = $id>>(iter: I) -> $id { ++ iter.fold($id::splat(1.), crate::ops::Mul::mul) ++ } ++ } ++ ++ impl<'a> crate::iter::Sum<&'a $id> for $id { ++ #[inline] ++ fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id { ++ iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b)) ++ } ++ } ++ ++ impl<'a> crate::iter::Product<&'a $id> for $id { ++ #[inline] ++ fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id { ++ iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b)) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item!
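++ // Illustration of the tree-reduction order documented above, for a ++ // hypothetical 4-lane vector [a, b, c, d]: sum() evaluates ++ // (a + b) + (c + d), which may round differently from the scalar ++ // ((a + b) + c) + d; the *_roundoff tests below therefore tolerate ++ // a 1 ULP difference.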
{ ++ pub mod [<$id _reduction_float_arith>] { ++ use super::*; ++ fn alternating(x: usize) -> $id { ++ let mut v = $id::splat(1 as $elem_ty); ++ for i in 0..$id::lanes() { ++ if i % x == 0 { ++ v = v.replace(i, 2 as $elem_ty); ++ } ++ } ++ v ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn sum() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.sum(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.sum(), $id::lanes() as $elem_ty); ++ let v = alternating(2); ++ assert_eq!( ++ v.sum(), ++ ($id::lanes() / 2 + $id::lanes()) as $elem_ty ++ ); ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn product() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.product(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.product(), 1 as $elem_ty); ++ let f = match $id::lanes() { ++ 64 => 16, ++ 32 => 8, ++ 16 => 4, ++ _ => 2, ++ }; ++ let v = alternating(f); ++ assert_eq!( ++ v.product(), ++ (2_usize.pow(($id::lanes() / f) as u32) ++ as $elem_ty) ++ ); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unreachable_code)] ++ #[allow(unused_mut)] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn sum_nan() { ++ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/6 ++ return; ++ ++ let n0 = crate::$elem_ty::NAN; ++ let v0 = $id::splat(-3.0); ++ for i in 0..$id::lanes() { ++ let mut v = v0.replace(i, n0); ++ // If the vector contains a NaN the result is NaN: ++ assert!( ++ v.sum().is_nan(), ++ "nan at {} => {} | {:?}", ++ i, ++ v.sum(), ++ v ++ ); ++ for j in 0..i { ++ v = v.replace(j, n0); ++ assert!(v.sum().is_nan()); ++ } ++ } ++ let v = $id::splat(n0); ++ assert!(v.sum().is_nan(), "all nans | {:?}", v); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unreachable_code)] ++ #[allow(unused_mut)] ++ // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 ++ fn product_nan() { ++ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/6 ++ return; ++ ++ let n0 = crate::$elem_ty::NAN; ++ let v0 = $id::splat(-3.0); ++ for i in 0..$id::lanes() { ++ let mut v = v0.replace(i, n0); ++ // If the vector contains a NaN the result is NaN: ++ assert!( ++ v.product().is_nan(), ++ "nan at {} => {} | {:?}", ++ i, ++ v.product(), ++ v ++ ); ++ for j in 0..i { ++ v = v.replace(j, n0); ++ assert!(v.product().is_nan()); ++ } ++ } ++ let v = $id::splat(n0); ++ assert!(v.product().is_nan(), "all nans | {:?}", v); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unused, dead_code)] ++ fn sum_roundoff() { ++ // Performs a tree-reduction ++ fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty { ++ assert!(!a.is_empty()); ++ if a.len() == 1 { ++ a[0] ++ } else if a.len() == 2 { ++ a[0] + a[1] ++ } else { ++ let mid = a.len() / 2; ++ let (left, right) = a.split_at(mid); ++ tree_reduce_sum(left) + tree_reduce_sum(right) ++ } ++ } ++ ++ let mut start = crate::$elem_ty::EPSILON; ++ let mut scalar_reduction = 0. as $elem_ty; ++ ++ let mut v = $id::splat(0. as $elem_ty); ++ for i in 0..$id::lanes() { ++ let c = if i % 2 == 0 { 1e3 } else { -1. 
}; ++ start *= 3.14 * c; ++ scalar_reduction += start; ++ v = v.replace(i, start); ++ } ++ let simd_reduction = v.sum(); ++ ++ let mut a = [0. as $elem_ty; $id::lanes()]; ++ v.write_to_slice_unaligned(&mut a); ++ let tree_reduction = tree_reduce_sum(&a); ++ ++ // tolerate 1 ULP difference: ++ let red_bits = simd_reduction.to_bits(); ++ let tree_bits = tree_reduction.to_bits(); ++ assert!( ++ if red_bits > tree_bits { ++ red_bits - tree_bits ++ } else { ++ tree_bits - red_bits ++ } < 2, ++ "vector: {:?} | simd_reduction: {:?} | \ ++ tree_reduction: {} | scalar_reduction: {}", ++ v, ++ simd_reduction, ++ tree_reduction, ++ scalar_reduction ++ ); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[allow(unused, dead_code)] ++ fn product_roundoff() { ++ // Performs a tree-reduction ++ fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty { ++ assert!(!a.is_empty()); ++ if a.len() == 1 { ++ a[0] ++ } else if a.len() == 2 { ++ a[0] * a[1] ++ } else { ++ let mid = a.len() / 2; ++ let (left, right) = a.split_at(mid); ++ tree_reduce_product(left) ++ * tree_reduce_product(right) ++ } ++ } ++ ++ let mut start = crate::$elem_ty::EPSILON; ++ let mut scalar_reduction = 1. as $elem_ty; ++ ++ let mut v = $id::splat(0. as $elem_ty); ++ for i in 0..$id::lanes() { ++ let c = if i % 2 == 0 { 1e3 } else { -1. }; ++ start *= 3.14 * c; ++ scalar_reduction *= start; ++ v = v.replace(i, start); ++ } ++ let simd_reduction = v.product(); ++ ++ let mut a = [0. as $elem_ty; $id::lanes()]; ++ v.write_to_slice_unaligned(&mut a); ++ let tree_reduction = tree_reduce_product(&a); ++ ++ // tolerate 1 ULP difference: ++ let red_bits = simd_reduction.to_bits(); ++ let tree_bits = tree_reduction.to_bits(); ++ assert!( ++ if red_bits > tree_bits { ++ red_bits - tree_bits ++ } else { ++ tree_bits - red_bits ++ } < 2, ++ "vector: {:?} | simd_reduction: {:?} | \ ++ tree_reduction: {} | scalar_reduction: {}", ++ v, ++ simd_reduction, ++ tree_reduction, ++ scalar_reduction ++ ); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs +new file mode 100644 +index 000000000000..91dffad31032 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs +@@ -0,0 +1,197 @@ ++//! Implements portable horizontal integer vector arithmetic reductions. ++ ++macro_rules! impl_reduction_integer_arithmetic { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident ++ | $test_tt:tt) => { ++ impl $id { ++ /// Horizontal wrapping sum of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) ++ /// ++ /// If an operation overflows it returns the mathematical result ++ /// modulo `2^n` where `n` is the number of times it overflows. 
++ #[inline] ++ pub fn wrapping_sum(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_add_ordered; ++ let v: $ielem_ty = unsafe { ++ simd_reduce_add_ordered(self.0, 0 as $ielem_ty) ++ }; ++ v as $elem_ty ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x = x.wrapping_add(self.extract(i) as $elem_ty); ++ } ++ x ++ } ++ } ++ ++ /// Horizontal wrapping product of the vector elements. ++ /// ++ /// The intrinsic performs a tree-reduction of the vector elements. ++ /// That is, for an 8 element vector: ++ /// ++ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) ++ /// ++ /// If an operation overflows it returns the mathematical result ++ /// modulo `2^n` where `n` is the number of times it overflows. ++ #[inline] ++ pub fn wrapping_product(self) -> $elem_ty { ++ #[cfg(not(target_arch = "aarch64"))] ++ { ++ use crate::llvm::simd_reduce_mul_ordered; ++ let v: $ielem_ty = unsafe { ++ simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) ++ }; ++ v as $elem_ty ++ } ++ #[cfg(target_arch = "aarch64")] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ let mut x = self.extract(0) as $elem_ty; ++ for i in 1..$id::lanes() { ++ x = x.wrapping_mul(self.extract(i) as $elem_ty); ++ } ++ x ++ } ++ } ++ } ++ ++ impl crate::iter::Sum for $id { ++ #[inline] ++ fn sum<I: Iterator<Item = $id>>(iter: I) -> $id { ++ iter.fold($id::splat(0), crate::ops::Add::add) ++ } ++ } ++ ++ impl crate::iter::Product for $id { ++ #[inline] ++ fn product<I: Iterator<Item = $id>>(iter: I) -> $id { ++ iter.fold($id::splat(1), crate::ops::Mul::mul) ++ } ++ } ++ ++ impl<'a> crate::iter::Sum<&'a $id> for $id { ++ #[inline] ++ fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id { ++ iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b)) ++ } ++ } ++ ++ impl<'a> crate::iter::Product<&'a $id> for $id { ++ #[inline] ++ fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id { ++ iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b)) ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item!
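++ // A worked example (hypothetical u8 lanes, not vendored test code): ++ // four lanes of 255 give wrapping_sum() == (4 * 255) % 256 == 252, ++ // the same result as repeated u8::wrapping_add.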
{ ++ pub mod [<$id _reduction_int_arith>] { ++ use super::*; ++ ++ fn alternating(x: usize) -> $id { ++ let mut v = $id::splat(1 as $elem_ty); ++ for i in 0..$id::lanes() { ++ if i % x == 0 { ++ v = v.replace(i, 2 as $elem_ty); ++ } ++ } ++ v ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_sum() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.wrapping_sum(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); ++ let v = alternating(2); ++ if $id::lanes() > 1 { ++ assert_eq!( ++ v.wrapping_sum(), ++ ($id::lanes() / 2 + $id::lanes()) as $elem_ty ++ ); ++ } else { ++ assert_eq!( ++ v.wrapping_sum(), ++ 2 as $elem_ty ++ ); ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_sum_overflow() { ++ let start = $elem_ty::max_value() ++ - ($id::lanes() as $elem_ty / 2); ++ ++ let v = $id::splat(start as $elem_ty); ++ let vwrapping_sum = v.wrapping_sum(); ++ ++ let mut wrapping_sum = start; ++ for _ in 1..$id::lanes() { ++ wrapping_sum = wrapping_sum.wrapping_add(start); ++ } ++ assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_product() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.wrapping_product(), 0 as $elem_ty); ++ let v = $id::splat(1 as $elem_ty); ++ assert_eq!(v.wrapping_product(), 1 as $elem_ty); ++ let f = match $id::lanes() { ++ 64 => 16, ++ 32 => 8, ++ 16 => 4, ++ _ => 2, ++ }; ++ let v = alternating(f); ++ if $id::lanes() > 1 { ++ assert_eq!( ++ v.wrapping_product(), ++ (2_usize.pow(($id::lanes() / f) as u32) ++ as $elem_ty) ++ ); ++ } else { ++ assert_eq!( ++ v.wrapping_product(), ++ 2 as $elem_ty ++ ); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn wrapping_product_overflow() { ++ let start = $elem_ty::max_value() ++ - ($id::lanes() as $elem_ty / 2); ++ ++ let v = $id::splat(start as $elem_ty); ++ let vmul = v.wrapping_product(); ++ ++ let mut mul = start; ++ for _ in 1..$id::lanes() { ++ mul = mul.wrapping_mul(start); ++ } ++ assert_eq!(mul, vmul, "v = {:?}", v); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs +new file mode 100644 +index 000000000000..0dd6a84e7e8d +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs +@@ -0,0 +1,89 @@ ++//! Implements portable horizontal mask reductions. ++ ++macro_rules! impl_reduction_mask { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Are `all` vector lanes `true`? ++ #[inline] ++ pub fn all(self) -> bool { ++ unsafe { crate::codegen::reductions::mask::All::all(self) } ++ } ++ /// Is `any` vector lane `true`? ++ #[inline] ++ pub fn any(self) -> bool { ++ unsafe { crate::codegen::reductions::mask::Any::any(self) } ++ } ++ /// Are `all` vector lanes `false`? ++ #[inline] ++ pub fn none(self) -> bool { ++ !self.any() ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
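++ // Illustration (hypothetical 4-lane mask, not vendored test code): ++ // for [true, false, true, true], all() == false, any() == true and ++ // none() == false; none() is simply !any(), as defined above.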
{ ++ pub mod [<$id _reduction>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn all() { ++ let a = $id::splat(true); ++ assert!(a.all()); ++ let a = $id::splat(false); ++ assert!(!a.all()); ++ ++ if $id::lanes() > 1 { ++ for i in 0..$id::lanes() { ++ let mut a = $id::splat(true); ++ a = a.replace(i, false); ++ assert!(!a.all()); ++ let mut a = $id::splat(false); ++ a = a.replace(i, true); ++ assert!(!a.all()); ++ } ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn any() { ++ let a = $id::splat(true); ++ assert!(a.any()); ++ let a = $id::splat(false); ++ assert!(!a.any()); ++ ++ if $id::lanes() > 1 { ++ for i in 0..$id::lanes() { ++ let mut a = $id::splat(true); ++ a = a.replace(i, false); ++ assert!(a.any()); ++ let mut a = $id::splat(false); ++ a = a.replace(i, true); ++ assert!(a.any()); ++ } ++ } ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn none() { ++ let a = $id::splat(true); ++ assert!(!a.none()); ++ let a = $id::splat(false); ++ assert!(a.none()); ++ ++ if $id::lanes() > 1 { ++ for i in 0..$id::lanes() { ++ let mut a = $id::splat(true); ++ a = a.replace(i, false); ++ assert!(!a.none()); ++ let mut a = $id::splat(false); ++ a = a.replace(i, true); ++ assert!(!a.none()); ++ } ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs +new file mode 100644 +index 000000000000..c4d3aa10f15c +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs +@@ -0,0 +1,377 @@ ++//! Implements portable horizontal vector min/max reductions. ++ ++macro_rules! impl_reduction_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident ++ | $ielem_ty:ident | $test_tt:tt) => { ++ impl $id { ++ /// Largest vector element value. ++ #[inline] ++ pub fn max_element(self) -> $elem_ty { ++ #[cfg(not(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ )))] ++ { ++ use crate::llvm::simd_reduce_max; ++ let v: $ielem_ty = unsafe { simd_reduce_max(self.0) }; ++ v as $elem_ty ++ } ++ #[cfg(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ ))] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ // FIXME: broken on WASM32 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/91 ++ let mut x = self.extract(0); ++ for i in 1..$id::lanes() { ++ x = x.max(self.extract(i)); ++ } ++ x ++ } ++ } ++ ++ /// Smallest vector element value. 
++ #[inline] ++ pub fn min_element(self) -> $elem_ty { ++ #[cfg(not(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ all(target_arch = "x86", not(target_feature = "sse2")), ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ ),))] ++ { ++ use crate::llvm::simd_reduce_min; ++ let v: $ielem_ty = unsafe { simd_reduce_min(self.0) }; ++ v as $elem_ty ++ } ++ #[cfg(any( ++ target_arch = "aarch64", ++ target_arch = "arm", ++ all(target_arch = "x86", not(target_feature = "sse2")), ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ ))] ++ { ++ // FIXME: broken on AArch64 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/15 ++ // FIXME: broken on i586-unknown-linux-gnu ++ // https://github.com/rust-lang-nursery/packed_simd/issues/22 ++ // FIXME: broken on WASM32 ++ // https://github.com/rust-lang-nursery/packed_simd/issues/91 ++ let mut x = self.extract(0); ++ for i in 1..$id::lanes() { ++ x = x.min(self.extract(i)); ++ } ++ x ++ } ++ } ++ } ++ test_if! {$test_tt: ++ paste::item! { ++ pub mod [<$id _reduction_min_max>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ pub fn max_element() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.max_element(), 0 as $elem_ty); ++ if $id::lanes() > 1 { ++ let v = v.replace(1, 1 as $elem_ty); ++ assert_eq!(v.max_element(), 1 as $elem_ty); ++ } ++ let v = v.replace(0, 2 as $elem_ty); ++ assert_eq!(v.max_element(), 2 as $elem_ty); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ pub fn min_element() { ++ let v = $id::splat(0 as $elem_ty); ++ assert_eq!(v.min_element(), 0 as $elem_ty); ++ if $id::lanes() > 1 { ++ let v = v.replace(1, 1 as $elem_ty); ++ assert_eq!(v.min_element(), 0 as $elem_ty); ++ } ++ let v = $id::splat(1 as $elem_ty); ++ let v = v.replace(0, 2 as $elem_ty); ++ if $id::lanes() > 1 { ++ assert_eq!(v.min_element(), 1 as $elem_ty); ++ } else { ++ assert_eq!(v.min_element(), 2 as $elem_ty); ++ } ++ if $id::lanes() > 1 { ++ let v = $id::splat(2 as $elem_ty); ++ let v = v.replace(1, 1 as $elem_ty); ++ assert_eq!(v.min_element(), 1 as $elem_ty); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! test_reduction_float_min_max { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if!{ ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _reduction_min_max_nan>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn min_element_test() { ++ let n = crate::$elem_ty::NAN; ++ ++ assert_eq!(n.min(-3.), -3.); ++ assert_eq!((-3. 
as $elem_ty).min(n), -3.); ++ ++ let v0 = $id::splat(-3.); ++ ++ let target_with_broken_last_lane_nan = !cfg!(any( ++ target_arch = "arm", target_arch = "aarch64", ++ all(target_arch = "x86", ++ not(target_feature = "sse2") ++ ), ++ target_arch = "powerpc64", ++ target_arch = "wasm32", ++ )); ++ ++ // The vector is initialized to `-3.`s: [-3, -3, -3, -3] ++ for i in 0..$id::lanes() { ++ // We replace the i-th element of the vector with ++ // `NaN`: [-3, -3, -3, NaN] ++ let mut v = v0.replace(i, n); ++ ++ // If the NaN is in the last place, the LLVM ++ // implementation of these methods is broken on some ++ // targets: ++ if i == $id::lanes() - 1 && ++ target_with_broken_last_lane_nan { ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/5 ++ // ++ // If there is a NaN, the result should always ++ // the smallest element, but currently when the ++ // last element is NaN the current ++ // implementation incorrectly returns NaN. ++ // ++ // The targets mentioned above use different ++ // codegen that produces the correct result. ++ // ++ // These asserts detect if this behavior changes ++ assert!(v.min_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[A]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result ++ // is still always `-3.` unless all elements of ++ // the vector are `NaN`s: ++ // ++ // This is also broken: ++ for j in 0..i { ++ v = v.replace(j, n); ++ assert!(v.min_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[B]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ } ++ ++ // We are done here, since we were in the last ++ // lane which is the last iteration of the loop. ++ break ++ } ++ ++ // We are not in the last lane, and there is only ++ // one `NaN` in the vector. ++ ++ // If the vector has one lane, the result is `NaN`: ++ if $id::lanes() == 1 { ++ assert!(v.min_element().is_nan(), ++ "[C]: all nans | v={:?} | min={} | \ ++ is_nan: {}", ++ v, v.min_element(), ++ v.min_element().is_nan() ++ ); ++ ++ // And we are done, since the vector only has ++ // one lane anyways. ++ break; ++ } ++ ++ // The vector has more than one lane, since there is ++ // only one `NaN` in the vector, the result is ++ // always `-3`. ++ assert_eq!(v.min_element(), -3., ++ "[D]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result is ++ // still always `-3.` unless all elements of the ++ // vector are `NaN`s: ++ for j in 0..i { ++ v = v.replace(j, n); ++ ++ if i == $id::lanes() - 1 && j == i - 1 { ++ // All elements of the vector are `NaN`s, ++ // therefore the result is NaN as well. ++ // ++ // Note: the #lanes of the vector is > 1, so ++ // "i - 1" does not overflow. 
++ assert!(v.min_element().is_nan(), ++ "[E]: all nans | v={:?} | min={} | \ ++ is_nan: {}", ++ v, v.min_element(), ++ v.min_element().is_nan()); ++ } else { ++ // There are non-`NaN` elements in the ++ // vector, therefore the result is `-3.`: ++ assert_eq!(v.min_element(), -3., ++ "[F]: nan at {} => {} | {:?}", ++ i, v.min_element(), v); ++ } ++ } ++ } ++ ++ // If the vector contains all NaNs the result is NaN: ++ assert!($id::splat(n).min_element().is_nan(), ++ "all nans | v={:?} | min={} | is_nan: {}", ++ $id::splat(n), $id::splat(n).min_element(), ++ $id::splat(n).min_element().is_nan()); ++ } ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn max_element_test() { ++ let n = crate::$elem_ty::NAN; ++ ++ assert_eq!(n.max(-3.), -3.); ++ assert_eq!((-3. as $elem_ty).max(n), -3.); ++ ++ let v0 = $id::splat(-3.); ++ ++ let target_with_broken_last_lane_nan = !cfg!(any( ++ target_arch = "arm", target_arch = "aarch64", ++ target_arch = "powerpc64", target_arch = "wasm32", ++ )); ++ ++ // The vector is initialized to `-3.`s: [-3, -3, -3, -3] ++ for i in 0..$id::lanes() { ++ // We replace the i-th element of the vector with ++ // `NaN`: [-3, -3, -3, NaN] ++ let mut v = v0.replace(i, n); ++ ++ // If the NaN is in the last place, the LLVM ++ // implementation of these methods is broken on some ++ // targets: ++ if i == $id::lanes() - 1 && ++ target_with_broken_last_lane_nan { ++ // FIXME: ++ // https://github.com/rust-lang-nursery/packed_simd/issues/5 ++ // ++ // If there is a NaN, the result should ++ // always the largest element, but currently ++ // when the last element is NaN the current ++ // implementation incorrectly returns NaN. ++ // ++ // The targets mentioned above use different ++ // codegen that produces the correct result. ++ // ++ // These asserts detect if this behavior ++ // changes ++ assert!(v.max_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[A]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result ++ // is still always `-3.` unless all elements of ++ // the vector are `NaN`s: ++ // ++ // This is also broken: ++ for j in 0..i { ++ v = v.replace(j, n); ++ assert!(v.max_element().is_nan(), ++ // FIXME: ^^^ should be -3. ++ "[B]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ } ++ ++ // We are done here, since we were in the last ++ // lane which is the last iteration of the loop. ++ break ++ } ++ ++ // We are not in the last lane, and there is only ++ // one `NaN` in the vector. ++ ++ // If the vector has one lane, the result is `NaN`: ++ if $id::lanes() == 1 { ++ assert!(v.max_element().is_nan(), ++ "[C]: all nans | v={:?} | min={} | \ ++ is_nan: {}", ++ v, v.max_element(), ++ v.max_element().is_nan()); ++ ++ // And we are done, since the vector only has ++ // one lane anyways. ++ break; ++ } ++ ++ // The vector has more than one lane, since there is ++ // only one `NaN` in the vector, the result is ++ // always `-3`. ++ assert_eq!(v.max_element(), -3., ++ "[D]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ ++ // If we replace all the elements in the vector ++ // up-to the `i-th` lane with `NaN`s, the result is ++ // still always `-3.` unless all elements of the ++ // vector are `NaN`s: ++ for j in 0..i { ++ v = v.replace(j, n); ++ ++ if i == $id::lanes() - 1 && j == i - 1 { ++ // All elements of the vector are `NaN`s, ++ // therefore the result is NaN as well. 
++ // ++ // Note: the #lanes of the vector is > 1, so ++ // "i - 1" does not overflow. ++ assert!(v.max_element().is_nan(), ++ "[E]: all nans | v={:?} | max={} | \ ++ is_nan: {}", ++ v, v.max_element(), ++ v.max_element().is_nan()); ++ } else { ++ // There are non-`NaN` elements in the ++ // vector, therefore the result is `-3.`: ++ assert_eq!(v.max_element(), -3., ++ "[F]: nan at {} => {} | {:?}", ++ i, v.max_element(), v); ++ } ++ } ++ } ++ ++ // If the vector contains all NaNs the result is NaN: ++ assert!($id::splat(n).max_element().is_nan(), ++ "all nans | v={:?} | max={} | is_nan: {}", ++ $id::splat(n), $id::splat(n).max_element(), ++ $id::splat(n).max_element().is_nan()); ++ } ++ } ++ } ++ } ++ } ++} +diff --git a/third_party/rust/packed_simd/src/api/select.rs b/third_party/rust/packed_simd/src/api/select.rs +new file mode 100644 +index 000000000000..24525df56c73 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/select.rs +@@ -0,0 +1,75 @@ ++//! Implements mask's `select`. ++ ++/// Implements mask select method ++macro_rules! impl_select { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Selects elements of `a` and `b` using mask. ++ /// ++ /// The lanes of the result for which the mask is `true` contain ++ /// the values of `a`. The remaining lanes contain the values of ++ /// `b`. ++ #[inline] ++ pub fn select(self, a: Simd, b: Simd) -> Simd ++ where ++ T: sealed::SimdArray< ++ NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT, ++ >, ++ { ++ use crate::llvm::simd_select; ++ Simd(unsafe { simd_select(self.0, a.0, b.0) }) ++ } ++ } ++ ++ test_select!(bool, $id, $id, (false, true) | $test_tt); ++ }; ++} ++ ++macro_rules! test_select { ++ ( ++ $elem_ty:ident, ++ $mask_ty:ident, ++ $vec_ty:ident,($small:expr, $large:expr) | ++ $test_tt:tt ++ ) => { ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$vec_ty _select>] { ++ use super::*; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn select() { ++ let o = $small as $elem_ty; ++ let t = $large as $elem_ty; ++ ++ let a = $vec_ty::splat(o); ++ let b = $vec_ty::splat(t); ++ let m = a.lt(b); ++ assert_eq!(m.select(a, b), a); ++ ++ let m = b.lt(a); ++ assert_eq!(m.select(b, a), a); ++ ++ let mut c = a; ++ let mut d = b; ++ let mut m_e = $mask_ty::splat(false); ++ for i in 0..$vec_ty::lanes() { ++ if i % 2 == 0 { ++ let c_tmp = c.extract(i); ++ c = c.replace(i, d.extract(i)); ++ d = d.replace(i, c_tmp); ++ } else { ++ m_e = m_e.replace(i, true); ++ } ++ } ++ ++ let m = c.lt(d); ++ assert_eq!(m_e, m); ++ assert_eq!(m.select(c, d), a); ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/shuffle.rs b/third_party/rust/packed_simd/src/api/shuffle.rs +new file mode 100644 +index 000000000000..13a7fae5fcee +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/shuffle.rs +@@ -0,0 +1,190 @@ ++//! Implements portable vector shuffles with immediate indices. ++ ++// FIXME: comprehensive tests ++// https://github.com/rust-lang-nursery/packed_simd/issues/20 ++ ++/// Shuffles vector elements. ++/// ++/// This macro returns a new vector that contains a shuffle of the elements in ++/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1, ++/// [indices...])`) input vectors. ++/// ++/// The type of `vec0` and `vec1` must be equal, and the element type of the ++/// resulting vector is the element type of the input vector. 
++/// ++/// The number of `indices` must be a power-of-two in range `[2, 64]`, since ++/// currently the largest vector supported by the library has 64 lanes. The ++/// length of the resulting vector equals the number of indices provided. ++/// ++/// The indices must be in range `[0, M * N)` where `M` is the number of input ++/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors. ++/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`, ++/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of ++/// `vec1`. ++/// ++/// # Examples ++/// ++/// Shuffling elements of two vectors: ++/// ++/// ``` ++/// # #[macro_use] ++/// # extern crate packed_simd; ++/// # use packed_simd::*; ++/// # fn main() { ++/// // Shuffle allows reordering the elements: ++/// let x = i32x4::new(1, 2, 3, 4); ++/// let y = i32x4::new(5, 6, 7, 8); ++/// let r = shuffle!(x, y, [4, 0, 5, 1]); ++/// assert_eq!(r, i32x4::new(5, 1, 6, 2)); ++/// ++/// // The resulting vector can also be smaller than the input: ++/// let r = shuffle!(x, y, [1, 6]); ++/// assert_eq!(r, i32x2::new(2, 7)); ++/// ++/// // Or larger: ++/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]); ++/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3)); ++/// // At most 2 * the number of lanes in the input vector. ++/// # } ++/// ``` ++/// ++/// Shuffling elements of one vector: ++/// ++/// ``` ++/// # #[macro_use] ++/// # extern crate packed_simd; ++/// # use packed_simd::*; ++/// # fn main() { ++/// // Shuffle allows reordering the elements of a vector: ++/// let x = i32x4::new(1, 2, 3, 4); ++/// let r = shuffle!(x, [2, 1, 3, 0]); ++/// assert_eq!(r, i32x4::new(3, 2, 4, 1)); ++/// ++/// // The resulting vector can be smaller than the input: ++/// let r = shuffle!(x, [1, 3]); ++/// assert_eq!(r, i32x2::new(2, 4)); ++/// ++/// // Equal: ++/// let r = shuffle!(x, [1, 3, 2, 0]); ++/// assert_eq!(r, i32x4::new(2, 4, 3, 1)); ++/// ++/// // Or larger: ++/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]); ++/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3)); ++/// // At most 2 * the number of lanes in the input vector. ++/// # } ++/// ``` ++#[macro_export] ++macro_rules!
shuffle { ++ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector2( ++ $vec0.0, ++ $vec1.0, ++ [$l0, $l1], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector4( ++ $vec0.0, ++ $vec1.0, ++ [$l0, $l1, $l2, $l3], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector8( ++ $vec0.0, ++ $vec1.0, ++ [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr, ++ $l8:expr, $l9:expr, $l10:expr, $l11:expr, ++ $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector16( ++ $vec0.0, ++ $vec1.0, ++ [ ++ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, ++ $l11, $l12, $l13, $l14, $l15, ++ ], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr, ++ $l8:expr, $l9:expr, $l10:expr, $l11:expr, ++ $l12:expr, $l13:expr, $l14:expr, $l15:expr, ++ $l16:expr, $l17:expr, $l18:expr, $l19:expr, ++ $l20:expr, $l21:expr, $l22:expr, $l23:expr, ++ $l24:expr, $l25:expr, $l26:expr, $l27:expr, ++ $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector32( ++ $vec0.0, ++ $vec1.0, ++ [ ++ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, ++ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, ++ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, ++ $l29, $l30, $l31, ++ ], ++ )) ++ } ++ }}; ++ ($vec0:expr, $vec1:expr, ++ [$l0:expr, $l1:expr, $l2:expr, $l3:expr, ++ $l4:expr, $l5:expr, $l6:expr, $l7:expr, ++ $l8:expr, $l9:expr, $l10:expr, $l11:expr, ++ $l12:expr, $l13:expr, $l14:expr, $l15:expr, ++ $l16:expr, $l17:expr, $l18:expr, $l19:expr, ++ $l20:expr, $l21:expr, $l22:expr, $l23:expr, ++ $l24:expr, $l25:expr, $l26:expr, $l27:expr, ++ $l28:expr, $l29:expr, $l30:expr, $l31:expr, ++ $l32:expr, $l33:expr, $l34:expr, $l35:expr, ++ $l36:expr, $l37:expr, $l38:expr, $l39:expr, ++ $l40:expr, $l41:expr, $l42:expr, $l43:expr, ++ $l44:expr, $l45:expr, $l46:expr, $l47:expr, ++ $l48:expr, $l49:expr, $l50:expr, $l51:expr, ++ $l52:expr, $l53:expr, $l54:expr, $l55:expr, ++ $l56:expr, $l57:expr, $l58:expr, $l59:expr, ++ $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{ ++ #[allow(unused_unsafe)] ++ unsafe { ++ $crate::Simd($crate::__shuffle_vector64( ++ $vec0.0, ++ $vec1.0, ++ [ ++ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, ++ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, ++ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, ++ $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37, ++ $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46, ++ $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, $l55, ++ $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63, ++ ], ++ )) ++ } ++ }}; ++ ($vec:expr, [$($l:expr),*]) => { ++ match $vec { ++ v => shuffle!(v, v, [$($l),*]) ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs +new file mode 100644 +index 000000000000..64536be6cba1 +--- /dev/null ++++ 
b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs +@@ -0,0 +1,159 @@ ++//! Shuffle vector elements according to a dynamic vector of indices. ++ ++macro_rules! impl_shuffle1_dyn { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Shuffle vector elements according to `indices`. ++ #[inline] ++ pub fn shuffle1_dyn(self, indices: I) -> Self ++ where ++ Self: codegen::shuffle1_dyn::Shuffle1Dyn, ++ { ++ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) ++ } ++ } ++ }; ++} ++ ++macro_rules! test_shuffle1_dyn { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _shuffle1_dyn>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn shuffle1_dyn() { ++ let increasing = { ++ let mut v = $id::splat(0 as $elem_ty); ++ for i in 0..$id::lanes() { ++ v = v.replace(i, i as $elem_ty); ++ } ++ v ++ }; ++ let decreasing = { ++ let mut v = $id::splat(0 as $elem_ty); ++ for i in 0..$id::lanes() { ++ v = v.replace( ++ i, ++ ($id::lanes() - 1 - i) as $elem_ty ++ ); ++ } ++ v ++ }; ++ ++ type Indices = < ++ $id as codegen::shuffle1_dyn::Shuffle1Dyn ++ >::Indices; ++ let increasing_ids: Indices = increasing.cast(); ++ let decreasing_ids: Indices = decreasing.cast(); ++ ++ assert_eq!( ++ increasing.shuffle1_dyn(increasing_ids), ++ increasing, ++ "(i,i)=>i" ++ ); ++ assert_eq!( ++ decreasing.shuffle1_dyn(increasing_ids), ++ decreasing, ++ "(d,i)=>d" ++ ); ++ assert_eq!( ++ increasing.shuffle1_dyn(decreasing_ids), ++ decreasing, ++ "(i,d)=>d" ++ ); ++ assert_eq!( ++ decreasing.shuffle1_dyn(decreasing_ids), ++ increasing, ++ "(d,d)=>i" ++ ); ++ ++ for i in 0..$id::lanes() { ++ let v_ids: Indices ++ = $id::splat(i as $elem_ty).cast(); ++ assert_eq!(increasing.shuffle1_dyn(v_ids), ++ $id::splat(increasing.extract(i)) ++ ); ++ assert_eq!(decreasing.shuffle1_dyn(v_ids), ++ $id::splat(decreasing.extract(i)) ++ ); ++ assert_eq!( ++ $id::splat(i as $elem_ty) ++ .shuffle1_dyn(increasing_ids), ++ $id::splat(i as $elem_ty) ++ ); ++ assert_eq!( ++ $id::splat(i as $elem_ty) ++ .shuffle1_dyn(decreasing_ids), ++ $id::splat(i as $elem_ty) ++ ); ++ } ++ } ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! test_shuffle1_dyn_mask { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ test_if! { ++ $test_tt: ++ paste::item! { ++ pub mod [<$id _shuffle1_dyn>] { ++ use super::*; ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn shuffle1_dyn() { ++ // alternating = [true, false, true, false, ...] ++ let mut alternating = $id::splat(false); ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ alternating = alternating.replace(i, true); ++ } ++ } ++ ++ type Indices = < ++ $id as codegen::shuffle1_dyn::Shuffle1Dyn ++ >::Indices; ++ // even = [0, 0, 2, 2, 4, 4, ..] ++ let even = { ++ let mut v = Indices::splat(0); ++ for i in 0..$id::lanes() { ++ if i % 2 == 0 { ++ v = v.replace(i, (i as u8).into()); ++ } else { ++ v = v.replace(i, (i as u8 - 1).into()); ++ } ++ } ++ v ++ }; ++ // odd = [1, 1, 3, 3, 5, 5, ...] 
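++ // (Illustration for a hypothetical 4-lane mask: even = [0, 0, 2, 2] ++ // and odd = [1, 1, 3, 3], so shuffling [true, false, true, false] ++ // by even yields all-true and by odd yields all-false, which is ++ // what the asserts below check.)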
++ let odd = { ++ let mut v = Indices::splat(0); ++ for i in 0..$id::lanes() { ++ if i % 2 != 0 { ++ v = v.replace(i, (i as u8).into()); ++ } else { ++ v = v.replace(i, (i as u8 + 1).into()); ++ } ++ } ++ v ++ }; ++ ++ assert_eq!( ++ alternating.shuffle1_dyn(even), ++ $id::splat(true) ++ ); ++ if $id::lanes() > 1 { ++ assert_eq!( ++ alternating.shuffle1_dyn(odd), ++ $id::splat(false) ++ ); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/slice.rs b/third_party/rust/packed_simd/src/api/slice.rs +new file mode 100644 +index 000000000000..526b848b5c06 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/slice.rs +@@ -0,0 +1,7 @@ ++//! Slice from/to methods ++ ++#[macro_use] ++mod from_slice; ++ ++#[macro_use] ++mod write_to_slice; +diff --git a/third_party/rust/packed_simd/src/api/slice/from_slice.rs b/third_party/rust/packed_simd/src/api/slice/from_slice.rs +new file mode 100644 +index 000000000000..109cd1f10b01 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs +@@ -0,0 +1,216 @@ ++//! Implements methods to read a vector type from a slice. ++ ++macro_rules! impl_slice_from_slice { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary. ++ #[inline] ++ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; ++ assert_eq!( ++ target_ptr ++ .align_offset(crate::mem::align_of::()), ++ 0 ++ ); ++ Self::from_slice_aligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. ++ #[inline] ++ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ Self::from_slice_unaligned_unchecked(slice) ++ } ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned ++ /// to an `align_of::()` boundary, the behavior is undefined. ++ #[inline] ++ pub unsafe fn from_slice_aligned_unchecked( ++ slice: &[$elem_ty], ++ ) -> Self { ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked(0) as *const $elem_ty; ++ debug_assert_eq!( ++ target_ptr.align_offset(crate::mem::align_of::()), ++ 0 ++ ); ++ ++ #[allow(clippy::cast_ptr_alignment)] ++ *(target_ptr as *const Self) ++ } ++ ++ /// Instantiates a new vector with the values of the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn from_slice_unaligned_unchecked( ++ slice: &[$elem_ty], ++ ) -> Self { ++ use crate::mem::size_of; ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked(0) as *const $elem_ty as *const u8; ++ let mut x = Self::splat(0 as $elem_ty); ++ let self_ptr = &mut x as *mut Self as *mut u8; ++ crate::ptr::copy_nonoverlapping( ++ target_ptr, ++ self_ptr, ++ size_of::(), ++ ); ++ x ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item! 
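++ // In short: from_slice_unaligned only requires slice.len() >= ++ // Self::lanes(), while from_slice_aligned additionally requires ++ // &slice[0] to lie on an align_of::<Self>() boundary; the tests ++ // below use a union to obtain a suitably aligned buffer.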
{ ++ pub mod [<$id _slice_from_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_unaligned() { ++ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; ++ unaligned[0] = 0 as $elem_ty; ++ let vec = $id::from_slice_unaligned(&unaligned[1..]); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_unaligned_fail() { ++ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; ++ unaligned[0] = 0 as $elem_ty; ++ // the slice is not large enough => panic ++ let _vec = $id::from_slice_unaligned(&unaligned[2..]); ++ } ++ ++ union A { ++ data: [$elem_ty; 2 * $id::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_slice_aligned() { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ for i in $id::lanes()..(2 * $id::lanes()) { ++ unsafe { ++ aligned.data[i] = 42 as $elem_ty; ++ } ++ } ++ ++ let vec = unsafe { ++ $id::from_slice_aligned( ++ &aligned.data[$id::lanes()..] ++ ) ++ }; ++ for (index, &b) in ++ unsafe { aligned.data.iter().enumerate() } { ++ if index < $id::lanes() { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!( ++ b, vec.extract(index - $id::lanes()) ++ ); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_lanes() { ++ let aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ let _vec = unsafe { ++ $id::from_slice_aligned( ++ &aligned.data[2 * $id::lanes()..] ++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn from_slice_aligned_fail_align() { ++ unsafe { ++ let aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr: *const $elem_ty = aligned.data.as_ptr() ++ as *const $elem_ty; ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset( ++ crate::mem::align_of::<$id>() ++ ) == 0 { ++ // the pointer is properly aligned, so ++ // from_slice_aligned won't fail here (e.g. this ++ // can happen for i128x1). 
So we panic to make ++ // the "should_fail" test pass: ++ panic!("ok"); ++ } ++ ++ // create a slice - this is safe, because the ++ // elements of the slice exist, are properly ++ // initialized, and properly aligned: ++ let s: &[$elem_ty] = slice::from_raw_parts( ++ ptr, $id::lanes() ++ ); ++ // this should always panic because the slice ++ // alignment does not match the alignment ++ // requirements for the vector type: ++ let _vec = $id::from_slice_aligned(s); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs +new file mode 100644 +index 000000000000..fcb288da70fc +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs +@@ -0,0 +1,211 @@ ++//! Implements methods to write a vector type to a slice. ++ ++macro_rules! impl_slice_write_to_slice { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::<Self>()` boundary. ++ #[inline] ++ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty; ++ assert_eq!( ++ target_ptr ++ .align_offset(crate::mem::align_of::<Self>()), ++ 0 ++ ); ++ self.write_to_slice_aligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Panics ++ /// ++ /// If `slice.len() < Self::lanes()`. ++ #[inline] ++ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { ++ unsafe { ++ assert!(slice.len() >= $elem_count); ++ self.write_to_slice_unaligned_unchecked(slice); ++ } ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not ++ /// aligned to an `align_of::<Self>()` boundary, the behavior is ++ /// undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_aligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; ++ debug_assert_eq!( ++ target_ptr.align_offset(crate::mem::align_of::<Self>()), ++ 0 ++ ); ++ ++ #[allow(clippy::cast_ptr_alignment)] ++ *(target_ptr as *mut Self) = self; ++ } ++ ++ /// Writes the values of the vector to the `slice`. ++ /// ++ /// # Precondition ++ /// ++ /// If `slice.len() < Self::lanes()` the behavior is undefined. ++ #[inline] ++ pub unsafe fn write_to_slice_unaligned_unchecked( ++ self, slice: &mut [$elem_ty], ++ ) { ++ debug_assert!(slice.len() >= $elem_count); ++ let target_ptr = ++ slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; ++ let self_ptr = &self as *const Self as *const u8; ++ crate::ptr::copy_nonoverlapping( ++ self_ptr, ++ target_ptr, ++ crate::mem::size_of::<Self>(), ++ ); ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item!
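++ // Usage sketch for the write methods above (assuming the f32x4 type ++ // generated by this macro): ++ // ++ //     let mut out = [0.0_f32; 5]; ++ //     f32x4::splat(42.0).write_to_slice_unaligned(&mut out[..4]); ++ //     assert_eq!(out, [42.0, 42.0, 42.0, 42.0, 0.0]);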
{ ++ pub mod [<$id _slice_write_to_slice>] { ++ use super::*; ++ use crate::iter::Iterator; ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_unaligned() { ++ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; ++ let vec = $id::splat(42 as $elem_ty); ++ vec.write_to_slice_unaligned(&mut unaligned[1..]); ++ for (index, &b) in unaligned.iter().enumerate() { ++ if index == 0 { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!(b, vec.extract(index - 1)); ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_unaligned_fail() { ++ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; ++ let vec = $id::splat(42 as $elem_ty); ++ vec.write_to_slice_unaligned(&mut unaligned[2..]); ++ } ++ ++ union A { ++ data: [$elem_ty; 2 * $id::lanes()], ++ _vec: $id, ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] ++ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn write_to_slice_aligned() { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ let vec = $id::splat(42 as $elem_ty); ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[$id::lanes()..] ++ ); ++ for (idx, &b) in aligned.data.iter().enumerate() { ++ if idx < $id::lanes() { ++ assert_eq!(b, 0 as $elem_ty); ++ } else { ++ assert_eq!(b, 42 as $elem_ty); ++ assert_eq!( ++ b, vec.extract(idx - $id::lanes()) ++ ); ++ } ++ } ++ } ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_lanes() { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ let vec = $id::splat(42 as $elem_ty); ++ unsafe { ++ vec.write_to_slice_aligned( ++ &mut aligned.data[2 * $id::lanes()..] ++ ) ++ }; ++ } ++ ++ // FIXME: wasm-bindgen-test does not support #[should_panic] ++ // #[cfg_attr(not(target_arch = "wasm32"), test)] ++ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ #[cfg(not(target_arch = "wasm32"))] ++ #[test] ++ #[should_panic] ++ fn write_to_slice_aligned_fail_align() { ++ unsafe { ++ let mut aligned = A { ++ data: [0 as $elem_ty; 2 * $id::lanes()], ++ }; ++ ++ // get a pointer to the front of data ++ let ptr: *mut $elem_ty ++ = aligned.data.as_mut_ptr() as *mut $elem_ty; ++ // offset pointer by one element ++ let ptr = ptr.wrapping_add(1); ++ ++ if ptr.align_offset(crate::mem::align_of::<$id>()) ++ == 0 { ++ // the pointer is properly aligned, so ++ // write_to_slice_aligned won't fail here (e.g. ++ // this can happen for i128x1). 
So we panic to ++ // make the "should_fail" test pass: ++ panic!("ok"); ++ } ++ ++ // create a slice - this is safe, because the ++ // elements of the slice exist, are properly ++ // initialized, and properly aligned: ++ let s: &mut [$elem_ty] ++ = slice::from_raw_parts_mut(ptr, $id::lanes()); ++ // this should always panic because the slice ++ // alignment does not match the alignment ++ // requirements for the vector type: ++ let vec = $id::splat(42 as $elem_ty); ++ vec.write_to_slice_aligned(s); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/api/swap_bytes.rs b/third_party/rust/packed_simd/src/api/swap_bytes.rs +new file mode 100644 +index 000000000000..53bba25bd311 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/api/swap_bytes.rs +@@ -0,0 +1,192 @@ ++//! Horizontal swap bytes ++ ++macro_rules! impl_swap_bytes { ++ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { ++ impl $id { ++ /// Reverses the byte order of the vector. ++ #[inline] ++ pub fn swap_bytes(self) -> Self { ++ super::codegen::swap_bytes::SwapBytes::swap_bytes(self) ++ } ++ ++ /// Converts self to little endian from the target's endianness. ++ /// ++ /// On little endian this is a no-op. On big endian the bytes are ++ /// swapped. ++ #[inline] ++ pub fn to_le(self) -> Self { ++ #[cfg(target_endian = "little")] ++ { ++ self ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ self.swap_bytes() ++ } ++ } ++ ++ /// Converts self to big endian from the target's endianness. ++ /// ++ /// On big endian this is a no-op. On little endian the bytes are ++ /// swapped. ++ #[inline] ++ pub fn to_be(self) -> Self { ++ #[cfg(target_endian = "big")] ++ { ++ self ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ self.swap_bytes() ++ } ++ } ++ ++ /// Converts a vector from little endian to the target's endianness. ++ /// ++ /// On little endian this is a no-op. On big endian the bytes are ++ /// swapped. ++ #[inline] ++ pub fn from_le(x: Self) -> Self { ++ #[cfg(target_endian = "little")] ++ { ++ x ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ x.swap_bytes() ++ } ++ } ++ ++ /// Converts a vector from big endian to the target's endianness. ++ /// ++ /// On big endian this is a no-op. On little endian the bytes are ++ /// swapped. ++ #[inline] ++ pub fn from_be(x: Self) -> Self { ++ #[cfg(target_endian = "big")] ++ { ++ x ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ x.swap_bytes() ++ } ++ } ++ } ++ ++ test_if! { ++ $test_tt: ++ paste::item_with_macros! { ++ pub mod [<$id _swap_bytes>] { ++ use super::*; ++ ++ const BYTES: [u8; 64] = [ ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ 16, 17, 18, 19, 20, 21, 22, 23, ++ 24, 25, 26, 27, 28, 29, 30, 31, ++ 32, 33, 34, 35, 36, 37, 38, 39, ++ 40, 41, 42, 43, 44, 45, 46, 47, ++ 48, 49, 50, 51, 52, 53, 54, 55, ++ 56, 57, 58, 59, 60, 61, 62, 63, ++ ]; ++ ++ macro_rules! swap { ++ ($func: ident) => {{ ++ // catch possible future >512 vectors ++ assert!(mem::size_of::<$id>() <= 64); ++ ++ let mut actual = BYTES; ++ let elems: &mut [$elem_ty] = unsafe { ++ slice::from_raw_parts_mut( ++ actual.as_mut_ptr() as *mut $elem_ty, ++ $id::lanes(), ++ ) ++ }; ++ ++ let vec = $id::from_slice_unaligned(elems); ++ $id::$func(vec).write_to_slice_unaligned(elems); ++ ++ actual ++ }}; ++ } ++ ++ macro_rules! 
test_swap { ++ ($func: ident) => {{ ++ let actual = swap!($func); ++ let expected = ++ BYTES.iter().rev() ++ .skip(64 - crate::mem::size_of::<$id>()); ++ assert!(actual.iter().zip(expected) ++ .all(|(x, y)| x == y)); ++ }}; ++ } ++ ++ macro_rules! test_no_swap { ++ ($func: ident) => {{ ++ let actual = swap!($func); ++ let expected = BYTES.iter() ++ .take(mem::size_of::<$id>()); ++ ++ assert!(actual.iter().zip(expected) ++ .all(|(x, y)| x == y)); ++ }}; ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn swap_bytes() { ++ test_swap!(swap_bytes); ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn to_le() { ++ #[cfg(target_endian = "little")] ++ { ++ test_no_swap!(to_le); ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ test_swap!(to_le); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn to_be() { ++ #[cfg(target_endian = "big")] ++ { ++ test_no_swap!(to_be); ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ test_swap!(to_be); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_le() { ++ #[cfg(target_endian = "little")] ++ { ++ test_no_swap!(from_le); ++ } ++ #[cfg(not(target_endian = "little"))] ++ { ++ test_swap!(from_le); ++ } ++ } ++ ++ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++ fn from_be() { ++ #[cfg(target_endian = "big")] ++ { ++ test_no_swap!(from_be); ++ } ++ #[cfg(not(target_endian = "big"))] ++ { ++ test_swap!(from_be); ++ } ++ } ++ } ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen.rs b/third_party/rust/packed_simd/src/codegen.rs +new file mode 100644 +index 000000000000..b7ccd838603f +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen.rs +@@ -0,0 +1,59 @@ ++//! Code-generation utilities ++ ++crate mod bit_manip; ++crate mod llvm; ++crate mod math; ++crate mod reductions; ++crate mod shuffle; ++crate mod shuffle1_dyn; ++crate mod swap_bytes; ++ ++macro_rules! impl_simd_array { ++ ([$elem_ty:ident; $elem_count:expr]: ++ $tuple_id:ident | $($elem_tys:ident),*) => { ++ #[derive(Copy, Clone)] ++ #[repr(simd)] ++ pub struct $tuple_id($(crate $elem_tys),*); ++ //^^^^^^^ leaked through SimdArray ++ ++ impl crate::sealed::SimdArray for [$elem_ty; $elem_count] { ++ type Tuple = $tuple_id; ++ type T = $elem_ty; ++ const N: usize = $elem_count; ++ type NT = [u32; $elem_count]; ++ } ++ ++ impl crate::sealed::Simd for $tuple_id { ++ type Element = $elem_ty; ++ const LANES: usize = $elem_count; ++ type LanesType = [u32; $elem_count]; ++ } ++ ++ } ++} ++ ++crate mod pointer_sized_int; ++ ++crate mod v16; ++crate use self::v16::*; ++ ++crate mod v32; ++crate use self::v32::*; ++ ++crate mod v64; ++crate use self::v64::*; ++ ++crate mod v128; ++crate use self::v128::*; ++ ++crate mod v256; ++crate use self::v256::*; ++ ++crate mod v512; ++crate use self::v512::*; ++ ++crate mod vSize; ++crate use self::vSize::*; ++ ++crate mod vPtr; ++crate use self::vPtr::*; +diff --git a/third_party/rust/packed_simd/src/codegen/bit_manip.rs b/third_party/rust/packed_simd/src/codegen/bit_manip.rs +new file mode 100644 +index 000000000000..947266f5bce8 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/bit_manip.rs +@@ -0,0 +1,354 @@ ++//! LLVM bit manipulation intrinsics. 
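++// Lane-wise bit counting: ctpop counts the set bits in each lane, ctlz its ++// leading zeros, and cttz its trailing zeros. A scalar sketch of what each ++// lane computes (using core's integer methods): ++// ++//     assert_eq!(0b0000_0110_u8.count_ones(), 2);     // ctpop ++//     assert_eq!(0b0000_0110_u8.leading_zeros(), 5);  // ctlz ++//     assert_eq!(0b0000_0110_u8.trailing_zeros(), 1); // cttz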
++#![rustfmt::skip] ++ ++use crate::*; ++ ++#[allow(improper_ctypes, dead_code)] ++extern "C" { ++ #[link_name = "llvm.ctlz.v2i8"] ++ fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; ++ #[link_name = "llvm.ctlz.v4i8"] ++ fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; ++ #[link_name = "llvm.ctlz.v8i8"] ++ fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; ++ #[link_name = "llvm.ctlz.v16i8"] ++ fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; ++ #[link_name = "llvm.ctlz.v32i8"] ++ fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; ++ #[link_name = "llvm.ctlz.v64i8"] ++ fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; ++ ++ #[link_name = "llvm.ctlz.v2i16"] ++ fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; ++ #[link_name = "llvm.ctlz.v4i16"] ++ fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; ++ #[link_name = "llvm.ctlz.v8i16"] ++ fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; ++ #[link_name = "llvm.ctlz.v16i16"] ++ fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; ++ #[link_name = "llvm.ctlz.v32i16"] ++ fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; ++ ++ #[link_name = "llvm.ctlz.v2i32"] ++ fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; ++ #[link_name = "llvm.ctlz.v4i32"] ++ fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; ++ #[link_name = "llvm.ctlz.v8i32"] ++ fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; ++ #[link_name = "llvm.ctlz.v16i32"] ++ fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; ++ ++ #[link_name = "llvm.ctlz.v2i64"] ++ fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; ++ #[link_name = "llvm.ctlz.v4i64"] ++ fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; ++ #[link_name = "llvm.ctlz.v8i64"] ++ fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; ++ ++ #[link_name = "llvm.ctlz.v1i128"] ++ fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; ++ #[link_name = "llvm.ctlz.v2i128"] ++ fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; ++ #[link_name = "llvm.ctlz.v4i128"] ++ fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; ++ ++ #[link_name = "llvm.cttz.v2i8"] ++ fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; ++ #[link_name = "llvm.cttz.v4i8"] ++ fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; ++ #[link_name = "llvm.cttz.v8i8"] ++ fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; ++ #[link_name = "llvm.cttz.v16i8"] ++ fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; ++ #[link_name = "llvm.cttz.v32i8"] ++ fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; ++ #[link_name = "llvm.cttz.v64i8"] ++ fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; ++ ++ #[link_name = "llvm.cttz.v2i16"] ++ fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; ++ #[link_name = "llvm.cttz.v4i16"] ++ fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; ++ #[link_name = "llvm.cttz.v8i16"] ++ fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; ++ #[link_name = "llvm.cttz.v16i16"] ++ fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; ++ #[link_name = "llvm.cttz.v32i16"] ++ fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; ++ ++ #[link_name = "llvm.cttz.v2i32"] ++ fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; ++ #[link_name = "llvm.cttz.v4i32"] ++ fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; ++ #[link_name = "llvm.cttz.v8i32"] ++ fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; ++ #[link_name = "llvm.cttz.v16i32"] ++ fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; ++ ++ #[link_name = "llvm.cttz.v2i64"] ++ 
fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; ++ #[link_name = "llvm.cttz.v4i64"] ++ fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; ++ #[link_name = "llvm.cttz.v8i64"] ++ fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; ++ ++ #[link_name = "llvm.cttz.v1i128"] ++ fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; ++ #[link_name = "llvm.cttz.v2i128"] ++ fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; ++ #[link_name = "llvm.cttz.v4i128"] ++ fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; ++ ++ #[link_name = "llvm.ctpop.v2i8"] ++ fn ctpop_u8x2(x: u8x2) -> u8x2; ++ #[link_name = "llvm.ctpop.v4i8"] ++ fn ctpop_u8x4(x: u8x4) -> u8x4; ++ #[link_name = "llvm.ctpop.v8i8"] ++ fn ctpop_u8x8(x: u8x8) -> u8x8; ++ #[link_name = "llvm.ctpop.v16i8"] ++ fn ctpop_u8x16(x: u8x16) -> u8x16; ++ #[link_name = "llvm.ctpop.v32i8"] ++ fn ctpop_u8x32(x: u8x32) -> u8x32; ++ #[link_name = "llvm.ctpop.v64i8"] ++ fn ctpop_u8x64(x: u8x64) -> u8x64; ++ ++ #[link_name = "llvm.ctpop.v2i16"] ++ fn ctpop_u16x2(x: u16x2) -> u16x2; ++ #[link_name = "llvm.ctpop.v4i16"] ++ fn ctpop_u16x4(x: u16x4) -> u16x4; ++ #[link_name = "llvm.ctpop.v8i16"] ++ fn ctpop_u16x8(x: u16x8) -> u16x8; ++ #[link_name = "llvm.ctpop.v16i16"] ++ fn ctpop_u16x16(x: u16x16) -> u16x16; ++ #[link_name = "llvm.ctpop.v32i16"] ++ fn ctpop_u16x32(x: u16x32) -> u16x32; ++ ++ #[link_name = "llvm.ctpop.v2i32"] ++ fn ctpop_u32x2(x: u32x2) -> u32x2; ++ #[link_name = "llvm.ctpop.v4i32"] ++ fn ctpop_u32x4(x: u32x4) -> u32x4; ++ #[link_name = "llvm.ctpop.v8i32"] ++ fn ctpop_u32x8(x: u32x8) -> u32x8; ++ #[link_name = "llvm.ctpop.v16i32"] ++ fn ctpop_u32x16(x: u32x16) -> u32x16; ++ ++ #[link_name = "llvm.ctpop.v2i64"] ++ fn ctpop_u64x2(x: u64x2) -> u64x2; ++ #[link_name = "llvm.ctpop.v4i64"] ++ fn ctpop_u64x4(x: u64x4) -> u64x4; ++ #[link_name = "llvm.ctpop.v8i64"] ++ fn ctpop_u64x8(x: u64x8) -> u64x8; ++ ++ #[link_name = "llvm.ctpop.v1i128"] ++ fn ctpop_u128x1(x: u128x1) -> u128x1; ++ #[link_name = "llvm.ctpop.v2i128"] ++ fn ctpop_u128x2(x: u128x2) -> u128x2; ++ #[link_name = "llvm.ctpop.v4i128"] ++ fn ctpop_u128x4(x: u128x4) -> u128x4; ++} ++ ++crate trait BitManip { ++ fn ctpop(self) -> Self; ++ fn ctlz(self) -> Self; ++ fn cttz(self) -> Self; ++} ++ ++macro_rules! impl_bit_manip { ++ (inner: $ty:ident, $scalar:ty, $uty:ident, ++ $ctpop:ident, $ctlz:ident, $cttz:ident) => { ++ // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192 ++ #[cfg(target_arch = "s390x")] ++ impl_bit_manip! { scalar: $ty, $scalar } ++ #[cfg(not(target_arch = "s390x"))] ++ impl BitManip for $ty { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: $uty = self.cast(); ++ unsafe { $ctpop(y).cast() } ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: $uty = self.cast(); ++ // the ctxx intrinsics need compile-time constant ++ // `is_zero_undef` ++ unsafe { $ctlz(y, false).cast() } ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ let y: $uty = self.cast(); ++ unsafe { $cttz(y, false).cast() } ++ } ++ } ++ }; ++ (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => { ++ #[cfg(target_arch = "s390x")] ++ impl_bit_manip! 
{ scalar: $ty, $scalar } ++ #[cfg(not(target_arch = "s390x"))] ++ impl BitManip for $ty { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: $uty = self.cast(); ++ $uty::ctpop(y).cast() ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: $uty = self.cast(); ++ $uty::ctlz(y).cast() ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ let y: $uty = self.cast(); ++ $uty::cttz(y).cast() ++ } ++ } ++ }; ++ (scalar: $ty:ident, $scalar:ty) => { ++ impl BitManip for $ty { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let mut ones = self; ++ for i in 0..Self::lanes() { ++ ones = ones ++ .replace(i, self.extract(i).count_ones() as $scalar); ++ } ++ ones ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let mut lz = self; ++ for i in 0..Self::lanes() { ++ lz = lz.replace( ++ i, ++ self.extract(i).leading_zeros() as $scalar, ++ ); ++ } ++ lz ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ let mut tz = self; ++ for i in 0..Self::lanes() { ++ tz = tz.replace( ++ i, ++ self.extract(i).trailing_zeros() as $scalar, ++ ); ++ } ++ tz ++ } ++ } ++ }; ++ ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty, ++ $ctpop:ident, $ctlz:ident, $cttz:ident) => { ++ impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz } ++ impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz } ++ }; ++ (sized: $usize:ident, $uscalar:ty, $isize:ident, ++ $iscalar:ty, $ty:ident) => { ++ impl_bit_manip! { sized_inner: $usize, $uscalar, $ty } ++ impl_bit_manip! { sized_inner: $isize, $iscalar, $ty } ++ }; ++} ++ ++impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 } ++impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 } ++#[cfg(not(target_arch = "aarch64"))] // see below ++impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 } ++impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 } ++impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 } ++impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 } ++impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 } ++impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 } ++impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 } ++impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 } ++impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 } ++impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 } ++impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 } ++impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 } ++impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 } ++impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 } ++impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 } ++impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 } ++impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 } ++impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 } ++impl_bit_manip! 
{ u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 } ++ ++#[cfg(target_arch = "aarch64")] ++impl BitManip for u8x8 { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctpop_u8x8(y).cast() } ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctlz_u8x8(y, false).cast() } ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 ++ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 ++ // intrinsics ++ let mut tz = self; ++ for i in 0..Self::lanes() { ++ tz = tz.replace(i, self.extract(i).trailing_zeros() as u8); ++ } ++ tz ++ } ++} ++#[cfg(target_arch = "aarch64")] ++impl BitManip for i8x8 { ++ #[inline] ++ fn ctpop(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctpop_u8x8(y).cast() } ++ } ++ ++ #[inline] ++ fn ctlz(self) -> Self { ++ let y: u8x8 = self.cast(); ++ unsafe { ctlz_u8x8(y, false).cast() } ++ } ++ ++ #[inline] ++ fn cttz(self) -> Self { ++ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 ++ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 ++ // intrinsics ++ let mut tz = self; ++ for i in 0..Self::lanes() { ++ tz = tz.replace(i, self.extract(i).trailing_zeros() as i8); ++ } ++ tz ++ } ++} ++ ++cfg_if! { ++ if #[cfg(target_pointer_width = "8")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 } ++ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 } ++ } else if #[cfg(target_pointer_width = "16")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 } ++ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 } ++ } else if #[cfg(target_pointer_width = "32")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 } ++ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 } ++ } else if #[cfg(target_pointer_width = "64")] { ++ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 } ++ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 } ++ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 } ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/llvm.rs b/third_party/rust/packed_simd/src/codegen/llvm.rs +new file mode 100644 +index 000000000000..91c2b0758dcf +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/llvm.rs +@@ -0,0 +1,99 @@ ++//! LLVM's platform intrinsics ++#![allow(dead_code)] ++ ++use crate::sealed::Shuffle; ++#[allow(unused_imports)] // FIXME: spurious warning? ++use crate::sealed::Simd; ++ ++// Shuffle intrinsics: expanded in users' crates, therefore public. ++extern "platform-intrinsic" { ++ // FIXME: Passing this intrinsics an `idx` array with an index that is ++ // out-of-bounds will produce a monomorphization-time error. 
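++ // (the indices select from the concatenation of x and y: for ++ // simd_shuffle2, idx entries 0..=1 pick lanes of x and 2..=3 pick ++ // lanes of y)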
++ // https://github.com/rust-lang-nursery/packed_simd/issues/21 ++ pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U ++ where ++ T: Simd, ++ <T as Simd>::Element: Shuffle<[u32; 2], Output = U>; ++ ++ pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U ++ where ++ T: Simd, ++ <T as Simd>::Element: Shuffle<[u32; 4], Output = U>; ++ ++ pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U ++ where ++ T: Simd, ++ <T as Simd>::Element: Shuffle<[u32; 8], Output = U>; ++ ++ pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U ++ where ++ T: Simd, ++ <T as Simd>::Element: Shuffle<[u32; 16], Output = U>; ++ ++ pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U ++ where ++ T: Simd, ++ <T as Simd>::Element: Shuffle<[u32; 32], Output = U>; ++ ++ pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U ++ where ++ T: Simd, ++ <T as Simd>::Element: Shuffle<[u32; 64], Output = U>; ++} ++ ++pub use self::simd_shuffle16 as __shuffle_vector16; ++pub use self::simd_shuffle2 as __shuffle_vector2; ++pub use self::simd_shuffle32 as __shuffle_vector32; ++pub use self::simd_shuffle4 as __shuffle_vector4; ++pub use self::simd_shuffle64 as __shuffle_vector64; ++pub use self::simd_shuffle8 as __shuffle_vector8; ++ ++extern "platform-intrinsic" { ++ crate fn simd_eq<T, U>(x: T, y: T) -> U; ++ crate fn simd_ne<T, U>(x: T, y: T) -> U; ++ crate fn simd_lt<T, U>(x: T, y: T) -> U; ++ crate fn simd_le<T, U>(x: T, y: T) -> U; ++ crate fn simd_gt<T, U>(x: T, y: T) -> U; ++ crate fn simd_ge<T, U>(x: T, y: T) -> U; ++ ++ crate fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T; ++ crate fn simd_extract<T, U>(x: T, idx: u32) -> U; ++ ++ crate fn simd_cast<T, U>(x: T) -> U; ++ ++ crate fn simd_add<T>(x: T, y: T) -> T; ++ crate fn simd_sub<T>(x: T, y: T) -> T; ++ crate fn simd_mul<T>(x: T, y: T) -> T; ++ crate fn simd_div<T>(x: T, y: T) -> T; ++ crate fn simd_rem<T>(x: T, y: T) -> T; ++ crate fn simd_shl<T>(x: T, y: T) -> T; ++ crate fn simd_shr<T>(x: T, y: T) -> T; ++ crate fn simd_and<T>(x: T, y: T) -> T; ++ crate fn simd_or<T>(x: T, y: T) -> T; ++ crate fn simd_xor<T>(x: T, y: T) -> T; ++ ++ crate fn simd_reduce_add_unordered<T, U>(x: T) -> U; ++ crate fn simd_reduce_mul_unordered<T, U>(x: T) -> U; ++ crate fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U; ++ crate fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U; ++ crate fn simd_reduce_min<T, U>(x: T) -> U; ++ crate fn simd_reduce_max<T, U>(x: T) -> U; ++ crate fn simd_reduce_min_nanless<T, U>(x: T) -> U; ++ crate fn simd_reduce_max_nanless<T, U>(x: T) -> U; ++ crate fn simd_reduce_and<T, U>(x: T) -> U; ++ crate fn simd_reduce_or<T, U>(x: T) -> U; ++ crate fn simd_reduce_xor<T, U>(x: T) -> U; ++ crate fn simd_reduce_all<T>(x: T) -> bool; ++ crate fn simd_reduce_any<T>(x: T) -> bool; ++ ++ crate fn simd_select<M, T>(m: M, a: T, b: T) -> T; ++ ++ crate fn simd_fmin<T>(a: T, b: T) -> T; ++ crate fn simd_fmax<T>(a: T, b: T) -> T; ++ ++ crate fn simd_fsqrt<T>(a: T) -> T; ++ crate fn simd_fma<T>(a: T, b: T, c: T) -> T; ++ ++ crate fn simd_gather<T, P, M>(value: T, pointers: P, mask: M) -> T; ++ crate fn simd_scatter<T, P, M>(value: T, pointers: P, mask: M); ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math.rs b/third_party/rust/packed_simd/src/codegen/math.rs +new file mode 100644 +index 000000000000..f3997c7f1135 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math.rs +@@ -0,0 +1,3 @@ ++//! Vertical math operations ++ ++crate mod float; +diff --git a/third_party/rust/packed_simd/src/codegen/math/float.rs b/third_party/rust/packed_simd/src/codegen/math/float.rs +new file mode 100644 +index 000000000000..5e89bf6ae6b0 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float.rs +@@ -0,0 +1,18 @@ ++//! Vertical floating-point math operations.
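++// "Vertical" means lane-wise: each operation applies independently to every ++// lane of its operands, e.g. (sketch) f32x4::new(1.0, 4.0, 9.0, 16.0).sqrt() ++// == f32x4::new(1.0, 2.0, 3.0, 4.0).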
++#![allow(clippy::useless_transmute)] ++ ++#[macro_use] ++crate mod macros; ++crate mod abs; ++crate mod cos; ++crate mod cos_pi; ++crate mod exp; ++crate mod ln; ++crate mod mul_add; ++crate mod mul_adde; ++crate mod powf; ++crate mod sin; ++crate mod sin_cos_pi; ++crate mod sin_pi; ++crate mod sqrt; ++crate mod sqrte; +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/abs.rs b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs +new file mode 100644 +index 000000000000..bc4421f61de2 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs +@@ -0,0 +1,103 @@ ++//! Vertical floating-point `fabs` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors fabs ++ ++use crate::*; ++ ++crate trait Abs { ++ fn abs(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.fabs.v2f32"] ++ fn fabs_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.fabs.v4f32"] ++ fn fabs_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.fabs.v8f32"] ++ fn fabs_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.fabs.v16f32"] ++ fn fabs_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit single elem vectors ++ #[link_name = "llvm.fabs.v1f64"] ++ fn fabs_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.fabs.v2f64"] ++ fn fabs_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.fabs.v4f64"] ++ fn fabs_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.fabs.v8f64"] ++ fn fabs_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.fabs.f32"] ++ fn fabs_f32(x: f32) -> f32; ++ #[link_name = "llvm.fabs.f64"] ++ fn fabs_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Abs, abs); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: fabs_f32); ++ impl_unary!(f32x4[f32; 4]: fabs_f32); ++ impl_unary!(f32x8[f32; 8]: fabs_f32); ++ impl_unary!(f32x16[f32; 16]: fabs_f32); ++ ++ impl_unary!(f64x2[f64; 2]: fabs_f64); ++ impl_unary!(f64x4[f64; 4]: fabs_f64); ++ impl_unary!(f64x8[f64; 8]: fabs_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if!
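++ // (dispatch sketch: s390x falls back to calling the scalar ++ // llvm.fabs.f32/f64 once per lane; with the sleef-sys feature on x86_64 ++ // the SLEEF vector kernels below are chosen by target feature; otherwise ++ // the generic llvm.fabs.vNfM intrinsics above are used)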
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2); ++ ++ impl_unary!(f32x4: Sleef_fabsf4_avx2128); ++ impl_unary!(f32x8: Sleef_fabsf8_avx2); ++ impl_unary!(f64x2: Sleef_fabsd2_avx2128); ++ impl_unary!(f64x4: Sleef_fabsd4_avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx); ++ ++ impl_unary!(f32x4: Sleef_fabsf4_sse4); ++ impl_unary!(f32x8: Sleef_fabsf8_avx); ++ impl_unary!(f64x2: Sleef_fabsd2_sse4); ++ impl_unary!(f64x4: Sleef_fabsd4_avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4); ++ ++ impl_unary!(f32x4: Sleef_fabsf4_sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4); ++ impl_unary!(f64x2: Sleef_fabsd2_sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4); ++ } else { ++ impl_unary!(f32x2[f32; 2]: fabs_f32); ++ impl_unary!(f32x16: fabs_v16f32); ++ impl_unary!(f64x8: fabs_v8f64); ++ ++ impl_unary!(f32x4: fabs_v4f32); ++ impl_unary!(f32x8: fabs_v8f32); ++ impl_unary!(f64x2: fabs_v2f64); ++ impl_unary!(f64x4: fabs_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: fabs_f32); ++ impl_unary!(f32x4: fabs_v4f32); ++ impl_unary!(f32x8: fabs_v8f32); ++ impl_unary!(f32x16: fabs_v16f32); ++ ++ impl_unary!(f64x2: fabs_v2f64); ++ impl_unary!(f64x4: fabs_v4f64); ++ impl_unary!(f64x8: fabs_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs +new file mode 100644 +index 000000000000..50f6c16da255 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs +@@ -0,0 +1,103 @@ ++//! Vertical floating-point `cos` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vector cos ++ ++use crate::*; ++ ++crate trait Cos { ++ fn cos(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.cos.v2f32"] ++ fn cos_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.cos.v4f32"] ++ fn cos_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.cos.v8f32"] ++ fn cos_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.cos.v16f32"] ++ fn cos_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit single elem vectors ++ #[link_name = "llvm.cos.v1f64"] ++ fn cos_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.cos.v2f64"] ++ fn cos_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.cos.v4f64"] ++ fn cos_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.cos.v8f64"] ++ fn cos_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.cos.f32"] ++ fn cos_f32(x: f32) -> f32; ++ #[link_name = "llvm.cos.f64"] ++ fn cos_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Cos, cos); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: cos_f32); ++ impl_unary!(f32x4[f32; 4]: cos_f32); ++ impl_unary!(f32x8[f32; 8]: cos_f32); ++ impl_unary!(f32x16[f32; 16]: cos_f32); ++ ++ impl_unary!(f64x2[f64; 2]: cos_f64); ++ impl_unary!(f64x4[f64; 4]: cos_f64); ++ impl_unary!(f64x8[f64; 8]: cos_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if!
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2); ++ ++ impl_unary!(f32x4: Sleef_cosf4_u10avx2128); ++ impl_unary!(f32x8: Sleef_cosf8_u10avx2); ++ impl_unary!(f64x2: Sleef_cosd2_u10avx2128); ++ impl_unary!(f64x4: Sleef_cosd4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx); ++ ++ impl_unary!(f32x4: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x8: Sleef_cosf8_u10avx); ++ impl_unary!(f64x2: Sleef_cosd2_u10sse4); ++ impl_unary!(f64x4: Sleef_cosd4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4); ++ ++ impl_unary!(f32x4: Sleef_cosf4_u10sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4); ++ impl_unary!(f64x2: Sleef_cosd2_u10sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4); ++ } else { ++ impl_unary!(f32x2[f32; 2]: cos_f32); ++ impl_unary!(f32x16: cos_v16f32); ++ impl_unary!(f64x8: cos_v8f64); ++ ++ impl_unary!(f32x4: cos_v4f32); ++ impl_unary!(f32x8: cos_v8f32); ++ impl_unary!(f64x2: cos_v2f64); ++ impl_unary!(f64x4: cos_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: cos_f32); ++ impl_unary!(f32x4: cos_v4f32); ++ impl_unary!(f32x8: cos_v8f32); ++ impl_unary!(f32x16: cos_v16f32); ++ ++ impl_unary!(f64x2: cos_v2f64); ++ impl_unary!(f64x4: cos_v4f64); ++ impl_unary!(f64x8: cos_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs +new file mode 100644 +index 000000000000..ebff5fd1c751 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs +@@ -0,0 +1,87 @@ ++//! Vertical floating-point `cos` ++#![allow(unused)] ++ ++// FIXME 64-bit 1 elem vectors cos_pi ++ ++use crate::*; ++ ++crate trait CosPi { ++ fn cos_pi(self) -> Self; ++} ++ ++gen_unary_impl_table!(CosPi, cos_pi); ++ ++macro_rules! impl_def { ++ ($vid:ident, $PI:path) => { ++ impl CosPi for $vid { ++ #[inline] ++ fn cos_pi(self) -> Self { ++ (self * Self::splat($PI)).cos() ++ } ++ } ++ }; ++} ++macro_rules! impl_def32 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f32::consts::PI); ++ }; ++} ++macro_rules! impl_def64 { ++ ($vid:ident) => { ++ impl_def!($vid, crate::f64::consts::PI); ++ }; ++} ++ ++cfg_if! { ++ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! 
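++ // (the portable impl_def fallback above computes cos_pi(x) as ++ // cos(x * PI) lane-wise; the SLEEF _u05 kernels below are dedicated ++ // cospi routines with a 0.5 ULP error bound)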
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2); ++ ++ impl_unary!(f32x4: Sleef_cospif4_u05avx2128); ++ impl_unary!(f32x8: Sleef_cospif8_u05avx2); ++ impl_unary!(f64x2: Sleef_cospid2_u05avx2128); ++ impl_unary!(f64x4: Sleef_cospid4_u05avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx); ++ ++ impl_unary!(f32x4: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x8: Sleef_cospif8_u05avx); ++ impl_unary!(f64x2: Sleef_cospid2_u05sse4); ++ impl_unary!(f64x4: Sleef_cospid4_u05avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4); ++ ++ impl_unary!(f32x4: Sleef_cospif4_u05sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4); ++ impl_unary!(f64x2: Sleef_cospid2_u05sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4); ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++ } ++ } else { ++ impl_def32!(f32x2); ++ impl_def32!(f32x4); ++ impl_def32!(f32x8); ++ impl_def32!(f32x16); ++ ++ impl_def64!(f64x2); ++ impl_def64!(f64x4); ++ impl_def64!(f64x8); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/exp.rs b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs +new file mode 100644 +index 000000000000..00d10e9fa644 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs +@@ -0,0 +1,112 @@ ++//! Vertical floating-point `exp` ++#![allow(unused)] ++ ++// FIXME 64-bit single elem vectors missing ++ ++use crate::*; ++ ++crate trait Exp { ++ fn exp(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.exp.v2f32"] ++ fn exp_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.exp.v4f32"] ++ fn exp_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.exp.v8f32"] ++ fn exp_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.exp.v16f32"] ++ fn exp_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit single elem vectors ++ #[link_name = "llvm.exp.v1f64"] ++ fn exp_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.exp.v2f64"] ++ fn exp_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.exp.v4f64"] ++ fn exp_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.exp.v8f64"] ++ fn exp_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.exp.f32"] ++ fn exp_f32(x: f32) -> f32; ++ #[link_name = "llvm.exp.f64"] ++ fn exp_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Exp, exp); ++ ++cfg_if! { ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: exp_f32); ++ impl_unary!(f32x4[f32; 4]: exp_f32); ++ impl_unary!(f32x8[f32; 8]: exp_f32); ++ impl_unary!(f32x16[f32; 16]: exp_f32); ++ ++ impl_unary!(f64x2[f64; 2]: exp_f64); ++ impl_unary!(f64x4[f64; 4]: exp_f64); ++ impl_unary!(f64x8[f64; 8]: exp_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if!
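++ // (note: unlike fabs and cos above, SLEEF also ships SSE2 variants of ++ // exp, so the dispatch below has an extra sse2 branch before the ++ // generic LLVM fallback)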
{ ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10avx2128); ++ impl_unary!(f32x8: Sleef_expf8_u10avx2); ++ impl_unary!(f64x2: Sleef_expd2_u10avx2128); ++ impl_unary!(f64x4: Sleef_expd4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10sse4); ++ impl_unary!(f32x8: Sleef_expf8_u10avx); ++ impl_unary!(f64x2: Sleef_expd2_u10sse4); ++ impl_unary!(f64x4: Sleef_expd4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4); ++ impl_unary!(f64x2: Sleef_expd2_u10sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4); ++ } else if #[cfg(target_feature = "sse2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2); ++ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2); ++ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2); ++ ++ impl_unary!(f32x4: Sleef_expf4_u10sse2); ++ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2); ++ impl_unary!(f64x2: Sleef_expd2_u10sse2); ++ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2); ++ } else { ++ impl_unary!(f32x2[f32; 2]: exp_f32); ++ impl_unary!(f32x16: exp_v16f32); ++ impl_unary!(f64x8: exp_v8f64); ++ ++ impl_unary!(f32x4: exp_v4f32); ++ impl_unary!(f32x8: exp_v8f32); ++ impl_unary!(f64x2: exp_v2f64); ++ impl_unary!(f64x4: exp_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: exp_f32); ++ impl_unary!(f32x4: exp_v4f32); ++ impl_unary!(f32x8: exp_v8f32); ++ impl_unary!(f32x16: exp_v16f32); ++ ++ impl_unary!(f64x2: exp_v2f64); ++ impl_unary!(f64x4: exp_v4f64); ++ impl_unary!(f64x8: exp_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/ln.rs b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs +new file mode 100644 +index 000000000000..88a5a6c6c158 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs +@@ -0,0 +1,112 @@ ++//! Vertical floating-point `ln` ++#![allow(unused)] ++ ++// FIXME 64-bit single elem vectors missing ++ ++use crate::*; ++ ++crate trait Ln { ++ fn ln(self) -> Self; ++} ++ ++#[allow(improper_ctypes)] ++extern "C" { ++ #[link_name = "llvm.log.v2f32"] ++ fn ln_v2f32(x: f32x2) -> f32x2; ++ #[link_name = "llvm.log.v4f32"] ++ fn ln_v4f32(x: f32x4) -> f32x4; ++ #[link_name = "llvm.log.v8f32"] ++ fn ln_v8f32(x: f32x8) -> f32x8; ++ #[link_name = "llvm.log.v16f32"] ++ fn ln_v16f32(x: f32x16) -> f32x16; ++ /* FIXME 64-bit single elem vectors ++ #[link_name = "llvm.log.v1f64"] ++ fn ln_v1f64(x: f64x1) -> f64x1; ++ */ ++ #[link_name = "llvm.log.v2f64"] ++ fn ln_v2f64(x: f64x2) -> f64x2; ++ #[link_name = "llvm.log.v4f64"] ++ fn ln_v4f64(x: f64x4) -> f64x4; ++ #[link_name = "llvm.log.v8f64"] ++ fn ln_v8f64(x: f64x8) -> f64x8; ++ ++ #[link_name = "llvm.log.f32"] ++ fn ln_f32(x: f32) -> f32; ++ #[link_name = "llvm.log.f64"] ++ fn ln_f64(x: f64) -> f64; ++} ++ ++gen_unary_impl_table!(Ln, ln); ++ ++cfg_if!
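++// (llvm.log.* is the natural logarithm; the dispatch below mirrors exp, ++// including the sse2 SLEEF branch)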
{ ++ if #[cfg(target_arch = "s390x")] { ++ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 ++ impl_unary!(f32x2[f32; 2]: ln_f32); ++ impl_unary!(f32x4[f32; 4]: ln_f32); ++ impl_unary!(f32x8[f32; 8]: ln_f32); ++ impl_unary!(f32x16[f32; 16]: ln_f32); ++ ++ impl_unary!(f64x2[f64; 2]: ln_f64); ++ impl_unary!(f64x4[f64; 4]: ln_f64); ++ impl_unary!(f64x8[f64; 8]: ln_f64); ++ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { ++ use sleef_sys::*; ++ cfg_if! { ++ if #[cfg(target_feature = "avx2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128); ++ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2); ++ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10avx2128); ++ impl_unary!(f32x8: Sleef_logf8_u10avx2); ++ impl_unary!(f64x2: Sleef_logd2_u10avx2128); ++ impl_unary!(f64x4: Sleef_logd4_u10avx2); ++ } else if #[cfg(target_feature = "avx")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx); ++ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10sse4); ++ impl_unary!(f32x8: Sleef_logf8_u10avx); ++ impl_unary!(f64x2: Sleef_logd2_u10sse4); ++ impl_unary!(f64x4: Sleef_logd4_u10avx); ++ } else if #[cfg(target_feature = "sse4.2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10sse4); ++ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4); ++ impl_unary!(f64x2: Sleef_logd2_u10sse4); ++ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4); ++ } else if #[cfg(target_feature = "sse2")] { ++ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2); ++ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2); ++ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2); ++ ++ impl_unary!(f32x4: Sleef_logf4_u10sse2); ++ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2); ++ impl_unary!(f64x2: Sleef_logd2_u10sse2); ++ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2); ++ } else { ++ impl_unary!(f32x2[f32; 2]: ln_f32); ++ impl_unary!(f32x16: ln_v16f32); ++ impl_unary!(f64x8: ln_v8f64); ++ ++ impl_unary!(f32x4: ln_v4f32); ++ impl_unary!(f32x8: ln_v8f32); ++ impl_unary!(f64x2: ln_v2f64); ++ impl_unary!(f64x4: ln_v4f64); ++ } ++ } ++ } else { ++ impl_unary!(f32x2[f32; 2]: ln_f32); ++ impl_unary!(f32x4: ln_v4f32); ++ impl_unary!(f32x8: ln_v8f32); ++ impl_unary!(f32x16: ln_v16f32); ++ ++ impl_unary!(f64x2: ln_v2f64); ++ impl_unary!(f64x4: ln_v4f64); ++ impl_unary!(f64x8: ln_v8f64); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/math/float/macros.rs b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs +new file mode 100644 +index 000000000000..02d0ca3f5c7a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs +@@ -0,0 +1,559 @@ ++//! Utility macros ++#![allow(unused)] ++ ++ ++macro_rules! impl_unary_ { ++ // implementation mapping 1:1 ++ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun(transmute(self))) ++ } ++ } ++ } ++ }; ++ // implementation mapping 1:1 for when `$fun` is a generic function ++ // like some of the fp math rustc intrinsics (e.g. `fn fun(x: T) -> T`). 
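++ // (the arms below implement the same unary mapping with different ++ // strategies: "scalar" loops over the lanes, "halves"/"quarter" split ++ // the vector and call $fun on each part, and "twice" widens to a ++ // double-width vector and keeps the low half of the result)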
++ (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun(self.0)) ++ } ++ } ++ } ++ }; ++ (scalar | $trait_id:ident, $trait_method:ident, ++ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ union U { ++ vec: $vec_id, ++ scalars: [$sid; $scount], ++ } ++ let mut scalars = U { vec: self }.scalars; ++ for i in &mut scalars { ++ *i = $fun(*i); ++ } ++ U { scalars }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun twice on each of the vector halves: ++ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vech_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ halves: [$vech_id; 2], ++ } ++ ++ let mut halves = U { vec: self }.halves; ++ ++ *halves.get_unchecked_mut(0) = ++ transmute($fun(transmute(*halves.get_unchecked(0)))); ++ *halves.get_unchecked_mut(1) = ++ transmute($fun(transmute(*halves.get_unchecked(1)))); ++ ++ U { halves }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun four times on each of the vector quarters: ++ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vecq_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ quarters: [$vecq_id; 4], ++ } ++ ++ let mut quarters = U { vec: self }.quarters; ++ ++ *quarters.get_unchecked_mut(0) = ++ transmute($fun(transmute(*quarters.get_unchecked(0)))); ++ *quarters.get_unchecked_mut(1) = ++ transmute($fun(transmute(*quarters.get_unchecked(1)))); ++ *quarters.get_unchecked_mut(2) = ++ transmute($fun(transmute(*quarters.get_unchecked(2)))); ++ *quarters.get_unchecked_mut(3) = ++ transmute($fun(transmute(*quarters.get_unchecked(3)))); ++ ++ U { quarters }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun once on a vector twice as large: ++ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vect_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self) -> Self { ++ unsafe { ++ use crate::mem::{transmute, uninitialized}; ++ ++ union U { ++ vec: [$vec_id; 2], ++ twice: $vect_id, ++ } ++ ++ let twice = U { vec: [self, uninitialized()] }.twice; ++ let twice = transmute($fun(transmute(twice))); ++ ++ *(U { twice }.vec.get_unchecked(0)) ++ } ++ } ++ } ++ }; ++} ++ ++macro_rules! gen_unary_impl_table { ++ ($trait_id:ident, $trait_method:ident) => { ++ macro_rules! 
impl_unary { ++ ($vid:ident: $fun:ident) => { ++ impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[g]: $fun:ident) => { ++ impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { ++ impl_unary_!( ++ scalar | $trait_id, ++ $trait_method, ++ $vid, ++ [$sid; $sc], ++ $fun ++ ); ++ }; ++ ($vid:ident[s]: $fun:ident) => { ++ impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun); ++ }; ++ ($vid:ident[h => $vid_h:ident]: $fun:ident) => { ++ impl_unary_!( ++ halves | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_h, ++ $fun ++ ); ++ }; ++ ($vid:ident[q => $vid_q:ident]: $fun:ident) => { ++ impl_unary_!( ++ quarter | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_q, ++ $fun ++ ); ++ }; ++ ($vid:ident[t => $vid_t:ident]: $fun:ident) => { ++ impl_unary_!( ++ twice | $trait_id, ++ $trait_method, ++ $vid, ++ $vid_t, ++ $fun ++ ); ++ }; ++ } ++ }; ++} ++ ++macro_rules! impl_tertiary_ { ++ // implementation mapping 1:1 ++ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ transmute($fun( ++ transmute(self), ++ transmute(y), ++ transmute(z), ++ )) ++ } ++ } ++ } ++ }; ++ (scalar | $trait_id:ident, $trait_method:ident, ++ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ union U { ++ vec: $vec_id, ++ scalars: [$sid; $scount], ++ } ++ let mut x = U { vec: self }.scalars; ++ let y = U { vec: y }.scalars; ++ let z = U { vec: z }.scalars; ++ // zip the lanes of all three inputs and apply $fun ++ // element-wise: ++ for (x, (y, z)) in ++ x.iter_mut().zip(y.iter().zip(z.iter())) ++ { ++ *x = $fun(*x, *y, *z); ++ } ++ U { scalars: x }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun twice on each of the vector halves: ++ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vech_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ halves: [$vech_id; 2], ++ } ++ ++ let mut x_halves = U { vec: self }.halves; ++ let y_halves = U { vec: y }.halves; ++ let z_halves = U { vec: z }.halves; ++ ++ *x_halves.get_unchecked_mut(0) = transmute($fun( ++ transmute(*x_halves.get_unchecked(0)), ++ transmute(*y_halves.get_unchecked(0)), ++ transmute(*z_halves.get_unchecked(0)), ++ )); ++ *x_halves.get_unchecked_mut(1) = transmute($fun( ++ transmute(*x_halves.get_unchecked(1)), ++ transmute(*y_halves.get_unchecked(1)), ++ transmute(*z_halves.get_unchecked(1)), ++ )); ++ ++ U { halves: x_halves }.vec ++ } ++ } ++ } ++ }; ++ // implementation calling fun four times on each of the vector quarters: ++ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, ++ $vecq_id:ident, $fun:ident) => { ++ impl $trait_id for $vec_id { ++ #[inline] ++ fn $trait_method(self, y: Self, z: Self) -> Self { ++ unsafe { ++ use crate::mem::transmute; ++ union U { ++ vec: $vec_id, ++ quarters: [$vecq_id; 4], ++ } ++ ++ let mut x_quarters = U { vec: self }.quarters; ++ let y_quarters = U { vec: y }.quarters; ++ let z_quarters = U { vec: z }.quarters; ++ ++ *x_quarters.get_unchecked_mut(0) = transmute($fun( ++ transmute(*x_quarters.get_unchecked(0)), ++ transmute(*y_quarters.get_unchecked(0)), ++ transmute(*z_quarters.get_unchecked(0)), ++ )); ++ ++ *x_quarters.get_unchecked_mut(1) =
++
++macro_rules! impl_tertiary_ {
++    // implementation mapping 1:1
++    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self, z: Self) -> Self {
++                unsafe {
++                    use crate::mem::transmute;
++                    transmute($fun(
++                        transmute(self),
++                        transmute(y),
++                        transmute(z),
++                    ))
++                }
++            }
++        }
++    };
++    (scalar | $trait_id:ident, $trait_method:ident,
++     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self, z: Self) -> Self {
++                unsafe {
++                    union U {
++                        vec: $vec_id,
++                        scalars: [$sid; $scount],
++                    }
++                    let mut x = U { vec: self }.scalars;
++                    let y = U { vec: y }.scalars;
++                    let z = U { vec: z }.scalars;
++                    for ((x, y), z) in x.iter_mut().zip(&y).zip(&z) {
++                        *x = $fun(*x, *y, *z);
++                    }
++                    U { scalars: x }.vec
++                }
++            }
++        }
++    };
++    // implementation calling fun twice on each of the vector halves:
++    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $vech_id:ident, $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self, z: Self) -> Self {
++                unsafe {
++                    use crate::mem::transmute;
++                    union U {
++                        vec: $vec_id,
++                        halves: [$vech_id; 2],
++                    }
++
++                    let mut x_halves = U { vec: self }.halves;
++                    let y_halves = U { vec: y }.halves;
++                    let z_halves = U { vec: z }.halves;
++
++                    *x_halves.get_unchecked_mut(0) = transmute($fun(
++                        transmute(*x_halves.get_unchecked(0)),
++                        transmute(*y_halves.get_unchecked(0)),
++                        transmute(*z_halves.get_unchecked(0)),
++                    ));
++                    *x_halves.get_unchecked_mut(1) = transmute($fun(
++                        transmute(*x_halves.get_unchecked(1)),
++                        transmute(*y_halves.get_unchecked(1)),
++                        transmute(*z_halves.get_unchecked(1)),
++                    ));
++
++                    U { halves: x_halves }.vec
++                }
++            }
++        }
++    };
++    // implementation calling fun four times on each of the vector quarters:
++    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $vecq_id:ident, $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self, z: Self) -> Self {
++                unsafe {
++                    use crate::mem::transmute;
++                    union U {
++                        vec: $vec_id,
++                        quarters: [$vecq_id; 4],
++                    }
++
++                    let mut x_quarters = U { vec: self }.quarters;
++                    let y_quarters = U { vec: y }.quarters;
++                    let z_quarters = U { vec: z }.quarters;
++
++                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(0)),
++                        transmute(*y_quarters.get_unchecked(0)),
++                        transmute(*z_quarters.get_unchecked(0)),
++                    ));
++
++                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(1)),
++                        transmute(*y_quarters.get_unchecked(1)),
++                        transmute(*z_quarters.get_unchecked(1)),
++                    ));
++
++                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(2)),
++                        transmute(*y_quarters.get_unchecked(2)),
++                        transmute(*z_quarters.get_unchecked(2)),
++                    ));
++
++                    *x_quarters.get_unchecked_mut(3) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(3)),
++                        transmute(*y_quarters.get_unchecked(3)),
++                        transmute(*z_quarters.get_unchecked(3)),
++                    ));
++
++                    U { quarters: x_quarters }.vec
++                }
++            }
++        }
++    };
++    // implementation calling fun once on a vector twice as large:
++    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $vect_id:ident, $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self, z: Self) -> Self {
++                unsafe {
++                    use crate::mem::{transmute, uninitialized};
++
++                    union U {
++                        vec: [$vec_id; 2],
++                        twice: $vect_id,
++                    }
++
++                    let x_twice = U { vec: [self, uninitialized()] }.twice;
++                    let y_twice = U { vec: [y, uninitialized()] }.twice;
++                    let z_twice = U { vec: [z, uninitialized()] }.twice;
++                    let twice: $vect_id = transmute($fun(
++                        transmute(x_twice),
++                        transmute(y_twice),
++                        transmute(z_twice),
++                    ));
++
++                    *(U { twice }.vec.get_unchecked(0))
++                }
++            }
++        }
++    };
++}
++
++macro_rules! gen_tertiary_impl_table {
++    ($trait_id:ident, $trait_method:ident) => {
++        macro_rules! impl_tertiary {
++            ($vid:ident: $fun:ident) => {
++                impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun);
++            };
++            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
++                impl_tertiary_!(
++                    scalar | $trait_id,
++                    $trait_method,
++                    $vid,
++                    [$sid; $sc],
++                    $fun
++                );
++            };
++            ($vid:ident[s]: $fun:ident) => {
++                impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun);
++            };
++            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
++                impl_tertiary_!(
++                    halves | $trait_id,
++                    $trait_method,
++                    $vid,
++                    $vid_h,
++                    $fun
++                );
++            };
++            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
++                impl_tertiary_!(
++                    quarter | $trait_id,
++                    $trait_method,
++                    $vid,
++                    $vid_q,
++                    $fun
++                );
++            };
++            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
++                impl_tertiary_!(
++                    twice | $trait_id,
++                    $trait_method,
++                    $vid,
++                    $vid_t,
++                    $fun
++                );
++            };
++        }
++    };
++}
++
++macro_rules! impl_binary_ {
++    // implementation mapping 1:1
++    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self) -> Self {
++                unsafe {
++                    use crate::mem::transmute;
++                    transmute($fun(transmute(self), transmute(y)))
++                }
++            }
++        }
++    };
++    (scalar | $trait_id:ident, $trait_method:ident,
++     $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self) -> Self {
++                unsafe {
++                    union U {
++                        vec: $vec_id,
++                        scalars: [$sid; $scount],
++                    }
++                    let mut x = U { vec: self }.scalars;
++                    let y = U { vec: y }.scalars;
++                    for (x, y) in x.iter_mut().zip(&y) {
++                        *x = $fun(*x, *y);
++                    }
++                    U { scalars: x }.vec
++                }
++            }
++        }
++    };
++    // implementation calling fun twice on each of the vector halves:
++    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $vech_id:ident, $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self) -> Self {
++                unsafe {
++                    use crate::mem::transmute;
++                    union U {
++                        vec: $vec_id,
++                        halves: [$vech_id; 2],
++                    }
++
++                    let mut x_halves = U { vec: self }.halves;
++                    let y_halves = U { vec: y }.halves;
++
++                    *x_halves.get_unchecked_mut(0) = transmute($fun(
++                        transmute(*x_halves.get_unchecked(0)),
++                        transmute(*y_halves.get_unchecked(0)),
++                    ));
++                    *x_halves.get_unchecked_mut(1) = transmute($fun(
++                        transmute(*x_halves.get_unchecked(1)),
++                        transmute(*y_halves.get_unchecked(1)),
++                    ));
++
++                    U { halves: x_halves }.vec
++                }
++            }
++        }
++    };
++    // implementation calling fun four times on each of the vector quarters:
++    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $vecq_id:ident, $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self) -> Self {
++                unsafe {
++                    use crate::mem::transmute;
++                    union U {
++                        vec: $vec_id,
++                        quarters: [$vecq_id; 4],
++                    }
++
++                    let mut x_quarters = U { vec: self }.quarters;
++                    let y_quarters = U { vec: y }.quarters;
++
++                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(0)),
++                        transmute(*y_quarters.get_unchecked(0)),
++                    ));
++
++                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(1)),
++                        transmute(*y_quarters.get_unchecked(1)),
++                    ));
++
++                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(2)),
++                        transmute(*y_quarters.get_unchecked(2)),
++                    ));
++
++                    *x_quarters.get_unchecked_mut(3) = transmute($fun(
++                        transmute(*x_quarters.get_unchecked(3)),
++                        transmute(*y_quarters.get_unchecked(3)),
++                    ));
++
++                    U { quarters: x_quarters }.vec
++                }
++            }
++        }
++    };
++    // implementation calling fun once on a vector twice as large:
++    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
++     $vect_id:ident, $fun:ident) => {
++        impl $trait_id for $vec_id {
++            #[inline]
++            fn $trait_method(self, y: Self) -> Self {
++                unsafe {
++                    use crate::mem::{transmute, uninitialized};
++
++                    union U {
++                        vec: [$vec_id; 2],
++                        twice: $vect_id,
++                    }
++
++                    let x_twice = U { vec: [self, uninitialized()] }.twice;
++                    let y_twice = U { vec: [y, uninitialized()] }.twice;
++                    let twice: $vect_id = transmute($fun(
++                        transmute(x_twice),
++                        transmute(y_twice),
++                    ));
++
++                    *(U { twice }.vec.get_unchecked(0))
++                }
++            }
++        }
++    };
++}
++
++macro_rules! gen_binary_impl_table {
++    ($trait_id:ident, $trait_method:ident) => {
++        macro_rules! impl_binary {
++            ($vid:ident: $fun:ident) => {
++                impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun);
++            };
++            ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
++                impl_binary_!(
++                    scalar | $trait_id,
++                    $trait_method,
++                    $vid,
++                    [$sid; $sc],
++                    $fun
++                );
++            };
++            ($vid:ident[s]: $fun:ident) => {
++                impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun);
++            };
++            ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
++                impl_binary_!(
++                    halves | $trait_id,
++                    $trait_method,
++                    $vid,
++                    $vid_h,
++                    $fun
++                );
++            };
++            ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
++                impl_binary_!(
++                    quarter | $trait_id,
++                    $trait_method,
++                    $vid,
++                    $vid_q,
++                    $fun
++                );
++            };
++            ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
++                impl_binary_!(
++                    twice | $trait_id,
++                    $trait_method,
++                    $vid,
++                    $vid_t,
++                    $fun
++                );
++            };
++        }
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs
+new file mode 100644
+index 000000000000..f48a57dc46c6
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs
+@@ -0,0 +1,109 @@
++//! Vertical floating-point `mul_add`
++#![allow(unused)]
++use crate::*;
++
++// FIXME: 64-bit 1 element mul_add
++
++crate trait MulAdd {
++    fn mul_add(self, y: Self, z: Self) -> Self;
++}
++
++#[cfg(not(target_arch = "s390x"))]
++#[allow(improper_ctypes)]
++extern "C" {
++    #[link_name = "llvm.fma.v2f32"]
++    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
++    #[link_name = "llvm.fma.v4f32"]
++    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
++    #[link_name = "llvm.fma.v8f32"]
++    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
++    #[link_name = "llvm.fma.v16f32"]
++    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
++    /* FIXME 64-bit single elem vectors
++    #[link_name = "llvm.fma.v1f64"]
++    fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
++    */
++    #[link_name = "llvm.fma.v2f64"]
++    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
++    #[link_name = "llvm.fma.v4f64"]
++    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
++    #[link_name = "llvm.fma.v8f64"]
++    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
++}
++
++gen_tertiary_impl_table!(MulAdd, mul_add);
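++// Context (illustrative, not part of this crate): `llvm.fma.*` lowers to a
++// fused multiply-add, i.e. `x * y + z` rounded once. The stable scalar
++// equivalent is `f64::mul_add`; the unfused form rounds twice, so the two
++// can differ:
++//
++//     fn main() {
++//         let (x, y, z) = (0.1_f64, 10.0_f64, -1.0_f64);
++//         let fused = x.mul_add(y, z); // single rounding
++//         let split = x * y + z;       // two roundings
++//         // `split` is exactly 0.0 here, while `fused` keeps the residue
++//         // of rounding 0.1 to binary, around 5.6e-17.
++//         println!("fused = {:e}, split = {:e}", fused, split);
++//     }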
++
++cfg_if! {
++    if #[cfg(target_arch = "s390x")] {
++        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
++        macro_rules! impl_broken {
++            ($id:ident) => {
++                impl MulAdd for $id {
++                    #[inline]
++                    fn mul_add(self, y: Self, z: Self) -> Self {
++                        self * y + z
++                    }
++                }
++            };
++        }
++
++        impl_broken!(f32x2);
++        impl_broken!(f32x4);
++        impl_broken!(f32x8);
++        impl_broken!(f32x16);
++
++        impl_broken!(f64x2);
++        impl_broken!(f64x4);
++        impl_broken!(f64x8);
++    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
++        use sleef_sys::*;
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
++                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
++                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);
++
++                impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
++                impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
++                impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
++                impl_tertiary!(f64x4: Sleef_fmad4_avx2);
++            } else if #[cfg(target_feature = "avx")] {
++                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
++                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
++                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);
++
++                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
++                impl_tertiary!(f32x8: Sleef_fmaf8_avx);
++                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
++                impl_tertiary!(f64x4: Sleef_fmad4_avx);
++            } else if #[cfg(target_feature = "sse4.2")] {
++                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
++                impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
++                impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);
++
++                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
++                impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
++                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
++                impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
++            } else {
++                impl_tertiary!(f32x2: fma_v2f32);
++                impl_tertiary!(f32x16: fma_v16f32);
++                impl_tertiary!(f64x8: fma_v8f64);
++
++                impl_tertiary!(f32x4: fma_v4f32);
++                impl_tertiary!(f32x8: fma_v8f32);
++                impl_tertiary!(f64x2: fma_v2f64);
++                impl_tertiary!(f64x4: fma_v4f64);
++            }
++        }
++    } else {
++        impl_tertiary!(f32x2: fma_v2f32);
++        impl_tertiary!(f32x4: fma_v4f32);
++        impl_tertiary!(f32x8: fma_v8f32);
++        impl_tertiary!(f32x16: fma_v16f32);
++        // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit single elem vectors
++        impl_tertiary!(f64x2: fma_v2f64);
++        impl_tertiary!(f64x4: fma_v4f64);
++        impl_tertiary!(f64x8: fma_v8f64);
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs
+new file mode 100644
+index 000000000000..8c41fb131d94
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs
+@@ -0,0 +1,66 @@
++//! Approximation for floating-point `mul_add`
++use crate::*;
++
++// FIXME: 64-bit 1 element mul_adde
++
++crate trait MulAddE {
++    fn mul_adde(self, y: Self, z: Self) -> Self;
++}
++
++#[cfg(not(target_arch = "s390x"))]
++#[allow(improper_ctypes)]
++extern "C" {
++    #[link_name = "llvm.fmuladd.v2f32"]
++    fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
++    #[link_name = "llvm.fmuladd.v4f32"]
++    fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
++    #[link_name = "llvm.fmuladd.v8f32"]
++    fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
++    #[link_name = "llvm.fmuladd.v16f32"]
++    fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
++    /* FIXME 64-bit single elem vectors
++    #[link_name = "llvm.fmuladd.v1f64"]
++    fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
++    */
++    #[link_name = "llvm.fmuladd.v2f64"]
++    fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
++    #[link_name = "llvm.fmuladd.v4f64"]
++    fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
++    #[link_name = "llvm.fmuladd.v8f64"]
++    fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
++}
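++// Note (illustrative, not from this crate): unlike `llvm.fma.*`,
++// `llvm.fmuladd.*` only *permits* fusing; the backend may lower it to a
++// plain multiply-and-add when an FMA instruction is unavailable or slower.
++// A scalar model of that contract:
++//
++//     fn mul_adde_model(x: f64, y: f64, z: f64, fuse: bool) -> f64 {
++//         if fuse {
++//             x.mul_add(y, z) // fused: one rounding
++//         } else {
++//             x * y + z // unfused: two roundings
++//         }
++//     }
++//
++// so results may differ by one ulp across targets.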
++
++macro_rules! impl_mul_adde {
++    ($id:ident : $fn:ident) => {
++        impl MulAddE for $id {
++            #[inline]
++            fn mul_adde(self, y: Self, z: Self) -> Self {
++                #[cfg(not(target_arch = "s390x"))]
++                {
++                    use crate::mem::transmute;
++                    unsafe {
++                        transmute($fn(
++                            transmute(self),
++                            transmute(y),
++                            transmute(z),
++                        ))
++                    }
++                }
++                #[cfg(target_arch = "s390x")]
++                {
++                    // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
++                    self * y + z
++                }
++            }
++        }
++    };
++}
++
++impl_mul_adde!(f32x2: fmuladd_v2f32);
++impl_mul_adde!(f32x4: fmuladd_v4f32);
++impl_mul_adde!(f32x8: fmuladd_v8f32);
++impl_mul_adde!(f32x16: fmuladd_v16f32);
++// impl_mul_adde!(f64x1: fma_v1f64); // FIXME 64-bit single elem vectors
++impl_mul_adde!(f64x2: fmuladd_v2f64);
++impl_mul_adde!(f64x4: fmuladd_v4f64);
++impl_mul_adde!(f64x8: fmuladd_v8f64);
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/powf.rs b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs
+new file mode 100644
+index 000000000000..bc15067d73a3
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs
+@@ -0,0 +1,112 @@
++//! Vertical floating-point `powf`
++#![allow(unused)]
++
++// FIXME 64-bit single elem vectors missing
++
++use crate::*;
++
++crate trait Powf {
++    fn powf(self, x: Self) -> Self;
++}
++
++#[allow(improper_ctypes)]
++extern "C" {
++    #[link_name = "llvm.pow.v2f32"]
++    fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2;
++    #[link_name = "llvm.pow.v4f32"]
++    fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4;
++    #[link_name = "llvm.pow.v8f32"]
++    fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8;
++    #[link_name = "llvm.pow.v16f32"]
++    fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16;
++    /* FIXME 64-bit single elem vectors
++    #[link_name = "llvm.pow.v1f64"]
++    fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1;
++    */
++    #[link_name = "llvm.pow.v2f64"]
++    fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2;
++    #[link_name = "llvm.pow.v4f64"]
++    fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4;
++    #[link_name = "llvm.pow.v8f64"]
++    fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8;
++
++    #[link_name = "llvm.pow.f32"]
++    fn powf_f32(x: f32, y: f32) -> f32;
++    #[link_name = "llvm.pow.f64"]
++    fn powf_f64(x: f64, y: f64) -> f64;
++}
++
++gen_binary_impl_table!(Powf, powf);
++
++cfg_if! {
++    if #[cfg(target_arch = "s390x")] {
++        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
++        impl_binary!(f32x2[f32; 2]: powf_f32);
++        impl_binary!(f32x4[f32; 4]: powf_f32);
++        impl_binary!(f32x8[f32; 8]: powf_f32);
++        impl_binary!(f32x16[f32; 16]: powf_f32);
++
++        impl_binary!(f64x2[f64; 2]: powf_f64);
++        impl_binary!(f64x4[f64; 4]: powf_f64);
++        impl_binary!(f64x8[f64; 8]: powf_f64);
++    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
++        use sleef_sys::*;
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128);
++                impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2);
++                impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2);
++
++                impl_binary!(f32x4: Sleef_powf4_u10avx2128);
++                impl_binary!(f32x8: Sleef_powf8_u10avx2);
++                impl_binary!(f64x2: Sleef_powd2_u10avx2128);
++                impl_binary!(f64x4: Sleef_powd4_u10avx2);
++            } else if #[cfg(target_feature = "avx")] {
++                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
++                impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx);
++                impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx);
++
++                impl_binary!(f32x4: Sleef_powf4_u10sse4);
++                impl_binary!(f32x8: Sleef_powf8_u10avx);
++                impl_binary!(f64x2: Sleef_powd2_u10sse4);
++                impl_binary!(f64x4: Sleef_powd4_u10avx);
++            } else if #[cfg(target_feature = "sse4.2")] {
++                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
++                impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4);
++                impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4);
++
++                impl_binary!(f32x4: Sleef_powf4_u10sse4);
++                impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4);
++                impl_binary!(f64x2: Sleef_powd2_u10sse4);
++                impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4);
++            } else if #[cfg(target_feature = "sse2")] {
++                impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2);
++                impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2);
++                impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2);
++
++                impl_binary!(f32x4: Sleef_powf4_u10sse2);
++                impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2);
++                impl_binary!(f64x2: Sleef_powd2_u10sse2);
++                impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2);
++            } else {
++                impl_binary!(f32x2[f32; 2]: powf_f32);
++                impl_binary!(f32x4: powf_v4f32);
++                impl_binary!(f32x8: powf_v8f32);
++                impl_binary!(f32x16: powf_v16f32);
++
++                impl_binary!(f64x2: powf_v2f64);
++                impl_binary!(f64x4: powf_v4f64);
++                impl_binary!(f64x8: powf_v8f64);
++            }
++        }
++    } else {
++        impl_binary!(f32x2[f32; 2]: powf_f32);
++        impl_binary!(f32x4: powf_v4f32);
++        impl_binary!(f32x8: powf_v8f32);
++        impl_binary!(f32x16: powf_v16f32);
++
++        impl_binary!(f64x2: powf_v2f64);
++        impl_binary!(f64x4: powf_v4f64);
++        impl_binary!(f64x8: powf_v8f64);
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs
+new file mode 100644
+index 000000000000..7b014d07da8d
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs
+@@ -0,0 +1,103 @@
++//! Vertical floating-point `sin`
++#![allow(unused)]
++
++// FIXME 64-bit 1 elem vectors sin
++
++use crate::*;
++
++crate trait Sin {
++    fn sin(self) -> Self;
++}
++
++#[allow(improper_ctypes)]
++extern "C" {
++    #[link_name = "llvm.sin.v2f32"]
++    fn sin_v2f32(x: f32x2) -> f32x2;
++    #[link_name = "llvm.sin.v4f32"]
++    fn sin_v4f32(x: f32x4) -> f32x4;
++    #[link_name = "llvm.sin.v8f32"]
++    fn sin_v8f32(x: f32x8) -> f32x8;
++    #[link_name = "llvm.sin.v16f32"]
++    fn sin_v16f32(x: f32x16) -> f32x16;
++    /* FIXME 64-bit single elem vectors
++    #[link_name = "llvm.sin.v1f64"]
++    fn sin_v1f64(x: f64x1) -> f64x1;
++    */
++    #[link_name = "llvm.sin.v2f64"]
++    fn sin_v2f64(x: f64x2) -> f64x2;
++    #[link_name = "llvm.sin.v4f64"]
++    fn sin_v4f64(x: f64x4) -> f64x4;
++    #[link_name = "llvm.sin.v8f64"]
++    fn sin_v8f64(x: f64x8) -> f64x8;
++
++    #[link_name = "llvm.sin.f32"]
++    fn sin_f32(x: f32) -> f32;
++    #[link_name = "llvm.sin.f64"]
++    fn sin_f64(x: f64) -> f64;
++}
++
++gen_unary_impl_table!(Sin, sin);
++
++cfg_if! {
++    if #[cfg(target_arch = "s390x")] {
++        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
++        impl_unary!(f32x2[f32; 2]: sin_f32);
++        impl_unary!(f32x4[f32; 4]: sin_f32);
++        impl_unary!(f32x8[f32; 8]: sin_f32);
++        impl_unary!(f32x16[f32; 16]: sin_f32);
++
++        impl_unary!(f64x2[f64; 2]: sin_f64);
++        impl_unary!(f64x4[f64; 4]: sin_f64);
++        impl_unary!(f64x8[f64; 8]: sin_f64);
++    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
++        use sleef_sys::*;
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2);
++
++                impl_unary!(f32x4: Sleef_sinf4_u10avx2128);
++                impl_unary!(f32x8: Sleef_sinf8_u10avx2);
++                impl_unary!(f64x2: Sleef_sind2_u10avx2128);
++                impl_unary!(f64x4: Sleef_sind4_u10avx2);
++            } else if #[cfg(target_feature = "avx")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx);
++
++                impl_unary!(f32x4: Sleef_sinf4_u10sse4);
++                impl_unary!(f32x8: Sleef_sinf8_u10avx);
++                impl_unary!(f64x2: Sleef_sind2_u10sse4);
++                impl_unary!(f64x4: Sleef_sind4_u10avx);
++            } else if #[cfg(target_feature = "sse4.2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
++                impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4);
++                impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4);
++
++                impl_unary!(f32x4: Sleef_sinf4_u10sse4);
++                impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4);
++                impl_unary!(f64x2: Sleef_sind2_u10sse4);
++                impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4);
++            } else {
++                impl_unary!(f32x2[f32; 2]: sin_f32);
++                impl_unary!(f32x16: sin_v16f32);
++                impl_unary!(f64x8: sin_v8f64);
++
++                impl_unary!(f32x4: sin_v4f32);
++                impl_unary!(f32x8: sin_v8f32);
++                impl_unary!(f64x2: sin_v2f64);
++                impl_unary!(f64x4: sin_v4f64);
++            }
++        }
++    } else {
++        impl_unary!(f32x2[f32; 2]: sin_f32);
++        impl_unary!(f32x4: sin_v4f32);
++        impl_unary!(f32x8: sin_v8f32);
++        impl_unary!(f32x16: sin_v16f32);
++
++        impl_unary!(f64x2: sin_v2f64);
++        impl_unary!(f64x4: sin_v4f64);
++        impl_unary!(f64x8: sin_v8f64);
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs
+new file mode 100644
+index 000000000000..0f1249ec88f0
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs
+@@ -0,0 +1,195 @@
++//! Vertical floating-point `sin_cos`
++#![allow(unused)]
++
++// FIXME 64-bit 1 elem vectors sin_cos
++
++use crate::*;
++
++crate trait SinCosPi: Sized {
++    type Output;
++    fn sin_cos_pi(self) -> Self::Output;
++}
++
++macro_rules! impl_def {
++    ($vid:ident, $PI:path) => {
++        impl SinCosPi for $vid {
++            type Output = (Self, Self);
++            #[inline]
++            fn sin_cos_pi(self) -> Self::Output {
++                let v = self * Self::splat($PI);
++                (v.sin(), v.cos())
++            }
++        }
++    };
++}
++
++macro_rules! impl_def32 {
++    ($vid:ident) => {
++        impl_def!($vid, crate::f32::consts::PI);
++    };
++}
++macro_rules! impl_def64 {
++    ($vid:ident) => {
++        impl_def!($vid, crate::f64::consts::PI);
++    };
++}
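++// The default `impl_def` above just scales the argument by π and reuses
++// `sin`/`cos`; dedicated sinpi/cospi kernels (like SLEEF's below) avoid the
++// accuracy loss of that multiplication. Scalar sketch of the fallback
++// (illustrative only, not from this crate):
++//
++//     fn sin_cos_pi(x: f32) -> (f32, f32) {
++//         let v = x * core::f32::consts::PI;
++//         (v.sin(), v.cos())
++//     }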
++
++macro_rules! impl_unary_t {
++    ($vid:ident: $fun:ident) => {
++        impl SinCosPi for $vid {
++            type Output = (Self, Self);
++            fn sin_cos_pi(self) -> Self::Output {
++                unsafe {
++                    use crate::mem::transmute;
++                    transmute($fun(transmute(self)))
++                }
++            }
++        }
++    };
++    ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
++        impl SinCosPi for $vid {
++            type Output = (Self, Self);
++            fn sin_cos_pi(self) -> Self::Output {
++                unsafe {
++                    use crate::mem::{transmute, uninitialized};
++
++                    union U {
++                        vec: [$vid; 2],
++                        twice: $vid_t,
++                    }
++
++                    let twice = U { vec: [self, uninitialized()] }.twice;
++                    let twice = transmute($fun(transmute(twice)));
++
++                    union R {
++                        twice: ($vid_t, $vid_t),
++                        vecs: ([$vid; 2], [$vid; 2]),
++                    }
++                    let r = R { twice }.vecs;
++                    (*r.0.get_unchecked(0), *r.1.get_unchecked(0))
++                }
++            }
++        }
++    };
++    ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
++        impl SinCosPi for $vid {
++            type Output = (Self, Self);
++            fn sin_cos_pi(self) -> Self::Output {
++                unsafe {
++                    use crate::mem::transmute;
++
++                    union U {
++                        vec: $vid,
++                        halves: [$vid_h; 2],
++                    }
++
++                    let halves = U { vec: self }.halves;
++
++                    let res_0: ($vid_h, $vid_h) =
++                        transmute($fun(transmute(*halves.get_unchecked(0))));
++                    let res_1: ($vid_h, $vid_h) =
++                        transmute($fun(transmute(*halves.get_unchecked(1))));
++
++                    union R {
++                        result: ($vid, $vid),
++                        halves: ([$vid_h; 2], [$vid_h; 2]),
++                    }
++                    R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }
++                        .result
++                }
++            }
++        }
++    };
++    ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
++        impl SinCosPi for $vid {
++            type Output = (Self, Self);
++            fn sin_cos_pi(self) -> Self::Output {
++                unsafe {
++                    use crate::mem::transmute;
++
++                    union U {
++                        vec: $vid,
++                        quarters: [$vid_q; 4],
++                    }
++
++                    let quarters = U { vec: self }.quarters;
++
++                    let res_0: ($vid_q, $vid_q) =
++                        transmute($fun(transmute(*quarters.get_unchecked(0))));
++                    let res_1: ($vid_q, $vid_q) =
++                        transmute($fun(transmute(*quarters.get_unchecked(1))));
++                    let res_2: ($vid_q, $vid_q) =
++                        transmute($fun(transmute(*quarters.get_unchecked(2))));
++                    let res_3: ($vid_q, $vid_q) =
++                        transmute($fun(transmute(*quarters.get_unchecked(3))));
++
++                    union R {
++                        result: ($vid, $vid),
++                        quarters: ([$vid_q; 4], [$vid_q; 4]),
++                    }
++                    R {
++                        quarters: (
++                            [res_0.0, res_1.0, res_2.0, res_3.0],
++                            [res_0.1, res_1.1, res_2.1, res_3.1],
++                        ),
++                    }
++                    .result
++                }
++            }
++        }
++    };
++}
++
++cfg_if! {
++    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
++        use sleef_sys::*;
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
++                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
++                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);
++
++                impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
++                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
++                impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
++                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
++            } else if #[cfg(target_feature = "avx")] {
++                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
++                impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
++                impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);
++
++                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
++                impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
++                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
++                impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
++            } else if #[cfg(target_feature = "sse4.2")] {
++                impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
++                impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
++                impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);
++
++                impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
++                impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
++                impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
++                impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
++            } else {
++                impl_def32!(f32x2);
++                impl_def32!(f32x4);
++                impl_def32!(f32x8);
++                impl_def32!(f32x16);
++
++                impl_def64!(f64x2);
++                impl_def64!(f64x4);
++                impl_def64!(f64x8);
++            }
++        }
++    } else {
++        impl_def32!(f32x2);
++        impl_def32!(f32x4);
++        impl_def32!(f32x8);
++        impl_def32!(f32x16);
++
++        impl_def64!(f64x2);
++        impl_def64!(f64x4);
++        impl_def64!(f64x8);
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs
+new file mode 100644
+index 000000000000..72df98c93c91
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs
+@@ -0,0 +1,87 @@
++//! Vertical floating-point `sin_pi`
++#![allow(unused)]
++
++// FIXME 64-bit 1 elem vectors sin_pi
++
++use crate::*;
++
++crate trait SinPi {
++    fn sin_pi(self) -> Self;
++}
++
++gen_unary_impl_table!(SinPi, sin_pi);
++
++macro_rules! impl_def {
++    ($vid:ident, $PI:path) => {
++        impl SinPi for $vid {
++            #[inline]
++            fn sin_pi(self) -> Self {
++                (self * Self::splat($PI)).sin()
++            }
++        }
++    };
++}
++macro_rules! impl_def32 {
++    ($vid:ident) => {
++        impl_def!($vid, crate::f32::consts::PI);
++    };
++}
++macro_rules! impl_def64 {
++    ($vid:ident) => {
++        impl_def!($vid, crate::f64::consts::PI);
++    };
++}
++
++cfg_if! {
++    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
++        use sleef_sys::*;
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2);
++
++                impl_unary!(f32x4: Sleef_sinpif4_u05avx2128);
++                impl_unary!(f32x8: Sleef_sinpif8_u05avx2);
++                impl_unary!(f64x2: Sleef_sinpid2_u05avx2128);
++                impl_unary!(f64x4: Sleef_sinpid4_u05avx2);
++            } else if #[cfg(target_feature = "avx")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx);
++
++                impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
++                impl_unary!(f32x8: Sleef_sinpif8_u05avx);
++                impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
++                impl_unary!(f64x4: Sleef_sinpid4_u05avx);
++            } else if #[cfg(target_feature = "sse4.2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
++                impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4);
++                impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4);
++
++                impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
++                impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4);
++                impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
++                impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4);
++            } else {
++                impl_def32!(f32x2);
++                impl_def32!(f32x4);
++                impl_def32!(f32x8);
++                impl_def32!(f32x16);
++
++                impl_def64!(f64x2);
++                impl_def64!(f64x4);
++                impl_def64!(f64x8);
++            }
++        }
++    } else {
++        impl_def32!(f32x2);
++        impl_def32!(f32x4);
++        impl_def32!(f32x8);
++        impl_def32!(f32x16);
++
++        impl_def64!(f64x2);
++        impl_def64!(f64x4);
++        impl_def64!(f64x8);
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs
+new file mode 100644
+index 000000000000..7ce31df62662
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs
+@@ -0,0 +1,103 @@
++//! Vertical floating-point `sqrt`
++#![allow(unused)]
++
++// FIXME 64-bit 1 elem vectors sqrt
++
++use crate::*;
++
++crate trait Sqrt {
++    fn sqrt(self) -> Self;
++}
++
++#[allow(improper_ctypes)]
++extern "C" {
++    #[link_name = "llvm.sqrt.v2f32"]
++    fn sqrt_v2f32(x: f32x2) -> f32x2;
++    #[link_name = "llvm.sqrt.v4f32"]
++    fn sqrt_v4f32(x: f32x4) -> f32x4;
++    #[link_name = "llvm.sqrt.v8f32"]
++    fn sqrt_v8f32(x: f32x8) -> f32x8;
++    #[link_name = "llvm.sqrt.v16f32"]
++    fn sqrt_v16f32(x: f32x16) -> f32x16;
++    /* FIXME 64-bit single elem vectors
++    #[link_name = "llvm.sqrt.v1f64"]
++    fn sqrt_v1f64(x: f64x1) -> f64x1;
++    */
++    #[link_name = "llvm.sqrt.v2f64"]
++    fn sqrt_v2f64(x: f64x2) -> f64x2;
++    #[link_name = "llvm.sqrt.v4f64"]
++    fn sqrt_v4f64(x: f64x4) -> f64x4;
++    #[link_name = "llvm.sqrt.v8f64"]
++    fn sqrt_v8f64(x: f64x8) -> f64x8;
++
++    #[link_name = "llvm.sqrt.f32"]
++    fn sqrt_f32(x: f32) -> f32;
++    #[link_name = "llvm.sqrt.f64"]
++    fn sqrt_f64(x: f64) -> f64;
++}
++
++gen_unary_impl_table!(Sqrt, sqrt);
++
++cfg_if! {
++    if #[cfg(target_arch = "s390x")] {
++        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
++        impl_unary!(f32x2[f32; 2]: sqrt_f32);
++        impl_unary!(f32x4[f32; 4]: sqrt_f32);
++        impl_unary!(f32x8[f32; 8]: sqrt_f32);
++        impl_unary!(f32x16[f32; 16]: sqrt_f32);
++
++        impl_unary!(f64x2[f64; 2]: sqrt_f64);
++        impl_unary!(f64x4[f64; 4]: sqrt_f64);
++        impl_unary!(f64x8[f64; 8]: sqrt_f64);
++    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
++        use sleef_sys::*;
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2);
++
++                impl_unary!(f32x4: Sleef_sqrtf4_avx2128);
++                impl_unary!(f32x8: Sleef_sqrtf8_avx2);
++                impl_unary!(f64x2: Sleef_sqrtd2_avx2128);
++                impl_unary!(f64x4: Sleef_sqrtd4_avx2);
++            } else if #[cfg(target_feature = "avx")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx);
++
++                impl_unary!(f32x4: Sleef_sqrtf4_sse4);
++                impl_unary!(f32x8: Sleef_sqrtf8_avx);
++                impl_unary!(f64x2: Sleef_sqrtd2_sse4);
++                impl_unary!(f64x4: Sleef_sqrtd4_avx);
++            } else if #[cfg(target_feature = "sse4.2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
++                impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4);
++                impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4);
++
++                impl_unary!(f32x4: Sleef_sqrtf4_sse4);
++                impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4);
++                impl_unary!(f64x2: Sleef_sqrtd2_sse4);
++                impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4);
++            } else {
++                impl_unary!(f32x2[f32; 2]: sqrt_f32);
++                impl_unary!(f32x16: sqrt_v16f32);
++                impl_unary!(f64x8: sqrt_v8f64);
++
++                impl_unary!(f32x4: sqrt_v4f32);
++                impl_unary!(f32x8: sqrt_v8f32);
++                impl_unary!(f64x2: sqrt_v2f64);
++                impl_unary!(f64x4: sqrt_v4f64);
++            }
++        }
++    } else {
++        impl_unary!(f32x2[f32; 2]: sqrt_f32);
++        impl_unary!(f32x4: sqrt_v4f32);
++        impl_unary!(f32x8: sqrt_v8f32);
++        impl_unary!(f32x16: sqrt_v16f32);
++
++        impl_unary!(f64x2: sqrt_v2f64);
++        impl_unary!(f64x4: sqrt_v4f64);
++        impl_unary!(f64x8: sqrt_v8f64);
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs
+new file mode 100644
+index 000000000000..c1e379c34241
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs
+@@ -0,0 +1,67 @@
++//! Vertical floating-point `sqrt`
++#![allow(unused)]
++
++// FIXME 64-bit 1 elem vectors sqrte
++
++use crate::llvm::simd_fsqrt;
++use crate::*;
++
++crate trait Sqrte {
++    fn sqrte(self) -> Self;
++}
++
++gen_unary_impl_table!(Sqrte, sqrte);
++
++cfg_if! {
++    if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
++        use sleef_sys::*;
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2);
++
++                impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128);
++                impl_unary!(f32x8: Sleef_sqrtf8_u35avx2);
++                impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128);
++                impl_unary!(f64x4: Sleef_sqrtd4_u35avx2);
++            } else if #[cfg(target_feature = "avx")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
++                impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx);
++                impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx);
++
++                impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
++                impl_unary!(f32x8: Sleef_sqrtf8_u35avx);
++                impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
++                impl_unary!(f64x4: Sleef_sqrtd4_u35avx);
++            } else if #[cfg(target_feature = "sse4.2")] {
++                impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
++                impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4);
++                impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4);
++
++                impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
++                impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4);
++                impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
++                impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4);
++            } else {
++                impl_unary!(f32x2[g]: simd_fsqrt);
++                impl_unary!(f32x16[g]: simd_fsqrt);
++                impl_unary!(f64x8[g]: simd_fsqrt);
++
++                impl_unary!(f32x4[g]: simd_fsqrt);
++                impl_unary!(f32x8[g]: simd_fsqrt);
++                impl_unary!(f64x2[g]: simd_fsqrt);
++                impl_unary!(f64x4[g]: simd_fsqrt);
++            }
++        }
++    } else {
++        impl_unary!(f32x2[g]: simd_fsqrt);
++        impl_unary!(f32x4[g]: simd_fsqrt);
++        impl_unary!(f32x8[g]: simd_fsqrt);
++        impl_unary!(f32x16[g]: simd_fsqrt);
++
++        impl_unary!(f64x2[g]: simd_fsqrt);
++        impl_unary!(f64x4[g]: simd_fsqrt);
++        impl_unary!(f64x8[g]: simd_fsqrt);
++    }
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs
+new file mode 100644
+index 000000000000..39f493d3b17f
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs
+@@ -0,0 +1,28 @@
++//! Provides `isize` and `usize`
++
++use cfg_if::cfg_if;
++
++cfg_if! {
++    if #[cfg(target_pointer_width = "8")] {
++        crate type isize_ = i8;
++        crate type usize_ = u8;
++    } else if #[cfg(target_pointer_width = "16")] {
++        crate type isize_ = i16;
++        crate type usize_ = u16;
++    } else if #[cfg(target_pointer_width = "32")] {
++        crate type isize_ = i32;
++        crate type usize_ = u32;
++
++    } else if #[cfg(target_pointer_width = "64")] {
++        crate type isize_ = i64;
++        crate type usize_ = u64;
++    } else if #[cfg(target_pointer_width = "64")] {
++        crate type isize_ = i64;
++        crate type usize_ = u64;
++    } else if #[cfg(target_pointer_width = "128")] {
++        crate type isize_ = i128;
++        crate type usize_ = u128;
++    } else {
++        compile_error!("unsupported target_pointer_width");
++    }
++}
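++// The same `cfg_if` pattern in ordinary code (standalone sketch, not from
++// this crate): pick an integer type whose width matches the target's
++// pointer width, so the alias is always pointer-sized:
++//
++//     #[cfg(target_pointer_width = "32")]
++//     type UsizeBits = u32;
++//     #[cfg(target_pointer_width = "64")]
++//     type UsizeBits = u64;
++//
++//     fn main() {
++//         assert_eq!(core::mem::size_of::<UsizeBits>(),
++//                    core::mem::size_of::<usize>());
++//     }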
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions.rs b/third_party/rust/packed_simd/src/codegen/reductions.rs
+new file mode 100644
+index 000000000000..7be4f5fabbea
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions.rs
+@@ -0,0 +1 @@
++crate mod mask;
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs
+new file mode 100644
+index 000000000000..97260c6d4e03
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs
+@@ -0,0 +1,69 @@
++//! Code generation workaround for `all()` mask horizontal reduction.
++//!
++//! Works around [LLVM bug 36702].
++//!
++//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702
++#![allow(unused_macros)]
++
++use crate::*;
++
++crate trait All: crate::marker::Sized {
++    unsafe fn all(self) -> bool;
++}
++
++crate trait Any: crate::marker::Sized {
++    unsafe fn any(self) -> bool;
++}
++
++#[macro_use]
++mod fallback_impl;
++
++cfg_if! {
++    if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
++        #[macro_use]
++        mod x86;
++    } else if #[cfg(all(target_arch = "arm", target_feature = "v7",
++                        target_feature = "neon",
++                        any(feature = "core_arch", libcore_neon)))] {
++        #[macro_use]
++        mod arm;
++    } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
++        #[macro_use]
++        mod aarch64;
++    } else {
++        #[macro_use]
++        mod fallback;
++    }
++}
++
++impl_mask_reductions!(m8x2);
++impl_mask_reductions!(m8x4);
++impl_mask_reductions!(m8x8);
++impl_mask_reductions!(m8x16);
++impl_mask_reductions!(m8x32);
++impl_mask_reductions!(m8x64);
++
++impl_mask_reductions!(m16x2);
++impl_mask_reductions!(m16x4);
++impl_mask_reductions!(m16x8);
++impl_mask_reductions!(m16x16);
++impl_mask_reductions!(m16x32);
++
++impl_mask_reductions!(m32x2);
++impl_mask_reductions!(m32x4);
++impl_mask_reductions!(m32x8);
++impl_mask_reductions!(m32x16);
++
++// FIXME: 64-bit single element vector
++// impl_mask_reductions!(m64x1);
++impl_mask_reductions!(m64x2);
++impl_mask_reductions!(m64x4);
++impl_mask_reductions!(m64x8);
++
++impl_mask_reductions!(m128x1);
++impl_mask_reductions!(m128x2);
++impl_mask_reductions!(m128x4);
++
++impl_mask_reductions!(msizex2);
++impl_mask_reductions!(msizex4);
++impl_mask_reductions!(msizex8);
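++// Semantically (standalone sketch, not from this crate), the reductions
++// being specialized here are just lane-wise folds over a boolean mask:
++//
++//     fn all(mask: &[bool]) -> bool {
++//         mask.iter().all(|&lane| lane)
++//     }
++//
++//     fn any(mask: &[bool]) -> bool {
++//         mask.iter().any(|&lane| lane)
++//     }
++//
++// The per-target modules below only exist to get better codegen than
++// LLVM's default lowering of these folds.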
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
+new file mode 100644
+index 000000000000..e9586eace1ff
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
+@@ -0,0 +1,71 @@
++//! Mask reductions implementation for `aarch64` targets
++
++/// 128-bit wide vectors
++macro_rules! aarch64_128_neon_impl {
++    ($id:ident, $vmin:ident, $vmax:ident) => {
++        impl All for $id {
++            #[inline]
++            #[target_feature(enable = "neon")]
++            unsafe fn all(self) -> bool {
++                use crate::arch::aarch64::$vmin;
++                $vmin(crate::mem::transmute(self)) != 0
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            #[target_feature(enable = "neon")]
++            unsafe fn any(self) -> bool {
++                use crate::arch::aarch64::$vmax;
++                $vmax(crate::mem::transmute(self)) != 0
++            }
++        }
++    }
++}
++
++/// 64-bit wide vectors
++macro_rules! aarch64_64_neon_impl {
++    ($id:ident, $vec128:ident) => {
++        impl All for $id {
++            #[inline]
++            #[target_feature(enable = "neon")]
++            unsafe fn all(self) -> bool {
++                // Duplicates the 64-bit vector into a 128-bit one and
++                // calls all on that.
++                union U {
++                    halves: ($id, $id),
++                    vec: $vec128,
++                }
++                U {
++                    halves: (self, self),
++                }.vec.all()
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            #[target_feature(enable = "neon")]
++            unsafe fn any(self) -> bool {
++                union U {
++                    halves: ($id, $id),
++                    vec: $vec128,
++                }
++                U {
++                    halves: (self, self),
++                }.vec.any()
++            }
++        }
++    };
++}
++
++/// Mask reduction implementation for `aarch64` targets
++macro_rules! impl_mask_reductions {
++    // 64-bit wide masks
++    (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); };
++    (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); };
++    (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); };
++    // 128-bit wide masks
++    (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); };
++    (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); };
++    (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); };
++    // Fallback to LLVM's default code-generation:
++    ($id:ident) => { fallback_impl!($id); };
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs
+new file mode 100644
+index 000000000000..1987af7a9676
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs
+@@ -0,0 +1,54 @@
++//! Mask reductions implementation for `arm` targets
++
++/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with
++/// more than two elements.
++macro_rules! arm_128_v7_neon_impl {
++    ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
++        impl All for $id {
++            #[inline]
++            #[target_feature(enable = "v7,neon")]
++            unsafe fn all(self) -> bool {
++                use crate::arch::arm::$vpmin;
++                use crate::mem::transmute;
++                union U {
++                    halves: ($half, $half),
++                    vec: $id,
++                }
++                let halves = U { vec: self }.halves;
++                let h: $half = transmute($vpmin(
++                    transmute(halves.0),
++                    transmute(halves.1),
++                ));
++                h.all()
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            #[target_feature(enable = "v7,neon")]
++            unsafe fn any(self) -> bool {
++                use crate::arch::arm::$vpmax;
++                use crate::mem::transmute;
++                union U {
++                    halves: ($half, $half),
++                    vec: $id,
++                }
++                let halves = U { vec: self }.halves;
++                let h: $half = transmute($vpmax(
++                    transmute(halves.0),
++                    transmute(halves.1),
++                ));
++                h.any()
++            }
++        }
++    };
++}
++
++/// Mask reduction implementation for `arm` targets
++macro_rules! impl_mask_reductions {
++    // 128-bit wide masks
++    (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); };
++    (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); };
++    (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); };
++    // Fallback to LLVM's default code-generation:
++    ($id:ident) => { fallback_impl!($id); };
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs
+new file mode 100644
+index 000000000000..25e5c813abca
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs
+@@ -0,0 +1,6 @@
++//! Default mask reduction implementations.
++
++/// Default mask reduction implementation
++macro_rules! impl_mask_reductions {
++    ($id:ident) => { fallback_impl!($id); };
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs
+new file mode 100644
+index 000000000000..0d246e2fdab6
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs
+@@ -0,0 +1,237 @@
++//! Default implementation of a mask reduction for any target.
++
++macro_rules! fallback_to_other_impl {
++    ($id:ident, $other:ident) => {
++        impl All for $id {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                let m: $other = crate::mem::transmute(self);
++                m.all()
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                let m: $other = crate::mem::transmute(self);
++                m.any()
++            }
++        }
++    };
++}
++
++/// Fallback implementation.
++macro_rules! fallback_impl {
++    // 16-bit wide masks:
++    (m8x2) => {
++        impl All for m8x2 {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                let i: u16 = crate::mem::transmute(self);
++                i == u16::max_value()
++            }
++        }
++        impl Any for m8x2 {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                let i: u16 = crate::mem::transmute(self);
++                i != 0
++            }
++        }
++    };
++    // 32-bit wide masks
++    (m8x4) => {
++        impl All for m8x4 {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                let i: u32 = crate::mem::transmute(self);
++                i == u32::max_value()
++            }
++        }
++        impl Any for m8x4 {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                let i: u32 = crate::mem::transmute(self);
++                i != 0
++            }
++        }
++    };
++    (m16x2) => {
++        fallback_to_other_impl!(m16x2, m8x4);
++    };
++    // 64-bit wide masks:
++    (m8x8) => {
++        impl All for m8x8 {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                let i: u64 = crate::mem::transmute(self);
++                i == u64::max_value()
++            }
++        }
++        impl Any for m8x8 {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                let i: u64 = crate::mem::transmute(self);
++                i != 0
++            }
++        }
++    };
++    (m16x4) => {
++        fallback_to_other_impl!(m16x4, m8x8);
++    };
++    (m32x2) => {
++        fallback_to_other_impl!(m32x2, m16x4);
++    };
++    // FIXME: 64x1 mask
++    // 128-bit wide masks:
++    (m8x16) => {
++        impl All for m8x16 {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                let i: u128 = crate::mem::transmute(self);
++                i == u128::max_value()
++            }
++        }
++        impl Any for m8x16 {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                let i: u128 = crate::mem::transmute(self);
++                i != 0
++            }
++        }
++    };
++    (m16x8) => {
++        fallback_to_other_impl!(m16x8, m8x16);
++    };
++    (m32x4) => {
++        fallback_to_other_impl!(m32x4, m16x8);
++    };
++    (m64x2) => {
++        fallback_to_other_impl!(m64x2, m32x4);
++    };
++    (m128x1) => {
++        fallback_to_other_impl!(m128x1, m64x2);
++    };
++    // 256-bit wide masks
++    (m8x32) => {
++        impl All for m8x32 {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                let i: [u128; 2] = crate::mem::transmute(self);
++                let o: [u128; 2] = [u128::max_value(); 2];
++                i == o
++            }
++        }
++        impl Any for m8x32 {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                let i: [u128; 2] = crate::mem::transmute(self);
++                let o: [u128; 2] = [0; 2];
++                i != o
++            }
++        }
++    };
++    (m16x16) => {
++        fallback_to_other_impl!(m16x16, m8x32);
++    };
++    (m32x8) => {
++        fallback_to_other_impl!(m32x8, m16x16);
++    };
++    (m64x4) => {
++        fallback_to_other_impl!(m64x4, m32x8);
++    };
++    (m128x2) => {
++        fallback_to_other_impl!(m128x2, m64x4);
++    };
++    // 512-bit wide masks
++    (m8x64) => {
++        impl All for m8x64 {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                let i: [u128; 4] = crate::mem::transmute(self);
++                let o: [u128; 4] = [u128::max_value(); 4];
++                i == o
++            }
++        }
++        impl Any for m8x64 {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                let i: [u128; 4] = crate::mem::transmute(self);
++                let o: [u128; 4] = [0; 4];
++                i != o
++            }
++        }
++    };
++    (m16x32) => {
++        fallback_to_other_impl!(m16x32, m8x64);
++    };
++    (m32x16) => {
++        fallback_to_other_impl!(m32x16, m16x32);
++    };
++    (m64x8) => {
++        fallback_to_other_impl!(m64x8, m32x16);
++    };
++    (m128x4) => {
++        fallback_to_other_impl!(m128x4, m64x8);
++    };
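++    // The trick used by the arms above, in isolation (standalone sketch,
++    // not from this crate): a mask lane is either all-zeros or all-ones,
++    // so the whole mask can be reinterpreted as one wide integer; all-true
++    // means every bit is set, any-true means the integer is nonzero:
++    //
++    //     fn all_m8x2(lanes: [u8; 2]) -> bool {
++    //         // lanes are 0x00 (false) or 0xff (true)
++    //         u16::from_ne_bytes(lanes) == u16::MAX
++    //     }
++    //
++    //     fn any_m8x2(lanes: [u8; 2]) -> bool {
++    //         u16::from_ne_bytes(lanes) != 0
++    //     }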
++    // Masks with pointer-sized elements
++    (msizex2) => {
++        cfg_if! {
++            if #[cfg(target_pointer_width = "64")] {
++                fallback_to_other_impl!(msizex2, m64x2);
++            } else if #[cfg(target_pointer_width = "32")] {
++                fallback_to_other_impl!(msizex2, m32x2);
++            } else {
++                compile_error!("unsupported target_pointer_width");
++            }
++        }
++    };
++    (msizex4) => {
++        cfg_if! {
++            if #[cfg(target_pointer_width = "64")] {
++                fallback_to_other_impl!(msizex4, m64x4);
++            } else if #[cfg(target_pointer_width = "32")] {
++                fallback_to_other_impl!(msizex4, m32x4);
++            } else {
++                compile_error!("unsupported target_pointer_width");
++            }
++        }
++    };
++    (msizex8) => {
++        cfg_if! {
++            if #[cfg(target_pointer_width = "64")] {
++                fallback_to_other_impl!(msizex8, m64x8);
++            } else if #[cfg(target_pointer_width = "32")] {
++                fallback_to_other_impl!(msizex8, m32x8);
++            } else {
++                compile_error!("unsupported target_pointer_width");
++            }
++        }
++    };
++}
++
++macro_rules! recurse_half {
++    ($vid:ident, $vid_h:ident) => {
++        impl All for $vid {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                union U {
++                    halves: ($vid_h, $vid_h),
++                    vec: $vid,
++                }
++                let halves = U { vec: self }.halves;
++                halves.0.all() && halves.1.all()
++            }
++        }
++        impl Any for $vid {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                union U {
++                    halves: ($vid_h, $vid_h),
++                    vec: $vid,
++                }
++                let halves = U { vec: self }.halves;
++                halves.0.any() || halves.1.any()
++            }
++        }
++    };
++}
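++// `recurse_half` in ordinary code (standalone sketch, not from this
++// crate): when no reduction exists for the full width, reduce each half
++// and combine; `&&`/`||` short-circuiting gives exactly the all/any
++// semantics:
++//
++//     fn all_wide(halves: [[bool; 4]; 2]) -> bool {
++//         let all_half = |h: &[bool; 4]| h.iter().all(|&l| l);
++//         all_half(&halves[0]) && all_half(&halves[1])
++//     }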
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
+new file mode 100644
+index 000000000000..2ae4ed81c416
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
+@@ -0,0 +1,194 @@
++//! Mask reductions implementation for `x86` and `x86_64` targets
++
++#[cfg(target_feature = "sse")]
++#[macro_use]
++mod sse;
++
++#[cfg(target_feature = "sse2")]
++#[macro_use]
++mod sse2;
++
++#[cfg(target_feature = "avx")]
++#[macro_use]
++mod avx;
++
++#[cfg(target_feature = "avx2")]
++#[macro_use]
++mod avx2;
++
++/// x86 64-bit m8x8 implementation
++macro_rules! x86_m8x8_impl {
++    ($id:ident) => {
++        cfg_if! {
++            if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] {
++                x86_m8x8_sse_impl!($id);
++            } else {
++                fallback_impl!($id);
++            }
++        }
++    };
++}
++
++/// x86 128-bit m8x16 implementation
++macro_rules! x86_m8x16_impl {
++    ($id:ident) => {
++        cfg_if! {
++            if #[cfg(target_feature = "sse2")] {
++                x86_m8x16_sse2_impl!($id);
++            } else {
++                fallback_impl!($id);
++            }
++        }
++    };
++}
++
++/// x86 128-bit m32x4 implementation
++macro_rules! x86_m32x4_impl {
++    ($id:ident) => {
++        cfg_if! {
++            if #[cfg(target_feature = "sse")] {
++                x86_m32x4_sse_impl!($id);
++            } else {
++                fallback_impl!($id);
++            }
++        }
++    };
++}
++
++/// x86 128-bit m64x2 implementation
++macro_rules! x86_m64x2_impl {
++    ($id:ident) => {
++        cfg_if! {
++            if #[cfg(target_feature = "sse2")] {
++                x86_m64x2_sse2_impl!($id);
++            } else if #[cfg(target_feature = "sse")] {
++                x86_m32x4_sse_impl!($id);
++            } else {
++                fallback_impl!($id);
++            }
++        }
++    };
++}
++
++/// x86 256-bit m8x32 implementation
++macro_rules! x86_m8x32_impl {
++    ($id:ident, $half_id:ident) => {
++        cfg_if! {
++            if #[cfg(target_feature = "avx2")] {
++                x86_m8x32_avx2_impl!($id);
++            } else if #[cfg(target_feature = "avx")] {
++                x86_m8x32_avx_impl!($id);
++            } else if #[cfg(target_feature = "sse2")] {
++                recurse_half!($id, $half_id);
++            } else {
++                fallback_impl!($id);
++            }
++        }
++    };
++}
++
++/// x86 256-bit m32x8 implementation
++macro_rules! x86_m32x8_impl {
++    ($id:ident, $half_id:ident) => {
++        cfg_if! {
++            if #[cfg(target_feature = "avx")] {
++                x86_m32x8_avx_impl!($id);
++            } else if #[cfg(target_feature = "sse")] {
++                recurse_half!($id, $half_id);
++            } else {
++                fallback_impl!($id);
++            }
++        }
++    };
++}
++
++/// x86 256-bit m64x4 implementation
++macro_rules! x86_m64x4_impl {
++    ($id:ident, $half_id:ident) => {
++        cfg_if! {
++            if #[cfg(target_feature = "avx")] {
++                x86_m64x4_avx_impl!($id);
++            } else if #[cfg(target_feature = "sse")] {
++                recurse_half!($id, $half_id);
++            } else {
++                fallback_impl!($id);
++            }
++        }
++    };
++}
++
++/// Fallback implementation.
++macro_rules! x86_intr_impl {
++    ($id:ident) => {
++        impl All for $id {
++            #[inline]
++            unsafe fn all(self) -> bool {
++                use crate::llvm::simd_reduce_all;
++                simd_reduce_all(self.0)
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            unsafe fn any(self) -> bool {
++                use crate::llvm::simd_reduce_any;
++                simd_reduce_any(self.0)
++            }
++        }
++    };
++}
++
++/// Mask reduction implementation for `x86` and `x86_64` targets
++macro_rules! impl_mask_reductions {
++    // 64-bit wide masks
++    (m8x8) => { x86_m8x8_impl!(m8x8); };
++    (m16x4) => { x86_m8x8_impl!(m16x4); };
++    (m32x2) => { x86_m8x8_impl!(m32x2); };
++    // 128-bit wide masks
++    (m8x16) => { x86_m8x16_impl!(m8x16); };
++    (m16x8) => { x86_m8x16_impl!(m16x8); };
++    (m32x4) => { x86_m32x4_impl!(m32x4); };
++    (m64x2) => { x86_m64x2_impl!(m64x2); };
++    (m128x1) => { x86_intr_impl!(m128x1); };
++    // 256-bit wide masks:
++    (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); };
++    (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); };
++    (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); };
++    (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); };
++    (m128x2) => { x86_intr_impl!(m128x2); };
++    (msizex2) => {
++        cfg_if! {
++            if #[cfg(target_pointer_width = "64")] {
++                fallback_to_other_impl!(msizex2, m64x2);
++            } else if #[cfg(target_pointer_width = "32")] {
++                fallback_to_other_impl!(msizex2, m32x2);
++            } else {
++                compile_error!("unsupported target_pointer_width");
++            }
++        }
++    };
++    (msizex4) => {
++        cfg_if! {
++            if #[cfg(target_pointer_width = "64")] {
++                fallback_to_other_impl!(msizex4, m64x4);
++            } else if #[cfg(target_pointer_width = "32")] {
++                fallback_to_other_impl!(msizex4, m32x4);
++            } else {
++                compile_error!("unsupported target_pointer_width");
++            }
++        }
++    };
++    (msizex8) => {
++        cfg_if! {
++            if #[cfg(target_pointer_width = "64")] {
++                fallback_to_other_impl!(msizex8, m64x8);
++            } else if #[cfg(target_pointer_width = "32")] {
++                fallback_to_other_impl!(msizex8, m32x8);
++            } else {
++                compile_error!("unsupported target_pointer_width");
++            }
++        }
++    };
++
++    // Fallback to LLVM's default code-generation:
++    ($id:ident) => { fallback_impl!($id); };
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
+new file mode 100644
+index 000000000000..d18736fb0399
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
+@@ -0,0 +1,101 @@
++//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX`
++
++/// `x86`/`x86_64` 256-bit `AVX` implementation
++/// FIXME: it might be faster here to do two `_mm_movemask_epi8`
++#[cfg(target_feature = "avx")]
++macro_rules! x86_m8x32_avx_impl {
++    ($id:ident) => {
++        impl All for $id {
++            #[inline]
++            #[target_feature(enable = "avx")]
++            unsafe fn all(self) -> bool {
++                #[cfg(target_arch = "x86")]
++                use crate::arch::x86::_mm256_testc_si256;
++                #[cfg(target_arch = "x86_64")]
++                use crate::arch::x86_64::_mm256_testc_si256;
++                _mm256_testc_si256(
++                    crate::mem::transmute(self),
++                    crate::mem::transmute($id::splat(true)),
++                ) != 0
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            #[target_feature(enable = "avx")]
++            unsafe fn any(self) -> bool {
++                #[cfg(target_arch = "x86")]
++                use crate::arch::x86::_mm256_testz_si256;
++                #[cfg(target_arch = "x86_64")]
++                use crate::arch::x86_64::_mm256_testz_si256;
++                _mm256_testz_si256(
++                    crate::mem::transmute(self),
++                    crate::mem::transmute(self),
++                ) == 0
++            }
++        }
++    };
++}
++
++/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation
++macro_rules! x86_m32x8_avx_impl {
++    ($id:ident) => {
++        impl All for $id {
++            #[inline]
++            #[target_feature(enable = "sse")]
++            unsafe fn all(self) -> bool {
++                #[cfg(target_arch = "x86")]
++                use crate::arch::x86::_mm256_movemask_ps;
++                #[cfg(target_arch = "x86_64")]
++                use crate::arch::x86_64::_mm256_movemask_ps;
++                // _mm256_movemask_ps(a) creates a 8bit mask containing the
++                // most significant bit of each lane of `a`. If all bits are
++                // set, then all 8 lanes of the mask are true.
++                _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            #[target_feature(enable = "sse")]
++            unsafe fn any(self) -> bool {
++                #[cfg(target_arch = "x86")]
++                use crate::arch::x86::_mm256_movemask_ps;
++                #[cfg(target_arch = "x86_64")]
++                use crate::arch::x86_64::_mm256_movemask_ps;
++
++                _mm256_movemask_ps(crate::mem::transmute(self)) != 0
++            }
++        }
++    };
++}
++
++/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation
++macro_rules! x86_m64x4_avx_impl {
++    ($id:ident) => {
++        impl All for $id {
++            #[inline]
++            #[target_feature(enable = "sse")]
++            unsafe fn all(self) -> bool {
++                #[cfg(target_arch = "x86")]
++                use crate::arch::x86::_mm256_movemask_pd;
++                #[cfg(target_arch = "x86_64")]
++                use crate::arch::x86_64::_mm256_movemask_pd;
++                // _mm256_movemask_pd(a) creates a 4bit mask containing the
++                // most significant bit of each lane of `a`. If all bits are
++                // set, then all 4 lanes of the mask are true.
++                _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32
++            }
++        }
++        impl Any for $id {
++            #[inline]
++            #[target_feature(enable = "sse")]
++            unsafe fn any(self) -> bool {
++                #[cfg(target_arch = "x86")]
++                use crate::arch::x86::_mm256_movemask_pd;
++                #[cfg(target_arch = "x86_64")]
++                use crate::arch::x86_64::_mm256_movemask_pd;
++
++                _mm256_movemask_pd(crate::mem::transmute(self)) != 0
++            }
++        }
++    };
++}
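++// `movemask` outside the macros (standalone sketch, not from this crate,
++// for x86_64 where SSE2 is baseline): the intrinsic packs the most
++// significant bit of each byte lane into an integer, so all/any become
++// plain integer compares:
++//
++//     #[cfg(target_arch = "x86_64")]
++//     fn main() {
++//         use core::arch::x86_64::{_mm_movemask_epi8, _mm_set1_epi8};
++//         unsafe {
++//             let all_true = _mm_set1_epi8(-1); // 16 lanes of 0xff
++//             assert_eq!(_mm_movemask_epi8(all_true), 0xffff);
++//             let all_false = _mm_set1_epi8(0);
++//             assert_eq!(_mm_movemask_epi8(all_false), 0);
++//         }
++//     }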
x86_m8x32_avx2_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_epi8; ++ // _mm256_movemask_epi8(a) creates a 32bit mask containing the ++ // most significant bit of each byte of `a`. If all ++ // bits are set, then all 32 lanes of the mask are ++ // true. ++ _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm256_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm256_movemask_epi8; ++ ++ _mm256_movemask_epi8(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs +new file mode 100644 +index 000000000000..7482f9430a14 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs +@@ -0,0 +1,68 @@ ++//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`. ++#![allow(unused)] ++ ++/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation ++macro_rules! x86_m32x4_sse_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_ps; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_ps; ++ // _mm_movemask_ps(a) creates a 4bit mask containing the ++ // most significant bit of each lane of `a`. If all ++ // bits are set, then all 4 lanes of the mask are ++ // true. ++ _mm_movemask_ps(crate::mem::transmute(self)) ++ == 0b_1111_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_ps; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_ps; ++ ++ _mm_movemask_ps(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} ++ ++macro_rules! x86_m8x8_sse_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pi8; ++ // _mm_movemask_pi8(a) creates an 8bit mask containing the most ++ // significant bit of each byte of `a`. If all bits are set, ++ // then all 8 lanes of the mask are true. ++ _mm_movemask_pi8(crate::mem::transmute(self)) ++ == u8::max_value() as i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pi8; ++ ++ _mm_movemask_pi8(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs +new file mode 100644 +index 000000000000..a99c606f5268 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs +@@ -0,0 +1,70 @@ ++//! 
Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`. ++#![allow(unused)] ++ ++/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation ++macro_rules! x86_m64x2_sse2_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pd; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pd; ++ // _mm_movemask_pd(a) creates a 2bit mask containing the ++ // most significant bit of each lane of `a`. If all ++ // bits are set, then all 2 lanes of the mask are ++ // true. ++ _mm_movemask_pd(crate::mem::transmute(self)) ++ == 0b_11_i32 ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_pd; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_pd; ++ ++ _mm_movemask_pd(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} ++ ++/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation ++macro_rules! x86_m8x16_sse2_impl { ++ ($id:ident) => { ++ impl All for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn all(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_epi8; ++ // _mm_movemask_epi8(a) creates a 16bit mask containing the ++ // most significant bit of each byte of `a`. If all ++ // bits are set, then all 16 lanes of the mask are ++ // true. ++ _mm_movemask_epi8(crate::mem::transmute(self)) ++ == i32::from(u16::max_value()) ++ } ++ } ++ impl Any for $id { ++ #[inline] ++ #[target_feature(enable = "sse2")] ++ unsafe fn any(self) -> bool { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::_mm_movemask_epi8; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::_mm_movemask_epi8; ++ ++ _mm_movemask_epi8(crate::mem::transmute(self)) != 0 ++ } ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/codegen/shuffle.rs b/third_party/rust/packed_simd/src/codegen/shuffle.rs +new file mode 100644 +index 000000000000..35a9db905339 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/shuffle.rs +@@ -0,0 +1,302 @@ ++//! Implementations of the `ShuffleResult` trait for the different numbers of ++//! lanes and vector element types. 
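++//!
++//! As an illustrative sketch (using the crate's public `shuffle!` macro,
++//! which resolves its result type through these impls), selecting two
++//! lanes out of an `f32x4` yields an `f32x2` via `Shuffle<[u32; 2]> for
++//! f32`:
++//!
++//! ```
++//! use packed_simd::*;
++//! let v = f32x4::new(1., 2., 3., 4.);
++//! // Two indices select two lanes, so the result type is `f32x2`.
++//! let r: f32x2 = shuffle!(v, [0, 3]);
++//! assert_eq!(r, f32x2::new(1., 4.));
++//! ```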
++ ++use crate::masks::*; ++use crate::sealed::Shuffle; ++ ++impl Shuffle<[u32; 2]> for i8 { ++ type Output = crate::codegen::i8x2; ++} ++impl Shuffle<[u32; 4]> for i8 { ++ type Output = crate::codegen::i8x4; ++} ++impl Shuffle<[u32; 8]> for i8 { ++ type Output = crate::codegen::i8x8; ++} ++impl Shuffle<[u32; 16]> for i8 { ++ type Output = crate::codegen::i8x16; ++} ++impl Shuffle<[u32; 32]> for i8 { ++ type Output = crate::codegen::i8x32; ++} ++impl Shuffle<[u32; 64]> for i8 { ++ type Output = crate::codegen::i8x64; ++} ++ ++impl Shuffle<[u32; 2]> for u8 { ++ type Output = crate::codegen::u8x2; ++} ++impl Shuffle<[u32; 4]> for u8 { ++ type Output = crate::codegen::u8x4; ++} ++impl Shuffle<[u32; 8]> for u8 { ++ type Output = crate::codegen::u8x8; ++} ++impl Shuffle<[u32; 16]> for u8 { ++ type Output = crate::codegen::u8x16; ++} ++impl Shuffle<[u32; 32]> for u8 { ++ type Output = crate::codegen::u8x32; ++} ++impl Shuffle<[u32; 64]> for u8 { ++ type Output = crate::codegen::u8x64; ++} ++ ++impl Shuffle<[u32; 2]> for m8 { ++ type Output = crate::codegen::m8x2; ++} ++impl Shuffle<[u32; 4]> for m8 { ++ type Output = crate::codegen::m8x4; ++} ++impl Shuffle<[u32; 8]> for m8 { ++ type Output = crate::codegen::m8x8; ++} ++impl Shuffle<[u32; 16]> for m8 { ++ type Output = crate::codegen::m8x16; ++} ++impl Shuffle<[u32; 32]> for m8 { ++ type Output = crate::codegen::m8x32; ++} ++impl Shuffle<[u32; 64]> for m8 { ++ type Output = crate::codegen::m8x64; ++} ++ ++impl Shuffle<[u32; 2]> for i16 { ++ type Output = crate::codegen::i16x2; ++} ++impl Shuffle<[u32; 4]> for i16 { ++ type Output = crate::codegen::i16x4; ++} ++impl Shuffle<[u32; 8]> for i16 { ++ type Output = crate::codegen::i16x8; ++} ++impl Shuffle<[u32; 16]> for i16 { ++ type Output = crate::codegen::i16x16; ++} ++impl Shuffle<[u32; 32]> for i16 { ++ type Output = crate::codegen::i16x32; ++} ++ ++impl Shuffle<[u32; 2]> for u16 { ++ type Output = crate::codegen::u16x2; ++} ++impl Shuffle<[u32; 4]> for u16 { ++ type Output = crate::codegen::u16x4; ++} ++impl Shuffle<[u32; 8]> for u16 { ++ type Output = crate::codegen::u16x8; ++} ++impl Shuffle<[u32; 16]> for u16 { ++ type Output = crate::codegen::u16x16; ++} ++impl Shuffle<[u32; 32]> for u16 { ++ type Output = crate::codegen::u16x32; ++} ++ ++impl Shuffle<[u32; 2]> for m16 { ++ type Output = crate::codegen::m16x2; ++} ++impl Shuffle<[u32; 4]> for m16 { ++ type Output = crate::codegen::m16x4; ++} ++impl Shuffle<[u32; 8]> for m16 { ++ type Output = crate::codegen::m16x8; ++} ++impl Shuffle<[u32; 16]> for m16 { ++ type Output = crate::codegen::m16x16; ++} ++impl Shuffle<[u32; 32]> for m16 { ++ type Output = crate::codegen::m16x32; ++} ++ ++impl Shuffle<[u32; 2]> for i32 { ++ type Output = crate::codegen::i32x2; ++} ++impl Shuffle<[u32; 4]> for i32 { ++ type Output = crate::codegen::i32x4; ++} ++impl Shuffle<[u32; 8]> for i32 { ++ type Output = crate::codegen::i32x8; ++} ++impl Shuffle<[u32; 16]> for i32 { ++ type Output = crate::codegen::i32x16; ++} ++ ++impl Shuffle<[u32; 2]> for u32 { ++ type Output = crate::codegen::u32x2; ++} ++impl Shuffle<[u32; 4]> for u32 { ++ type Output = crate::codegen::u32x4; ++} ++impl Shuffle<[u32; 8]> for u32 { ++ type Output = crate::codegen::u32x8; ++} ++impl Shuffle<[u32; 16]> for u32 { ++ type Output = crate::codegen::u32x16; ++} ++ ++impl Shuffle<[u32; 2]> for f32 { ++ type Output = crate::codegen::f32x2; ++} ++impl Shuffle<[u32; 4]> for f32 { ++ type Output = crate::codegen::f32x4; ++} ++impl Shuffle<[u32; 8]> for f32 { ++ type Output = crate::codegen::f32x8; 
++} ++impl Shuffle<[u32; 16]> for f32 { ++ type Output = crate::codegen::f32x16; ++} ++ ++impl Shuffle<[u32; 2]> for m32 { ++ type Output = crate::codegen::m32x2; ++} ++impl Shuffle<[u32; 4]> for m32 { ++ type Output = crate::codegen::m32x4; ++} ++impl Shuffle<[u32; 8]> for m32 { ++ type Output = crate::codegen::m32x8; ++} ++impl Shuffle<[u32; 16]> for m32 { ++ type Output = crate::codegen::m32x16; ++} ++ ++/* FIXME: 64-bit single element vector ++impl Shuffle<[u32; 1]> for i64 { ++ type Output = crate::codegen::i64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for i64 { ++ type Output = crate::codegen::i64x2; ++} ++impl Shuffle<[u32; 4]> for i64 { ++ type Output = crate::codegen::i64x4; ++} ++impl Shuffle<[u32; 8]> for i64 { ++ type Output = crate::codegen::i64x8; ++} ++ ++/* FIXME: 64-bit single element vector ++impl Shuffle<[u32; 1]> for u64 { ++ type Output = crate::codegen::u64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for u64 { ++ type Output = crate::codegen::u64x2; ++} ++impl Shuffle<[u32; 4]> for u64 { ++ type Output = crate::codegen::u64x4; ++} ++impl Shuffle<[u32; 8]> for u64 { ++ type Output = crate::codegen::u64x8; ++} ++ ++/* FIXME: 64-bit single element vector ++impl Shuffle<[u32; 1]> for f64 { ++ type Output = crate::codegen::f64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for f64 { ++ type Output = crate::codegen::f64x2; ++} ++impl Shuffle<[u32; 4]> for f64 { ++ type Output = crate::codegen::f64x4; ++} ++impl Shuffle<[u32; 8]> for f64 { ++ type Output = crate::codegen::f64x8; ++} ++ ++/* FIXME: 64-bit single element vector ++impl Shuffle<[u32; 1]> for m64 { ++ type Output = crate::codegen::m64x1; ++} ++*/ ++impl Shuffle<[u32; 2]> for m64 { ++ type Output = crate::codegen::m64x2; ++} ++impl Shuffle<[u32; 4]> for m64 { ++ type Output = crate::codegen::m64x4; ++} ++impl Shuffle<[u32; 8]> for m64 { ++ type Output = crate::codegen::m64x8; ++} ++ ++impl Shuffle<[u32; 2]> for isize { ++ type Output = crate::codegen::isizex2; ++} ++impl Shuffle<[u32; 4]> for isize { ++ type Output = crate::codegen::isizex4; ++} ++impl Shuffle<[u32; 8]> for isize { ++ type Output = crate::codegen::isizex8; ++} ++ ++impl Shuffle<[u32; 2]> for usize { ++ type Output = crate::codegen::usizex2; ++} ++impl Shuffle<[u32; 4]> for usize { ++ type Output = crate::codegen::usizex4; ++} ++impl Shuffle<[u32; 8]> for usize { ++ type Output = crate::codegen::usizex8; ++} ++ ++impl Shuffle<[u32; 2]> for *const T { ++ type Output = crate::codegen::cptrx2; ++} ++impl Shuffle<[u32; 4]> for *const T { ++ type Output = crate::codegen::cptrx4; ++} ++impl Shuffle<[u32; 8]> for *const T { ++ type Output = crate::codegen::cptrx8; ++} ++ ++impl Shuffle<[u32; 2]> for *mut T { ++ type Output = crate::codegen::mptrx2; ++} ++impl Shuffle<[u32; 4]> for *mut T { ++ type Output = crate::codegen::mptrx4; ++} ++impl Shuffle<[u32; 8]> for *mut T { ++ type Output = crate::codegen::mptrx8; ++} ++ ++impl Shuffle<[u32; 2]> for msize { ++ type Output = crate::codegen::msizex2; ++} ++impl Shuffle<[u32; 4]> for msize { ++ type Output = crate::codegen::msizex4; ++} ++impl Shuffle<[u32; 8]> for msize { ++ type Output = crate::codegen::msizex8; ++} ++ ++impl Shuffle<[u32; 1]> for i128 { ++ type Output = crate::codegen::i128x1; ++} ++impl Shuffle<[u32; 2]> for i128 { ++ type Output = crate::codegen::i128x2; ++} ++impl Shuffle<[u32; 4]> for i128 { ++ type Output = crate::codegen::i128x4; ++} ++ ++impl Shuffle<[u32; 1]> for u128 { ++ type Output = crate::codegen::u128x1; ++} ++impl Shuffle<[u32; 2]> for u128 { ++ type Output = crate::codegen::u128x2; ++} ++impl 
Shuffle<[u32; 4]> for u128 {
++    type Output = crate::codegen::u128x4;
++}
++
++impl Shuffle<[u32; 1]> for m128 {
++    type Output = crate::codegen::m128x1;
++}
++impl Shuffle<[u32; 2]> for m128 {
++    type Output = crate::codegen::m128x2;
++}
++impl Shuffle<[u32; 4]> for m128 {
++    type Output = crate::codegen::m128x4;
++}
+diff --git a/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
+new file mode 100644
+index 000000000000..1e9f5816371a
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
+@@ -0,0 +1,432 @@
++//! Shuffle vector lanes with run-time indices.
++
++use crate::*;
++
++pub trait Shuffle1Dyn {
++    type Indices;
++    fn shuffle1_dyn(self, _: Self::Indices) -> Self;
++}
++
++// Fallback implementation
++macro_rules! impl_fallback {
++    ($id:ident) => {
++        impl Shuffle1Dyn for $id {
++            type Indices = Self;
++            #[inline]
++            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                let mut result = Self::splat(0);
++                for i in 0..$id::lanes() {
++                    result = result
++                        .replace(i, self.extract(indices.extract(i) as usize));
++                }
++                result
++            }
++        }
++    };
++}
++
++macro_rules! impl_shuffle1_dyn {
++    (u8x8) => {
++        cfg_if! {
++            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
++                         target_feature = "ssse3"))] {
++                impl Shuffle1Dyn for u8x8 {
++                    type Indices = Self;
++                    #[inline]
++                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                        #[cfg(target_arch = "x86")]
++                        use crate::arch::x86::_mm_shuffle_pi8;
++                        #[cfg(target_arch = "x86_64")]
++                        use crate::arch::x86_64::_mm_shuffle_pi8;
++
++                        unsafe {
++                            crate::mem::transmute(
++                                _mm_shuffle_pi8(
++                                    crate::mem::transmute(self.0),
++                                    crate::mem::transmute(indices.0)
++                                )
++                            )
++                        }
++                    }
++                }
++            } else if #[cfg(all(
++                any(
++                    all(target_arch = "aarch64", target_feature = "neon"),
++                    all(target_arch = "arm", target_feature = "v7",
++                        target_feature = "neon")
++                ),
++                any(feature = "core_arch", libcore_neon)
++                )
++            )] {
++                impl Shuffle1Dyn for u8x8 {
++                    type Indices = Self;
++                    #[inline]
++                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                        #[cfg(target_arch = "aarch64")]
++                        use crate::arch::aarch64::vtbl1_u8;
++                        #[cfg(target_arch = "arm")]
++                        use crate::arch::arm::vtbl1_u8;
++
++                        // This is safe because the binary is compiled with
++                        // neon enabled at compile-time and can therefore only
++                        // run on CPUs that have it enabled.
++                        unsafe {
++                            Simd(mem::transmute(
++                                vtbl1_u8(mem::transmute(self.0),
++                                         crate::mem::transmute(indices.0))
++                            ))
++                        }
++                    }
++                }
++            } else {
++                impl_fallback!(u8x8);
++            }
++        }
++    };
++    (u8x16) => {
++        cfg_if! {
++            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
++                         target_feature = "ssse3"))] {
++                impl Shuffle1Dyn for u8x16 {
++                    type Indices = Self;
++                    #[inline]
++                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                        #[cfg(target_arch = "x86")]
++                        use crate::arch::x86::_mm_shuffle_epi8;
++                        #[cfg(target_arch = "x86_64")]
++                        use crate::arch::x86_64::_mm_shuffle_epi8;
++                        // This is safe because the binary is compiled with
++                        // ssse3 enabled at compile-time and can therefore only
++                        // run on CPUs that have it enabled.
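++                        // `_mm_shuffle_epi8(a, idx)` fills each result byte
++                        // with the byte of `a` at position `idx & 0x0f`, or
++                        // with `0` when the high bit of the index byte is
++                        // set, so in-range dynamic lane indices map directly
++                        // onto the `shuffle1_dyn` semantics.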
++                        unsafe {
++                            Simd(mem::transmute(
++                                _mm_shuffle_epi8(mem::transmute(self.0),
++                                                 crate::mem::transmute(indices))
++                            ))
++                        }
++                    }
++                }
++            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon",
++                                any(feature = "core_arch", libcore_neon)))] {
++                impl Shuffle1Dyn for u8x16 {
++                    type Indices = Self;
++                    #[inline]
++                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                        use crate::arch::aarch64::vqtbl1q_u8;
++
++                        // This is safe because the binary is compiled with
++                        // neon enabled at compile-time and can therefore only
++                        // run on CPUs that have it enabled.
++                        unsafe {
++                            Simd(mem::transmute(
++                                vqtbl1q_u8(mem::transmute(self.0),
++                                           crate::mem::transmute(indices.0))
++                            ))
++                        }
++                    }
++                }
++            } else if #[cfg(all(target_arch = "arm", target_feature = "v7",
++                                target_feature = "neon",
++                                any(feature = "core_arch", libcore_neon)))] {
++                impl Shuffle1Dyn for u8x16 {
++                    type Indices = Self;
++                    #[inline]
++                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                        use crate::arch::arm::vtbl2_u8;
++
++                        // This is safe because the binary is compiled with
++                        // neon enabled at compile-time and can therefore only
++                        // run on CPUs that have it enabled.
++                        unsafe {
++                            union U {
++                                j: u8x16,
++                                s: (u8x8, u8x8),
++                            }
++
++                            // Split the 16 indices into two halves and look
++                            // both halves up in the full table `self`.
++                            let (i0, i1) = U { j: indices }.s;
++
++                            let r0 = vtbl2_u8(
++                                mem::transmute(self),
++                                crate::mem::transmute(i0)
++                            );
++                            let r1 = vtbl2_u8(
++                                mem::transmute(self),
++                                crate::mem::transmute(i1)
++                            );
++
++                            let r = U { s: (r0, r1) }.j;
++
++                            Simd(mem::transmute(r))
++                        }
++                    }
++                }
++            } else {
++                impl_fallback!(u8x16);
++            }
++        }
++    };
++    (u16x8) => {
++        impl Shuffle1Dyn for u16x8 {
++            type Indices = Self;
++            #[inline]
++            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                let indices: u8x8 = (indices * 2).cast();
++                let indices: u8x16 = shuffle!(
++                    indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7]
++                );
++                let v = u8x16::new(
++                    0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
++                );
++                let indices = indices + v;
++                unsafe {
++                    let s: u8x16 = crate::mem::transmute(self);
++                    crate::mem::transmute(s.shuffle1_dyn(indices))
++                }
++            }
++        }
++    };
++    (u32x4) => {
++        cfg_if! {
++            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
++                         target_feature = "avx"))] {
++                impl Shuffle1Dyn for u32x4 {
++                    type Indices = Self;
++                    #[inline]
++                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                        #[cfg(target_arch = "x86")]
++                        use crate::arch::x86::{_mm_permutevar_ps};
++                        #[cfg(target_arch = "x86_64")]
++                        use crate::arch::x86_64::{_mm_permutevar_ps};
++
++                        unsafe {
++                            crate::mem::transmute(
++                                _mm_permutevar_ps(
++                                    crate::mem::transmute(self.0),
++                                    crate::mem::transmute(indices.0)
++                                )
++                            )
++                        }
++                    }
++                }
++            } else {
++                impl Shuffle1Dyn for u32x4 {
++                    type Indices = Self;
++                    #[inline]
++                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
++                        let indices: u8x4 = (indices * 4).cast();
++                        let indices: u8x16 = shuffle!(
++                            indices,
++                            [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
++                        );
++                        let v = u8x16::new(
++                            0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
++                        );
++                        let indices = indices + v;
++                        unsafe {
++                            let s: u8x16 = crate::mem::transmute(self);
++                            crate::mem::transmute(s.shuffle1_dyn(indices))
++                        }
++                    }
++                }
++            }
++        }
++    };
++    (u64x2) => {
++        cfg_if!
{ ++ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), ++ target_feature = "avx"))] { ++ impl Shuffle1Dyn for u64x2 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ #[cfg(target_arch = "x86")] ++ use crate::arch::x86::{_mm_permutevar_pd}; ++ #[cfg(target_arch = "x86_64")] ++ use crate::arch::x86_64::{_mm_permutevar_pd}; ++ // _mm_permutevar_pd uses the _second_ bit of each ++ // element to perform the selection, that is: 0b00 => 0, ++ // 0b10 => 1: ++ let indices = indices << 1; ++ unsafe { ++ crate::mem::transmute( ++ _mm_permutevar_pd( ++ crate::mem::transmute(self), ++ crate::mem::transmute(indices) ++ ) ++ ) ++ } ++ } ++ } ++ } else { ++ impl Shuffle1Dyn for u64x2 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ let indices: u8x2 = (indices * 8).cast(); ++ let indices: u8x16 = shuffle!( ++ indices, ++ [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ); ++ let v = u8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 ++ ); ++ let indices = indices + v; ++ unsafe { ++ let s: u8x16 =crate::mem::transmute(self); ++ crate::mem::transmute(s.shuffle1_dyn(indices)) ++ } ++ } ++ } ++ } ++ } ++ }; ++ (u128x1) => { ++ impl Shuffle1Dyn for u128x1 { ++ type Indices = Self; ++ #[inline] ++ fn shuffle1_dyn(self, _indices: Self::Indices) -> Self { ++ self ++ } ++ } ++ }; ++ ($id:ident) => { impl_fallback!($id); } ++} ++ ++impl_shuffle1_dyn!(u8x2); ++impl_shuffle1_dyn!(u8x4); ++impl_shuffle1_dyn!(u8x8); ++impl_shuffle1_dyn!(u8x16); ++impl_shuffle1_dyn!(u8x32); ++impl_shuffle1_dyn!(u8x64); ++ ++impl_shuffle1_dyn!(u16x2); ++impl_shuffle1_dyn!(u16x4); ++impl_shuffle1_dyn!(u16x8); ++impl_shuffle1_dyn!(u16x16); ++impl_shuffle1_dyn!(u16x32); ++ ++impl_shuffle1_dyn!(u32x2); ++impl_shuffle1_dyn!(u32x4); ++impl_shuffle1_dyn!(u32x8); ++impl_shuffle1_dyn!(u32x16); ++ ++impl_shuffle1_dyn!(u64x2); ++impl_shuffle1_dyn!(u64x4); ++impl_shuffle1_dyn!(u64x8); ++ ++impl_shuffle1_dyn!(usizex2); ++impl_shuffle1_dyn!(usizex4); ++impl_shuffle1_dyn!(usizex8); ++ ++impl_shuffle1_dyn!(u128x1); ++impl_shuffle1_dyn!(u128x2); ++impl_shuffle1_dyn!(u128x4); ++ ++// Implementation for non-unsigned vector types ++macro_rules! 
impl_shuffle1_dyn_non_u { ++ ($id:ident, $uid:ident) => { ++ impl Shuffle1Dyn for $id { ++ type Indices = $uid; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ unsafe { ++ let u: $uid = crate::mem::transmute(self); ++ crate::mem::transmute(u.shuffle1_dyn(indices)) ++ } ++ } ++ } ++ }; ++} ++ ++impl_shuffle1_dyn_non_u!(i8x2, u8x2); ++impl_shuffle1_dyn_non_u!(i8x4, u8x4); ++impl_shuffle1_dyn_non_u!(i8x8, u8x8); ++impl_shuffle1_dyn_non_u!(i8x16, u8x16); ++impl_shuffle1_dyn_non_u!(i8x32, u8x32); ++impl_shuffle1_dyn_non_u!(i8x64, u8x64); ++ ++impl_shuffle1_dyn_non_u!(i16x2, u16x2); ++impl_shuffle1_dyn_non_u!(i16x4, u16x4); ++impl_shuffle1_dyn_non_u!(i16x8, u16x8); ++impl_shuffle1_dyn_non_u!(i16x16, u16x16); ++impl_shuffle1_dyn_non_u!(i16x32, u16x32); ++ ++impl_shuffle1_dyn_non_u!(i32x2, u32x2); ++impl_shuffle1_dyn_non_u!(i32x4, u32x4); ++impl_shuffle1_dyn_non_u!(i32x8, u32x8); ++impl_shuffle1_dyn_non_u!(i32x16, u32x16); ++ ++impl_shuffle1_dyn_non_u!(i64x2, u64x2); ++impl_shuffle1_dyn_non_u!(i64x4, u64x4); ++impl_shuffle1_dyn_non_u!(i64x8, u64x8); ++ ++impl_shuffle1_dyn_non_u!(isizex2, usizex2); ++impl_shuffle1_dyn_non_u!(isizex4, usizex4); ++impl_shuffle1_dyn_non_u!(isizex8, usizex8); ++ ++impl_shuffle1_dyn_non_u!(i128x1, u128x1); ++impl_shuffle1_dyn_non_u!(i128x2, u128x2); ++impl_shuffle1_dyn_non_u!(i128x4, u128x4); ++ ++impl_shuffle1_dyn_non_u!(m8x2, u8x2); ++impl_shuffle1_dyn_non_u!(m8x4, u8x4); ++impl_shuffle1_dyn_non_u!(m8x8, u8x8); ++impl_shuffle1_dyn_non_u!(m8x16, u8x16); ++impl_shuffle1_dyn_non_u!(m8x32, u8x32); ++impl_shuffle1_dyn_non_u!(m8x64, u8x64); ++ ++impl_shuffle1_dyn_non_u!(m16x2, u16x2); ++impl_shuffle1_dyn_non_u!(m16x4, u16x4); ++impl_shuffle1_dyn_non_u!(m16x8, u16x8); ++impl_shuffle1_dyn_non_u!(m16x16, u16x16); ++impl_shuffle1_dyn_non_u!(m16x32, u16x32); ++ ++impl_shuffle1_dyn_non_u!(m32x2, u32x2); ++impl_shuffle1_dyn_non_u!(m32x4, u32x4); ++impl_shuffle1_dyn_non_u!(m32x8, u32x8); ++impl_shuffle1_dyn_non_u!(m32x16, u32x16); ++ ++impl_shuffle1_dyn_non_u!(m64x2, u64x2); ++impl_shuffle1_dyn_non_u!(m64x4, u64x4); ++impl_shuffle1_dyn_non_u!(m64x8, u64x8); ++ ++impl_shuffle1_dyn_non_u!(msizex2, usizex2); ++impl_shuffle1_dyn_non_u!(msizex4, usizex4); ++impl_shuffle1_dyn_non_u!(msizex8, usizex8); ++ ++impl_shuffle1_dyn_non_u!(m128x1, u128x1); ++impl_shuffle1_dyn_non_u!(m128x2, u128x2); ++impl_shuffle1_dyn_non_u!(m128x4, u128x4); ++ ++impl_shuffle1_dyn_non_u!(f32x2, u32x2); ++impl_shuffle1_dyn_non_u!(f32x4, u32x4); ++impl_shuffle1_dyn_non_u!(f32x8, u32x8); ++impl_shuffle1_dyn_non_u!(f32x16, u32x16); ++ ++impl_shuffle1_dyn_non_u!(f64x2, u64x2); ++impl_shuffle1_dyn_non_u!(f64x4, u64x4); ++impl_shuffle1_dyn_non_u!(f64x8, u64x8); ++ ++// Implementation for non-unsigned vector types ++macro_rules! 
impl_shuffle1_dyn_ptr { ++ ($id:ident, $uid:ident) => { ++ impl Shuffle1Dyn for $id { ++ type Indices = $uid; ++ #[inline] ++ fn shuffle1_dyn(self, indices: Self::Indices) -> Self { ++ unsafe { ++ let u: $uid = crate::mem::transmute(self); ++ crate::mem::transmute(u.shuffle1_dyn(indices)) ++ } ++ } ++ } ++ }; ++} ++ ++impl_shuffle1_dyn_ptr!(cptrx2, usizex2); ++impl_shuffle1_dyn_ptr!(cptrx4, usizex4); ++impl_shuffle1_dyn_ptr!(cptrx8, usizex8); ++ ++impl_shuffle1_dyn_ptr!(mptrx2, usizex2); ++impl_shuffle1_dyn_ptr!(mptrx4, usizex4); ++impl_shuffle1_dyn_ptr!(mptrx8, usizex8); +diff --git a/third_party/rust/packed_simd/src/codegen/swap_bytes.rs b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs +new file mode 100644 +index 000000000000..b435fb5da120 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs +@@ -0,0 +1,189 @@ ++//! Horizontal swap bytes reductions. ++ ++// FIXME: investigate using `llvm.bswap` ++// https://github.com/rust-lang-nursery/packed_simd/issues/19 ++ ++use crate::*; ++ ++crate trait SwapBytes { ++ fn swap_bytes(self) -> Self; ++} ++ ++macro_rules! impl_swap_bytes { ++ (v16: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ fn swap_bytes(self) -> Self { ++ unsafe { shuffle!(self, [1, 0]) } ++ } ++ } ++ )+ ++ }; ++ (v32: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x4 = crate::mem::transmute(self); ++ let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v64: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x8 = crate::mem::transmute(self); ++ let result: u8x8 = shuffle!( ++ bytes, [7, 6, 5, 4, 3, 2, 1, 0] ++ ); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v128: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x16 = crate::mem::transmute(self); ++ let result: u8x16 = shuffle!(bytes, [ ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v256: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x32 = crate::mem::transmute(self); ++ let result: u8x32 = shuffle!(bytes, [ ++ 31, 30, 29, 28, 27, 26, 25, 24, ++ 23, 22, 21, 20, 19, 18, 17, 16, ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++ (v512: $($id:ident,)+) => { ++ $( ++ impl SwapBytes for $id { ++ #[inline] ++ #[allow(clippy::useless_transmute)] ++ fn swap_bytes(self) -> Self { ++ unsafe { ++ let bytes: u8x64 = crate::mem::transmute(self); ++ let result: u8x64 = shuffle!(bytes, [ ++ 63, 62, 61, 60, 59, 58, 57, 56, ++ 55, 54, 53, 52, 51, 50, 49, 48, ++ 47, 46, 45, 44, 43, 42, 41, 40, ++ 39, 38, 37, 36, 35, 34, 33, 32, ++ 31, 30, 29, 28, 27, 26, 25, 24, ++ 23, 22, 21, 20, 19, 18, 17, 16, ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]); ++ crate::mem::transmute(result) ++ } ++ } ++ } ++ )+ ++ }; ++} ++ ++impl_swap_bytes!(v16: u8x2, i8x2,); ++impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,); ++// FIXME: 64-bit single element vector ++impl_swap_bytes!( ++ v64: 
u8x8, ++ i8x8, ++ u16x4, ++ i16x4, ++ u32x2, ++ i32x2, /* u64x1, i64x1, */ ++); ++ ++impl_swap_bytes!( ++ v128: u8x16, ++ i8x16, ++ u16x8, ++ i16x8, ++ u32x4, ++ i32x4, ++ u64x2, ++ i64x2, ++ u128x1, ++ i128x1, ++); ++impl_swap_bytes!( ++ v256: u8x32, ++ i8x32, ++ u16x16, ++ i16x16, ++ u32x8, ++ i32x8, ++ u64x4, ++ i64x4, ++ u128x2, ++ i128x2, ++); ++ ++impl_swap_bytes!( ++ v512: u8x64, ++ i8x64, ++ u16x32, ++ i16x32, ++ u32x16, ++ i32x16, ++ u64x8, ++ i64x8, ++ u128x4, ++ i128x4, ++); ++ ++cfg_if! { ++ if #[cfg(target_pointer_width = "8")] { ++ impl_swap_bytes!(v16: isizex2, usizex2,); ++ impl_swap_bytes!(v32: isizex4, usizex4,); ++ impl_swap_bytes!(v64: isizex8, usizex8,); ++ } else if #[cfg(target_pointer_width = "16")] { ++ impl_swap_bytes!(v32: isizex2, usizex2,); ++ impl_swap_bytes!(v64: isizex4, usizex4,); ++ impl_swap_bytes!(v128: isizex8, usizex8,); ++ } else if #[cfg(target_pointer_width = "32")] { ++ impl_swap_bytes!(v64: isizex2, usizex2,); ++ impl_swap_bytes!(v128: isizex4, usizex4,); ++ impl_swap_bytes!(v256: isizex8, usizex8,); ++ } else if #[cfg(target_pointer_width = "64")] { ++ impl_swap_bytes!(v128: isizex2, usizex2,); ++ impl_swap_bytes!(v256: isizex4, usizex4,); ++ impl_swap_bytes!(v512: isizex8, usizex8,); ++ } else { ++ compile_error!("unsupported target_pointer_width"); ++ } ++} +diff --git a/third_party/rust/packed_simd/src/codegen/v128.rs b/third_party/rust/packed_simd/src/codegen/v128.rs +new file mode 100644 +index 000000000000..9506424fadad +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v128.rs +@@ -0,0 +1,46 @@ ++//! Internal 128-bit wide vector types ++ ++use crate::masks::*; ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i8; 16]: i8x16 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u8; 16]: u8x16 | ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m8; 16]: m8x16 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++ ++impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16); ++impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16); ++impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16); ++ ++impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32); ++impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32); ++impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32); ++impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32); ++ ++impl_simd_array!([i64; 2]: i64x2 | i64, i64); ++impl_simd_array!([u64; 2]: u64x2 | u64, u64); ++impl_simd_array!([f64; 2]: f64x2 | f64, f64); ++impl_simd_array!([m64; 2]: m64x2 | i64, i64); ++ ++impl_simd_array!([i128; 1]: i128x1 | i128); ++impl_simd_array!([u128; 1]: u128x1 | u128); ++impl_simd_array!([m128; 1]: m128x1 | i128); +diff --git a/third_party/rust/packed_simd/src/codegen/v16.rs b/third_party/rust/packed_simd/src/codegen/v16.rs +new file mode 100644 +index 000000000000..4d55a6d8998e +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v16.rs +@@ -0,0 +1,7 @@ ++//! 
Internal 16-bit wide vector types ++ ++use crate::masks::*; ++ ++impl_simd_array!([i8; 2]: i8x2 | i8, i8); ++impl_simd_array!([u8; 2]: u8x2 | u8, u8); ++impl_simd_array!([m8; 2]: m8x2 | i8, i8); +diff --git a/third_party/rust/packed_simd/src/codegen/v256.rs b/third_party/rust/packed_simd/src/codegen/v256.rs +new file mode 100644 +index 000000000000..5ca4759f0c0a +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v256.rs +@@ -0,0 +1,78 @@ ++//! Internal 256-bit wide vector types ++ ++use crate::masks::*; ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i8; 32]: i8x32 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u8; 32]: u8x32 | ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m8; 32]: m8x32 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [i16; 16]: i16x16 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u16; 16]: u16x16 | ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m16; 16]: m16x16 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++ ++impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32); ++impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32); ++impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32); ++impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32); ++ ++impl_simd_array!([i64; 4]: i64x4 | i64, i64, i64, i64); ++impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64); ++impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64); ++impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64); ++ ++impl_simd_array!([i128; 2]: i128x2 | i128, i128); ++impl_simd_array!([u128; 2]: u128x2 | u128, u128); ++impl_simd_array!([m128; 2]: m128x2 | i128, i128); +diff --git a/third_party/rust/packed_simd/src/codegen/v32.rs b/third_party/rust/packed_simd/src/codegen/v32.rs +new file mode 100644 +index 000000000000..ae1dabd00c22 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v32.rs +@@ -0,0 +1,11 @@ ++//! Internal 32-bit wide vector types ++ ++use crate::masks::*; ++ ++impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8); ++impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8); ++impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8); ++ ++impl_simd_array!([i16; 2]: i16x2 | i16, i16); ++impl_simd_array!([u16; 2]: u16x2 | u16, u16); ++impl_simd_array!([m16; 2]: m16x2 | i16, i16); +diff --git a/third_party/rust/packed_simd/src/codegen/v512.rs b/third_party/rust/packed_simd/src/codegen/v512.rs +new file mode 100644 +index 000000000000..bf95110340d6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v512.rs +@@ -0,0 +1,145 @@ ++//! 
Internal 512-bit wide vector types ++ ++use crate::masks::*; ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i8; 64]: i8x64 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u8; 64]: u8x64 | ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8, ++ u8, u8, u8, u8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m8; 64]: m8x64 | ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8, ++ i8, i8, i8, i8 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [i16; 32]: i16x32 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u16; 32]: u16x32 | ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16, ++ u16, u16, u16, u16 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m16; 32]: m16x32 | ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16, ++ i16, i16, i16, i16 ++); ++ ++#[rustfmt::skip] ++impl_simd_array!( ++ [i32; 16]: i32x16 | ++ i32, i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [u32; 16]: u32x16 | ++ u32, u32, u32, u32, ++ u32, u32, u32, u32, ++ u32, u32, u32, u32, ++ u32, u32, u32, u32 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [f32; 16]: f32x16 | ++ f32, f32, f32, f32, ++ f32, f32, f32, f32, ++ f32, f32, f32, f32, ++ f32, f32, f32, f32 ++); ++#[rustfmt::skip] ++impl_simd_array!( ++ [m32; 16]: m32x16 | ++ i32, i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32, ++ i32, i32, i32, i32 ++); ++ ++impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64); ++impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64); ++impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64); ++impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64); ++ ++impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128); ++impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128); ++impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128); +diff --git a/third_party/rust/packed_simd/src/codegen/v64.rs b/third_party/rust/packed_simd/src/codegen/v64.rs +new file mode 100644 +index 000000000000..3cfb67c1a013 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/v64.rs +@@ -0,0 +1,21 @@ ++//! 
Internal 64-bit wide vector types ++ ++use crate::masks::*; ++ ++impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8); ++impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8); ++impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8); ++ ++impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16); ++impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16); ++impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16); ++ ++impl_simd_array!([i32; 2]: i32x2 | i32, i32); ++impl_simd_array!([u32; 2]: u32x2 | u32, u32); ++impl_simd_array!([f32; 2]: f32x2 | f32, f32); ++impl_simd_array!([m32; 2]: m32x2 | i32, i32); ++ ++impl_simd_array!([i64; 1]: i64x1 | i64); ++impl_simd_array!([u64; 1]: u64x1 | u64); ++impl_simd_array!([f64; 1]: f64x1 | f64); ++impl_simd_array!([m64; 1]: m64x1 | i64); +diff --git a/third_party/rust/packed_simd/src/codegen/vPtr.rs b/third_party/rust/packed_simd/src/codegen/vPtr.rs +new file mode 100644 +index 000000000000..1f2bc7714dd9 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/vPtr.rs +@@ -0,0 +1,33 @@ ++//! Pointer vector types ++ ++macro_rules! impl_simd_ptr { ++ ([$ptr_ty:ty; $elem_count:expr]: $tuple_id:ident | $ty:ident ++ | $($tys:ty),*) => { ++ #[derive(Copy, Clone)] ++ #[repr(simd)] ++ pub struct $tuple_id<$ty>($(crate $tys),*); ++ //^^^^^^^ leaked through SimdArray ++ ++ impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] { ++ type Tuple = $tuple_id<$ptr_ty>; ++ type T = $ptr_ty; ++ const N: usize = $elem_count; ++ type NT = [u32; $elem_count]; ++ } ++ ++ impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> { ++ type Element = $ptr_ty; ++ const LANES: usize = $elem_count; ++ type LanesType = [u32; $elem_count]; ++ } ++ ++ } ++} ++ ++impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T); ++impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T); ++impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T); ++ ++impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T); ++impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T); ++impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T); +diff --git a/third_party/rust/packed_simd/src/codegen/vSize.rs b/third_party/rust/packed_simd/src/codegen/vSize.rs +new file mode 100644 +index 000000000000..3911b21340c8 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/codegen/vSize.rs +@@ -0,0 +1,43 @@ ++//! Vector types with pointer-sized elements ++ ++use crate::codegen::pointer_sized_int::{isize_, usize_}; ++use crate::masks::*; ++ ++impl_simd_array!([isize; 2]: isizex2 | isize_, isize_); ++impl_simd_array!([usize; 2]: usizex2 | usize_, usize_); ++impl_simd_array!([msize; 2]: msizex2 | isize_, isize_); ++ ++impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_); ++impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_); ++impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_); ++ ++impl_simd_array!( ++ [isize; 8]: isizex8 | isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_ ++); ++impl_simd_array!( ++ [usize; 8]: usizex8 | usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_, ++ usize_ ++); ++impl_simd_array!( ++ [msize; 8]: msizex8 | isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_, ++ isize_ ++); +diff --git a/third_party/rust/packed_simd/src/lib.rs b/third_party/rust/packed_simd/src/lib.rs +new file mode 100644 +index 000000000000..d73645e72fbe +--- /dev/null ++++ b/third_party/rust/packed_simd/src/lib.rs +@@ -0,0 +1,327 @@ ++//! 
# Portable packed SIMD vectors ++//! ++//! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366: ++//! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) . ++//! ++//! The examples available in the ++//! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples) ++//! sub-directory of the crate showcase how to use the library in practice. ++//! ++//! ## Table of contents ++//! ++//! - [Introduction](#introduction) ++//! - [Vector types](#vector-types) ++//! - [Conditional operations](#conditional-operations) ++//! - [Conversions](#conversions) ++//! - [Performance ++//! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/) ++//! ++//! ## Introduction ++//! ++//! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N` ++//! elements of type `T` as well as many type aliases for this type: for ++//! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`. ++//! ++//! The operations on packed vectors are, by default, "vertical", that is, they ++//! are applied to each vector lane in isolation of the others: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! let a = i32x4::new(1, 2, 3, 4); ++//! let b = i32x4::new(5, 6, 7, 8); ++//! assert_eq!(a + b, i32x4::new(6, 8, 10, 12)); ++//! ``` ++//! ++//! Many "horizontal" operations are also provided: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! # let a = i32x4::new(1, 2, 3, 4); ++//! assert_eq!(a.wrapping_sum(), 10); ++//! ``` ++//! ++//! In virtually all architectures vertical operations are fast, while ++//! horizontal operations are, by comparison, much slower. That is, the ++//! most portably-efficient way of performing a reduction over a slice ++//! is to collect the results into a vector using vertical operations, ++//! and performing a single horizontal operation at the end: ++//! ++//! ``` ++//! # use packed_simd::*; ++//! fn reduce(x: &[i32]) -> i32 { ++//! assert!(x.len() % 4 == 0); ++//! let mut sum = i32x4::splat(0); // [0, 0, 0, 0] ++//! for i in (0..x.len()).step_by(4) { ++//! sum += i32x4::from_slice_unaligned(&x[i..]); ++//! } ++//! sum.wrapping_sum() ++//! } ++//! ++//! let x = [0, 1, 2, 3, 4, 5, 6, 7]; ++//! assert_eq!(reduce(&x), 28); ++//! ``` ++//! ++//! ## Vector types ++//! ++//! The vector type aliases are named according to the following scheme: ++//! ++//! > `{element_type}x{number_of_lanes} == Simd<[element_type; ++//! number_of_lanes]>` ++//! ++//! where the following element types are supported: ++//! ++//! * `i{element_width}`: signed integer ++//! * `u{element_width}`: unsigned integer ++//! * `f{element_width}`: float ++//! * `m{element_width}`: mask (see below) ++//! * `*{const,mut} T`: `const` and `mut` pointers ++//! ++//! ## Basic operations ++//! ++//! ``` ++//! # use packed_simd::*; ++//! // Sets all elements to `0`: ++//! let a = i32x4::splat(0); ++//! ++//! // Reads a vector from a slice: ++//! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5]; ++//! let b = i32x4::from_slice_unaligned(&arr); ++//! ++//! // Reads the 4-th element of a vector: ++//! assert_eq!(b.extract(3), 1); ++//! ++//! // Returns a new vector where the 4-th element is replaced with `1`: ++//! let a = a.replace(3, 1); ++//! assert_eq!(a, b); ++//! ++//! // Writes a vector to a slice: ++//! let a = a.replace(2, 1); ++//! a.write_to_slice_unaligned(&mut arr[4..]); ++//! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]); ++//! ``` ++//! ++//! ## Conditional operations ++//! ++//! One often needs to perform an operation on some lanes of the vector. Vector ++//! 
masks, like `m32x4`, allow selecting on which vector lanes an operation is
++//! to be performed:
++//!
++//! ```
++//! # use packed_simd::*;
++//! let a = i32x4::new(1, 1, 2, 2);
++//!
++//! // Add `1` to the first two lanes of the vector.
++//! let m = m16x4::new(true, true, false, false);
++//! let a = m.select(a + 1, a);
++//! assert_eq!(a, i32x4::splat(2));
++//! ```
++//!
++//! The elements of a vector mask are either `true` or `false`. Here `true`
++//! means that a lane is "selected", while `false` means that a lane is not
++//! selected.
++//!
++//! All vector masks implement a `mask.select(a: T, b: T) -> T` method that
++//! works on all vectors that have the same number of lanes as the mask. The
++//! resulting vector contains the elements of `a` for those lanes for which the
++//! mask is `true`, and the elements of `b` otherwise.
++//!
++//! The example constructs a mask with the first two lanes set to `true` and
++//! the last two lanes set to `false`. This selects the first two lanes of `a +
++//! 1` and the last two lanes of `a`, producing a vector where the first two
++//! lanes have been incremented by `1`.
++//!
++//! > note: mask `select` can be used on vector types that have the same number
++//! > of lanes as the mask. The example shows this by using [`m16x4`] instead
++//! > of [`m32x4`]. It is _typically_ more performant to use a mask element
++//! > width equal to the element width of the vectors being operated upon.
++//! > This is, however, not true for 512-bit wide vectors when targeting
++//! > AVX-512, where the most efficient masks use only 1-bit per element.
++//!
++//! All vertical comparison operations return masks:
++//!
++//! ```
++//! # use packed_simd::*;
++//! let a = i32x4::new(1, 1, 3, 3);
++//! let b = i32x4::new(2, 2, 0, 0);
++//!
++//! // ge: >= (Greater or Equal; see also lt, le, gt, eq, ne).
++//! let m = a.ge(i32x4::splat(2));
++//!
++//! if m.any() {
++//!     // all / any / none allow coherent control flow
++//!     let d = m.select(a, b);
++//!     assert_eq!(d, i32x4::new(2, 2, 3, 3));
++//! }
++//! ```
++//!
++//! ## Conversions
++//!
++//! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for
++//!   vectors with the same number of lanes when the conversion is value
++//!   preserving (same as in `std`).
++//!
++//! * **safe bitwise conversions**: The cargo feature `into_bits` provides the
++//!   `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise
++//!   `transmute`s when all bit patterns of the source type are valid bit
++//!   patterns of the target type and are also implemented for the
++//!   architecture-specific vector types of `std::arch`. For example, `let x:
++//!   u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit
++//!   patterns are valid `u8x8` bit patterns. However, the opposite is not
++//!   true, not all `u8x8` bit patterns are valid `m8x8` bit-patterns, so this
++//!   operation cannot be performed safely using `x.into_bits()`; one needs to
++//!   use `unsafe { crate::mem::transmute(x) }` for that, making sure that the
++//!   value in the `u8x8` is a valid bit-pattern of `m8x8`.
++//!
++//! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`]
++//!   (`x.cast()`), just like `as`:
++//!
++//!   * casting integer vectors whose lane types have the same size (e.g.
++//!     `i32xN` -> `u32xN`) is a **no-op**,
++//!
++//!   * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
++//!     `u8xN`) will **truncate**,
++//!
* casting from a smaller integer (e.g. `u8xN` ->
++//!     `u32xN`) will:
++//!     * **zero-extend** if the source is unsigned, or
++//!     * **sign-extend** if the source is signed,
++//!
++//!   * casting from a float to an integer will **round the float towards
++//!     zero**,
++//!
++//!   * casting from an integer to float will produce the floating point
++//!     representation of the integer, **rounding to nearest, ties to even**,
++//!
++//!   * casting from an `f32` to an `f64` is perfect and lossless,
++//!
++//!   * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
++//!
++//! Numeric casts are not very "precise": sometimes lossy, sometimes value
++//! preserving, etc.
++
++#![feature(
++    repr_simd,
++    const_fn,
++    platform_intrinsics,
++    stdsimd,
++    aarch64_target_feature,
++    arm_target_feature,
++    link_llvm_intrinsics,
++    core_intrinsics,
++    stmt_expr_attributes,
++    align_offset,
++    mmx_target_feature,
++    crate_visibility_modifier,
++    custom_inner_attributes
++)]
++#![allow(non_camel_case_types, non_snake_case,
++    clippy::cast_possible_truncation,
++    clippy::cast_lossless,
++    clippy::cast_possible_wrap,
++    clippy::cast_precision_loss,
++    // This lint is currently broken for generic code
++    // See https://github.com/rust-lang/rust-clippy/issues/3410
++    clippy::use_self
++)]
++#![cfg_attr(test, feature(hashmap_internals))]
++#![deny(warnings, rust_2018_idioms, clippy::missing_inline_in_public_items)]
++#![no_std]
++
++use cfg_if::cfg_if;
++
++cfg_if! {
++    if #[cfg(feature = "core_arch")] {
++        #[allow(unused_imports)]
++        use core_arch as arch;
++    } else {
++        #[allow(unused_imports)]
++        use core::arch;
++    }
++}
++
++#[cfg(all(target_arch = "wasm32", test))]
++use wasm_bindgen_test::*;
++
++#[allow(unused_imports)]
++use core::{
++    /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128,
++    i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice,
++    u128, u16, u32, u64, u8, usize,
++};
++
++#[macro_use]
++mod testing;
++#[macro_use]
++mod api;
++mod codegen;
++mod sealed;
++
++/// Packed SIMD vector type.
++///
++/// # Examples
++///
++/// ```
++/// # use packed_simd::Simd;
++/// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3);
++/// assert_eq!(v.extract(2), 2);
++/// ```
++#[repr(transparent)]
++#[derive(Copy, Clone)]
++pub struct Simd<A: sealed::SimdArray>(
++    // FIXME: this type should be private,
++    // but it currently must be public for the
++    // `shuffle!` macro to work: it needs to
++    // access the internal `repr(simd)` type
++    // to call the shuffle intrinsics.
++    #[doc(hidden)] pub <A as sealed::SimdArray>::Tuple,
++);
++
++/// Wrapper over `T` implementing a lexicographical order via the `PartialOrd`
++/// and/or `Ord` traits.
++#[repr(transparent)]
++#[derive(Copy, Clone, Debug)]
++#[allow(clippy::missing_inline_in_public_items)]
++pub struct LexicographicallyOrdered<T>(T);
++
++mod masks;
++pub use self::masks::*;
++
++mod v16;
++pub use self::v16::*;
++
++mod v32;
++pub use self::v32::*;
++
++mod v64;
++pub use self::v64::*;
++
++mod v128;
++pub use self::v128::*;
++
++mod v256;
++pub use self::v256::*;
++
++mod v512;
++pub use self::v512::*;
++
++mod vSize;
++pub use self::vSize::*;
++
++mod vPtr;
++pub use self::vPtr::*;
++
++pub use self::api::cast::*;
++
++#[cfg(feature = "into_bits")]
++pub use self::api::into_bits::*;
++
++// Re-export the shuffle intrinsics required by the `shuffle!` macro.
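++// The macro expands in downstream crates, so these intrinsics must be
++// reachable through this crate's root; they are `#[doc(hidden)]` because
++// they are an implementation detail rather than public API.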
++#[doc(hidden)] ++pub use self::codegen::llvm::{ ++ __shuffle_vector16, __shuffle_vector2, __shuffle_vector32, ++ __shuffle_vector4, __shuffle_vector64, __shuffle_vector8, ++}; ++ ++crate mod llvm { ++ crate use crate::codegen::llvm::*; ++} +diff --git a/third_party/rust/packed_simd/src/masks.rs b/third_party/rust/packed_simd/src/masks.rs +new file mode 100644 +index 000000000000..f83c4da95750 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/masks.rs +@@ -0,0 +1,128 @@ ++//! Mask types ++ ++macro_rules! impl_mask_ty { ++ ($id:ident : $elem_ty:ident | #[$doc:meta]) => { ++ #[$doc] ++ #[derive(Copy, Clone)] ++ pub struct $id($elem_ty); ++ ++ impl crate::sealed::Mask for $id { ++ fn test(&self) -> bool { ++ $id::test(self) ++ } ++ } ++ ++ impl $id { ++ /// Instantiate a mask with `value` ++ #[inline] ++ pub fn new(x: bool) -> Self { ++ if x { ++ $id(!0) ++ } else { ++ $id(0) ++ } ++ } ++ /// Test if the mask is set ++ #[inline] ++ pub fn test(&self) -> bool { ++ self.0 != 0 ++ } ++ } ++ ++ impl Default for $id { ++ #[inline] ++ fn default() -> Self { ++ $id(0) ++ } ++ } ++ ++ #[allow(clippy::partialeq_ne_impl)] ++ impl PartialEq<$id> for $id { ++ #[inline] ++ fn eq(&self, other: &Self) -> bool { ++ self.0 == other.0 ++ } ++ #[inline] ++ fn ne(&self, other: &Self) -> bool { ++ self.0 != other.0 ++ } ++ } ++ ++ impl Eq for $id {} ++ ++ impl PartialOrd<$id> for $id { ++ #[inline] ++ fn partial_cmp( ++ &self, other: &Self, ++ ) -> Option { ++ use crate::cmp::Ordering; ++ if self == other { ++ Some(Ordering::Equal) ++ } else if self.0 > other.0 { ++ // Note: ++ // * false = 0_i ++ // * true == !0_i == -1_i ++ Some(Ordering::Less) ++ } else { ++ Some(Ordering::Greater) ++ } ++ } ++ ++ #[inline] ++ fn lt(&self, other: &Self) -> bool { ++ self.0 > other.0 ++ } ++ #[inline] ++ fn gt(&self, other: &Self) -> bool { ++ self.0 < other.0 ++ } ++ #[inline] ++ fn le(&self, other: &Self) -> bool { ++ self.0 >= other.0 ++ } ++ #[inline] ++ fn ge(&self, other: &Self) -> bool { ++ self.0 <= other.0 ++ } ++ } ++ ++ impl Ord for $id { ++ #[inline] ++ fn cmp(&self, other: &Self) -> crate::cmp::Ordering { ++ match self.partial_cmp(other) { ++ Some(x) => x, ++ None => unsafe { crate::hint::unreachable_unchecked() }, ++ } ++ } ++ } ++ ++ impl crate::hash::Hash for $id { ++ #[inline] ++ fn hash(&self, state: &mut H) { ++ (self.0 != 0).hash(state); ++ } ++ } ++ ++ impl crate::fmt::Debug for $id { ++ #[inline] ++ fn fmt( ++ &self, fmtter: &mut crate::fmt::Formatter<'_>, ++ ) -> Result<(), crate::fmt::Error> { ++ write!(fmtter, "{}({})", stringify!($id), self.0 != 0) ++ } ++ } ++ }; ++} ++ ++impl_mask_ty!(m8: i8 | /// 8-bit wide mask. ++); ++impl_mask_ty!(m16: i16 | /// 16-bit wide mask. ++); ++impl_mask_ty!(m32: i32 | /// 32-bit wide mask. ++); ++impl_mask_ty!(m64: i64 | /// 64-bit wide mask. ++); ++impl_mask_ty!(m128: i128 | /// 128-bit wide mask. ++); ++impl_mask_ty!(msize: isize | /// isize-wide mask. ++); +diff --git a/third_party/rust/packed_simd/src/sealed.rs b/third_party/rust/packed_simd/src/sealed.rs +new file mode 100644 +index 000000000000..832acd3f1d54 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/sealed.rs +@@ -0,0 +1,41 @@ ++//! Sealed traits ++ ++/// Trait implemented by arrays that can be SIMD types. ++#[doc(hidden)] ++pub trait SimdArray { ++ /// The type of the #[repr(simd)] type. ++ type Tuple: Copy + Clone; ++ /// The element type of the vector. ++ type T; ++ /// The number of elements in the array. ++ const N: usize; ++ /// The type: `[u32; Self::N]`. 
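++    /// This is the same `[u32; N]` shape as the `Lanes` parameter of the
++    /// sealed `Shuffle` trait.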
++    type NT;
++}
++
++/// This trait is used to constrain the arguments
++/// and result type of the portable shuffles.
++#[doc(hidden)]
++pub trait Shuffle<Lanes> {
++    // Lanes is a `[u32; N]` where `N` is the number of vector lanes
++
++    /// The result type of the shuffle.
++    type Output;
++}
++
++/// This trait is implemented by all SIMD vector types.
++#[doc(hidden)]
++pub trait Simd {
++    /// Element type of the SIMD vector
++    type Element;
++    /// The number of elements in the SIMD vector.
++    const LANES: usize;
++    /// The type: `[u32; Self::N]`.
++    type LanesType;
++}
++
++/// This trait is implemented by all mask types
++#[doc(hidden)]
++pub trait Mask {
++    fn test(&self) -> bool;
++}
+diff --git a/third_party/rust/packed_simd/src/testing.rs b/third_party/rust/packed_simd/src/testing.rs
+new file mode 100644
+index 000000000000..fcbcf9e2ac8e
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/testing.rs
+@@ -0,0 +1,8 @@
++//! Testing macros and other utilities.
++
++#[macro_use]
++mod macros;
++
++#[cfg(test)]
++#[macro_use]
++crate mod utils;
+diff --git a/third_party/rust/packed_simd/src/testing/macros.rs b/third_party/rust/packed_simd/src/testing/macros.rs
+new file mode 100644
+index 000000000000..6008634c76ce
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/testing/macros.rs
+@@ -0,0 +1,44 @@
++//! Testing macros
++
++macro_rules! test_if {
++    ($cfg_tt:tt: $it:item) => {
++        #[cfg(any(
++            // Test everything if:
++            //
++            // * tests are enabled,
++            // * no features about exclusively testing
++            //   specific vector classes are enabled
++            all(test, not(any(
++                test_v16,
++                test_v32,
++                test_v64,
++                test_v128,
++                test_v256,
++                test_v512,
++                test_none, // disables all tests
++            ))),
++            // Test if:
++            //
++            // * tests are enabled
++            // * a particular cfg token tree returns true
++            all(test, $cfg_tt),
++        ))]
++        $it
++    };
++}
++
++#[cfg(test)]
++#[allow(unused)]
++macro_rules! ref_ {
++    ($anything:tt) => {
++        &$anything
++    };
++}
++
++#[cfg(test)]
++#[allow(unused)]
++macro_rules! ref_mut_ {
++    ($anything:tt) => {
++        &mut $anything
++    };
++}
+diff --git a/third_party/rust/packed_simd/src/testing/utils.rs b/third_party/rust/packed_simd/src/testing/utils.rs
+new file mode 100644
+index 000000000000..7b8f21ac1c55
+--- /dev/null
++++ b/third_party/rust/packed_simd/src/testing/utils.rs
+@@ -0,0 +1,135 @@
++//! Testing utilities
++
++#![allow(dead_code)]
++
++use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered};
++
++/// Tests PartialOrd for `a` and `b` where `a < b` is true.
++pub fn test_lt<T>(
++    a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>,
++) where
++    LexicographicallyOrdered<T>: Debug + PartialOrd,
++{
++    assert!(a < b, "{:?}, {:?}", a, b);
++    assert!(b > a, "{:?}, {:?}", a, b);
++
++    assert!(!(a == b), "{:?}, {:?}", a, b);
++    assert!(a != b, "{:?}, {:?}", a, b);
++
++    assert!(a <= b, "{:?}, {:?}", a, b);
++    assert!(b >= a, "{:?}, {:?}", a, b);
++
++    // Irreflexivity
++    assert!(!(a < a), "{:?}, {:?}", a, b);
++    assert!(!(b < b), "{:?}, {:?}", a, b);
++    assert!(!(a > a), "{:?}, {:?}", a, b);
++    assert!(!(b > b), "{:?}, {:?}", a, b);
++
++    assert!(a <= a, "{:?}, {:?}", a, b);
++    assert!(b <= b, "{:?}, {:?}", a, b);
++}
++
++/// Tests PartialOrd for `a` and `b` where `a <= b` is true.
++pub fn test_le( ++ a: LexicographicallyOrdered, b: LexicographicallyOrdered, ++) where ++ LexicographicallyOrdered: Debug + PartialOrd, ++{ ++ assert!(a <= b, "{:?}, {:?}", a, b); ++ assert!(b >= a, "{:?}, {:?}", a, b); ++ ++ assert!(a == b || a < b, "{:?}, {:?}", a, b); ++ assert!(a == b || b > a, "{:?}, {:?}", a, b); ++ ++ if a == b { ++ assert!(!(a < b), "{:?}, {:?}", a, b); ++ assert!(!(b > a), "{:?}, {:?}", a, b); ++ ++ assert!(!(a != b), "{:?}, {:?}", a, b); ++ } else { ++ assert!(a != b, "{:?}, {:?}", a, b); ++ test_lt(a, b); ++ } ++} ++ ++/// Test PartialOrd::partial_cmp for `a` and `b` returning `Ordering` ++pub fn test_cmp( ++ a: LexicographicallyOrdered, b: LexicographicallyOrdered, ++ o: Option, ++) where ++ LexicographicallyOrdered: PartialOrd + Debug, ++ T: Debug + crate::sealed::Simd + Copy + Clone, ++ ::Element: Default + Copy + Clone + PartialOrd, ++{ ++ assert!(T::LANES <= 64, "array length in these two arrays needs updating"); ++ let mut arr_a: [T::Element; 64] = [Default::default(); 64]; ++ let mut arr_b: [T::Element; 64] = [Default::default(); 64]; ++ ++ unsafe { ++ crate::ptr::write_unaligned( ++ arr_a.as_mut_ptr() as *mut LexicographicallyOrdered, ++ a, ++ ) ++ } ++ unsafe { ++ crate::ptr::write_unaligned( ++ arr_b.as_mut_ptr() as *mut LexicographicallyOrdered, ++ b, ++ ) ++ } ++ let expected = arr_a[0..T::LANES].partial_cmp(&arr_b[0..T::LANES]); ++ let result = a.partial_cmp(&b); ++ assert_eq!(expected, result, "{:?}, {:?}", a, b); ++ assert_eq!(o, result, "{:?}, {:?}", a, b); ++ match o { ++ Some(crate::cmp::Ordering::Less) => { ++ test_lt(a, b); ++ test_le(a, b); ++ } ++ Some(crate::cmp::Ordering::Greater) => { ++ test_lt(b, a); ++ test_le(b, a); ++ } ++ Some(crate::cmp::Ordering::Equal) => { ++ assert!(a == b, "{:?}, {:?}", a, b); ++ assert!(!(a != b), "{:?}, {:?}", a, b); ++ assert!(!(a < b), "{:?}, {:?}", a, b); ++ assert!(!(b < a), "{:?}, {:?}", a, b); ++ assert!(!(a > b), "{:?}, {:?}", a, b); ++ assert!(!(b > a), "{:?}, {:?}", a, b); ++ ++ test_le(a, b); ++ test_le(b, a); ++ } ++ None => { ++ assert!(!(a == b), "{:?}, {:?}", a, b); ++ assert!(!(a != b), "{:?}, {:?}", a, b); ++ assert!(!(a < b), "{:?}, {:?}", a, b); ++ assert!(!(a > b), "{:?}, {:?}", a, b); ++ assert!(!(b < a), "{:?}, {:?}", a, b); ++ assert!(!(b > a), "{:?}, {:?}", a, b); ++ assert!(!(a <= b), "{:?}, {:?}", a, b); ++ assert!(!(b <= a), "{:?}, {:?}", a, b); ++ assert!(!(a >= b), "{:?}, {:?}", a, b); ++ assert!(!(b >= a), "{:?}, {:?}", a, b); ++ } ++ } ++} ++ ++// Returns a tuple containing two distinct pointer values of the same type as ++// the element type of the Simd vector `$id`. ++#[allow(unused)] ++macro_rules! ptr_vals { ++ ($id:ty) => { ++ // expands to an expression ++ #[allow(unused_unsafe)] ++ unsafe { ++ // all bits cleared ++ let clear: <$id as sealed::Simd>::Element = crate::mem::zeroed(); ++ // all bits set ++ let set: <$id as sealed::Simd>::Element = ++ crate::mem::transmute(-1_isize); ++ (clear, set) ++ } ++ }; ++} +diff --git a/third_party/rust/packed_simd/src/v128.rs b/third_party/rust/packed_simd/src/v128.rs +new file mode 100644 +index 000000000000..1d0282dc4278 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v128.rs +@@ -0,0 +1,80 @@ ++//! 128-bit wide vector types ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_i!([i8; 16]: i8x16, m8x16 | i8 | test_v128 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: | ++ /// A 128-bit vector with 16 `i8` lanes. 
++); ++impl_u!([u8; 16]: u8x16, m8x16 | u8 | test_v128 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: | ++ /// A 128-bit vector with 16 `u8` lanes. ++); ++impl_m!([m8; 16]: m8x16 | i8 | test_v128 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: m16x16 | ++ /// A 128-bit vector mask with 16 `m8` lanes. ++); ++ ++impl_i!([i16; 8]: i16x8, m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8 | ++ /// A 128-bit vector with 8 `i16` lanes. ++); ++impl_u!([u16; 8]: u16x8, m16x8 | u16| test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: u8x8 | ++ /// A 128-bit vector with 8 `u16` lanes. ++); ++impl_m!([m16; 8]: m16x8 | i16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m8x8, m32x8 | ++ /// A 128-bit vector mask with 8 `m16` lanes. ++); ++ ++impl_i!([i32; 4]: i32x4, m32x4 | i32 | test_v128 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4 | ++ /// A 128-bit vector with 4 `i32` lanes. ++); ++impl_u!([u32; 4]: u32x4, m32x4 | u32| test_v128 | x0, x1, x2, x3 | ++ From: u8x4, u16x4 | ++ /// A 128-bit vector with 4 `u32` lanes. ++); ++impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4 | ++ /// A 128-bit vector with 4 `f32` lanes. ++); ++impl_m!([m32; 4]: m32x4 | i32 | test_v128 | x0, x1, x2, x3 | ++ From: m8x4, m16x4, m64x4 | ++ /// A 128-bit vector mask with 4 `m32` lanes. ++); ++ ++impl_i!([i64; 2]: i64x2, m64x2 | i64 | test_v128 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 | ++ /// A 128-bit vector with 2 `i64` lanes. ++); ++impl_u!([u64; 2]: u64x2, m64x2 | u64 | test_v128 | x0, x1 | ++ From: u8x2, u16x2, u32x2 | ++ /// A 128-bit vector with 2 `u64` lanes. ++); ++impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 | ++ /// A 128-bit vector with 2 `f64` lanes. ++); ++impl_m!([m64; 2]: m64x2 | i64 | test_v128 | x0, x1 | ++ From: m8x2, m16x2, m32x2, m128x2 | ++ /// A 128-bit vector mask with 2 `m64` lanes. ++); ++ ++impl_i!([i128; 1]: i128x1, m128x1 | i128 | test_v128 | x0 | ++ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types ++ /// A 128-bit vector with 1 `i128` lane. ++); ++impl_u!([u128; 1]: u128x1, m128x1 | u128 | test_v128 | x0 | ++ From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types ++ /// A 128-bit vector with 1 `u128` lane. ++); ++impl_m!([m128; 1]: m128x1 | i128 | test_v128 | x0 | ++ From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types ++ /// A 128-bit vector mask with 1 `m128` lane. ++); +diff --git a/third_party/rust/packed_simd/src/v16.rs b/third_party/rust/packed_simd/src/v16.rs +new file mode 100644 +index 000000000000..67a3832d2530 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v16.rs +@@ -0,0 +1,16 @@ ++//! 16-bit wide vector types ++ ++use crate::*; ++ ++impl_i!([i8; 2]: i8x2, m8x2 | i8 | test_v16 | x0, x1 | ++ From: | ++ /// A 16-bit vector with 2 `i8` lanes. ++); ++impl_u!([u8; 2]: u8x2, m8x2 | u8 | test_v16 | x0, x1 | ++ From: | ++ /// A 16-bit vector with 2 `u8` lanes. ++); ++impl_m!([m8; 2]: m8x2 | i8 | test_v16 | x0, x1 | ++ From: m16x2, m32x2, m64x2, m128x2 | ++ /// A 16-bit vector mask with 2 `m8` lanes. 
++); +diff --git a/third_party/rust/packed_simd/src/v256.rs b/third_party/rust/packed_simd/src/v256.rs +new file mode 100644 +index 000000000000..6b59336f68b6 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v256.rs +@@ -0,0 +1,86 @@ ++//! 256-bit wide vector types ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_i!([i8; 32]: i8x32, m8x32 | i8 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: | ++ /// A 256-bit vector with 32 `i8` lanes. ++); ++impl_u!([u8; 32]: u8x32, m8x32 | u8 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: | ++ /// A 256-bit vector with 32 `u8` lanes. ++); ++impl_m!([m8; 32]: m8x32 | i8 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: | ++ /// A 256-bit vector mask with 32 `m8` lanes. ++); ++ ++impl_i!([i16; 16]: i16x16, m16x16 | i16 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: i8x16, u8x16 | ++ /// A 256-bit vector with 16 `i16` lanes. ++); ++impl_u!([u16; 16]: u16x16, m16x16 | u16 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: u8x16 | ++ /// A 256-bit vector with 16 `u16` lanes. ++); ++impl_m!([m16; 16]: m16x16 | i16 | test_v256 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: m8x16 | ++ /// A 256-bit vector mask with 16 `m16` lanes. ++); ++ ++impl_i!([i32; 8]: i32x8, m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8 | ++ /// A 256-bit vector with 8 `i32` lanes. ++); ++impl_u!([u32; 8]: u32x8, m32x8 | u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: u8x8, u16x8 | ++ /// A 256-bit vector with 8 `u32` lanes. ++); ++impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8 | ++ /// A 256-bit vector with 8 `f32` lanes. ++); ++impl_m!([m32; 8]: m32x8 | i32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m8x8, m16x8 | ++ /// A 256-bit vector mask with 8 `m32` lanes. ++); ++ ++impl_i!([i64; 4]: i64x4, m64x4 | i64 | test_v256 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 | ++ /// A 256-bit vector with 4 `i64` lanes. ++); ++impl_u!([u64; 4]: u64x4, m64x4 | u64 | test_v256 | x0, x1, x2, x3 | ++ From: u8x4, u16x4, u32x4 | ++ /// A 256-bit vector with 4 `u64` lanes. ++); ++impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 | ++ /// A 256-bit vector with 4 `f64` lanes. ++); ++impl_m!([m64; 4]: m64x4 | i64 | test_v256 | x0, x1, x2, x3 | ++ From: m8x4, m16x4, m32x4 | ++ /// A 256-bit vector mask with 4 `m64` lanes. ++); ++ ++impl_i!([i128; 2]: i128x2, m128x2 | i128 | test_v256 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 | ++ /// A 256-bit vector with 2 `i128` lanes. ++); ++impl_u!([u128; 2]: u128x2, m128x2 | u128 | test_v256 | x0, x1 | ++ From: u8x2, u16x2, u32x2, u64x2 | ++ /// A 256-bit vector with 2 `u128` lanes. ++); ++impl_m!([m128; 2]: m128x2 | i128 | test_v256 | x0, x1 | ++ From: m8x2, m16x2, m32x2, m64x2 | ++ /// A 256-bit vector mask with 2 `m128` lanes. 
++); +diff --git a/third_party/rust/packed_simd/src/v32.rs b/third_party/rust/packed_simd/src/v32.rs +new file mode 100644 +index 000000000000..09cef9bdd472 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v32.rs +@@ -0,0 +1,29 @@ ++//! 32-bit wide vector types ++ ++use crate::*; ++ ++impl_i!([i8; 4]: i8x4, m8x4 | i8 | test_v32 | x0, x1, x2, x3 | ++ From: | ++ /// A 32-bit vector with 4 `i8` lanes. ++); ++impl_u!([u8; 4]: u8x4, m8x4 | u8 | test_v32 | x0, x1, x2, x3 | ++ From: | ++ /// A 32-bit vector with 4 `u8` lanes. ++); ++impl_m!([m8; 4]: m8x4 | i8 | test_v32 | x0, x1, x2, x3 | ++ From: m16x4, m32x4, m64x4 | ++ /// A 32-bit vector mask with 4 `m8` lanes. ++); ++ ++impl_i!([i16; 2]: i16x2, m16x2 | i16 | test_v32 | x0, x1 | ++ From: i8x2, u8x2 | ++ /// A 32-bit vector with 2 `i16` lanes. ++); ++impl_u!([u16; 2]: u16x2, m16x2 | u16 | test_v32 | x0, x1 | ++ From: u8x2 | ++ /// A 32-bit vector with 2 `u16` lanes. ++); ++impl_m!([m16; 2]: m16x2 | i16 | test_v32 | x0, x1 | ++ From: m8x2, m32x2, m64x2, m128x2 | ++ /// A 32-bit vector mask with 2 `m16` lanes. ++); +diff --git a/third_party/rust/packed_simd/src/v512.rs b/third_party/rust/packed_simd/src/v512.rs +new file mode 100644 +index 000000000000..b1714aded369 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v512.rs +@@ -0,0 +1,99 @@ ++//! 512-bit wide vector types ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_i!([i8; 64]: i8x64, m8x64 | i8 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, ++ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, ++ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | ++ From: | ++ /// A 512-bit vector with 64 `i8` lanes. ++); ++impl_u!([u8; 64]: u8x64, m8x64 | u8 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, ++ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, ++ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | ++ From: | ++ /// A 512-bit vector with 64 `u8` lanes. ++); ++impl_m!([m8; 64]: m8x64 | i8 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, ++ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, ++ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | ++ From: | ++ /// A 512-bit vector mask with 64 `m8` lanes. ++); ++ ++impl_i!([i16; 32]: i16x32, m16x32 | i16 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: i8x32, u8x32 | ++ /// A 512-bit vector with 32 `i16` lanes. ++); ++impl_u!([u16; 32]: u16x32, m16x32 | u16 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: u8x32 | ++ /// A 512-bit vector with 32 `u16` lanes. ++); ++impl_m!([m16; 32]: m16x32 | i16 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ++ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | ++ From: m8x32 | ++ /// A 512-bit vector mask with 32 `m16` lanes. 
++); ++ ++impl_i!([i32; 16]: i32x16, m32x16 | i32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: i8x16, u8x16, i16x16, u16x16 | ++ /// A 512-bit vector with 16 `i32` lanes. ++); ++impl_u!([u32; 16]: u32x16, m32x16 | u32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: u8x16, u16x16 | ++ /// A 512-bit vector with 16 `u32` lanes. ++); ++impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: i8x16, u8x16, i16x16, u16x16 | ++ /// A 512-bit vector with 16 `f32` lanes. ++); ++impl_m!([m32; 16]: m32x16 | i32 | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | ++ From: m8x16, m16x16 | ++ /// A 512-bit vector mask with 16 `m32` lanes. ++); ++ ++impl_i!([i64; 8]: i64x8, m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 | ++ /// A 512-bit vector with 8 `i64` lanes. ++); ++impl_u!([u64; 8]: u64x8, m64x8 | u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: u8x8, u16x8, u32x8 | ++ /// A 512-bit vector with 8 `u64` lanes. ++); ++impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 | ++ /// A 512-bit vector with 8 `f64` lanes. ++); ++impl_m!([m64; 8]: m64x8 | i64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m8x8, m16x8, m32x8 | ++ /// A 512-bit vector mask with 8 `m64` lanes. ++); ++ ++impl_i!([i128; 4]: i128x4, m128x4 | i128 | test_v512 | x0, x1, x2, x3 | ++ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 | ++ /// A 512-bit vector with 4 `i128` lanes. ++); ++impl_u!([u128; 4]: u128x4, m128x4 | u128 | test_v512 | x0, x1, x2, x3 | ++ From: u8x4, u16x4, u32x4, u64x4 | ++ /// A 512-bit vector with 4 `u128` lanes. ++); ++impl_m!([m128; 4]: m128x4 | i128 | test_v512 | x0, x1, x2, x3 | ++ From: m8x4, m16x4, m32x4, m64x4 | ++ /// A 512-bit vector mask with 4 `m128` lanes. ++); +diff --git a/third_party/rust/packed_simd/src/v64.rs b/third_party/rust/packed_simd/src/v64.rs +new file mode 100644 +index 000000000000..1ee6219c040b +--- /dev/null ++++ b/third_party/rust/packed_simd/src/v64.rs +@@ -0,0 +1,66 @@ ++//! 64-bit wide vector types ++#![rustfmt::skip] ++ ++use super::*; ++ ++impl_i!([i8; 8]: i8x8, m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A 64-bit vector with 8 `i8` lanes. ++); ++impl_u!([u8; 8]: u8x8, m8x8 | u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A 64-bit vector with 8 `u8` lanes. ++); ++impl_m!([m8; 8]: m8x8 | i8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: m16x8, m32x8 | ++ /// A 64-bit vector mask with 8 `m8` lanes. ++); ++ ++impl_i!([i16; 4]: i16x4, m16x4 | i16 | test_v64 | x0, x1, x2, x3 | ++ From: i8x4, u8x4 | ++ /// A 64-bit vector with 4 `i16` lanes. ++); ++impl_u!([u16; 4]: u16x4, m16x4 | u16 | test_v64 | x0, x1, x2, x3 | ++ From: u8x4 | ++ /// A 64-bit vector with 4 `u16` lanes. ++); ++impl_m!([m16; 4]: m16x4 | i16 | test_v64 | x0, x1, x2, x3 | ++ From: m8x4, m32x4, m64x4 | ++ /// A 64-bit vector mask with 4 `m16` lanes. ++); ++ ++impl_i!([i32; 2]: i32x2, m32x2 | i32 | test_v64 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2 | ++ /// A 64-bit vector with 2 `i32` lanes. ++); ++impl_u!([u32; 2]: u32x2, m32x2 | u32 | test_v64 | x0, x1 | ++ From: u8x2, u16x2 | ++ /// A 64-bit vector with 2 `u32` lanes. 
++); ++impl_m!([m32; 2]: m32x2 | i32 | test_v64 | x0, x1 | ++ From: m8x2, m16x2, m64x2, m128x2 | ++ /// A 64-bit vector mask with 2 `m32` lanes. ++); ++impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 | ++ From: i8x2, u8x2, i16x2, u16x2 | ++ /// A 64-bit vector with 2 `f32` lanes. ++); ++ ++/* ++impl_i!([i64; 1]: i64x1, m64x1 | i64 | test_v64 | x0 | ++ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ | // FIXME: primitive to vector conversion ++ /// A 64-bit vector with 1 `i64` lane. ++); ++impl_u!([u64; 1]: u64x1, m64x1 | u64 | test_v64 | x0 | ++ From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion ++ /// A 64-bit vector with 1 `u64` lane. ++); ++impl_m!([m64; 1]: m64x1 | i64 | test_v64 | x0 | ++ From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types ++ /// A 64-bit vector mask with 1 `m64` lane. ++); ++impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 | ++ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types ++ /// A 64-bit vector with 1 `f64` lane. ++); ++*/ +diff --git a/third_party/rust/packed_simd/src/vPtr.rs b/third_party/rust/packed_simd/src/vPtr.rs +new file mode 100644 +index 000000000000..fe9fb28ffa89 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/vPtr.rs +@@ -0,0 +1,34 @@ ++//! Vectors of pointers ++#![rustfmt::skip] ++ ++use crate::*; ++ ++impl_const_p!( ++ [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | ++ /// A vector with 2 `*const T` lanes ++); ++ ++impl_mut_p!( ++ [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | ++ /// A vector with 2 `*mut T` lanes ++); ++ ++impl_const_p!( ++ [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | ++ /// A vector with 4 `*const T` lanes ++); ++ ++impl_mut_p!( ++ [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | ++ /// A vector with 4 `*mut T` lanes ++); ++ ++impl_const_p!( ++ [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | ++ /// A vector with 8 `*const T` lanes ++); ++ ++impl_mut_p!( ++ [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | ++ /// A vector with 8 `*mut T` lanes ++); +diff --git a/third_party/rust/packed_simd/src/vSize.rs b/third_party/rust/packed_simd/src/vSize.rs +new file mode 100644 +index 000000000000..5594323372b4 +--- /dev/null ++++ b/third_party/rust/packed_simd/src/vSize.rs +@@ -0,0 +1,53 @@ ++//! Vectors with pointer-sized elements ++ ++use crate::codegen::pointer_sized_int::{isize_, usize_}; ++use crate::*; ++ ++impl_i!([isize; 2]: isizex2, msizex2 | isize_ | test_v128 | ++ x0, x1| ++ From: | ++ /// A vector with 2 `isize` lanes. ++); ++ ++impl_u!([usize; 2]: usizex2, msizex2 | usize_ | test_v128 | ++ x0, x1| ++ From: | ++ /// A vector with 2 `usize` lanes. ++); ++impl_m!([msize; 2]: msizex2 | isize_ | test_v128 | ++ x0, x1 | ++ From: | ++ /// A vector mask with 2 `msize` lanes. ++); ++ ++impl_i!([isize; 4]: isizex4, msizex4 | isize_ | test_v256 | ++ x0, x1, x2, x3 | ++ From: | ++ /// A vector with 4 `isize` lanes. ++); ++impl_u!([usize; 4]: usizex4, msizex4 | usize_ | test_v256 | ++ x0, x1, x2, x3| ++ From: | ++ /// A vector with 4 `usize` lanes. ++); ++impl_m!([msize; 4]: msizex4 | isize_ | test_v256 | ++ x0, x1, x2, x3 | ++ From: | ++ /// A vector mask with 4 `msize` lanes.
++); ++ ++impl_i!([isize; 8]: isizex8, msizex8 | isize_ | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A vector with 8 `isize` lanes. ++); ++impl_u!([usize; 8]: usizex8, msizex8 | usize_ | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A vector with 8 `usize` lanes. ++); ++impl_m!([msize; 8]: msizex8 | isize_ | test_v512 | ++ x0, x1, x2, x3, x4, x5, x6, x7 | ++ From: | ++ /// A vector mask with 8 `msize` lanes. ++); +diff --git a/third_party/rust/packed_simd/tests/endianness.rs b/third_party/rust/packed_simd/tests/endianness.rs +new file mode 100644 +index 000000000000..1e6b4f354301 +--- /dev/null ++++ b/third_party/rust/packed_simd/tests/endianness.rs +@@ -0,0 +1,262 @@ ++#[cfg(target_arch = "wasm32")] ++use wasm_bindgen_test::*; ++ ++use packed_simd::*; ++use std::{mem, slice}; ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_indexing() { ++ let v = i32x4::new(0, 1, 2, 3); ++ assert_eq!(v.extract(0), 0); ++ assert_eq!(v.extract(1), 1); ++ assert_eq!(v.extract(2), 2); ++ assert_eq!(v.extract(3), 3); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_bitcasts() { ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let t: i16x8 = unsafe { mem::transmute(x) }; ++ let e: i16x8 = if cfg!(target_endian = "little") { ++ i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854) ++ } else { ++ i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599) ++ }; ++ assert_eq!(t, e); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_casts() { ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let t: i16x16 = x.into(); // simd_cast ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let e = i16x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ assert_eq!(t, e); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_load_and_stores() { ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let mut y: [i16; 8] = [0; 8]; ++ x.write_to_slice_unaligned(unsafe { ++ slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16) ++ }); ++ ++ let e: [i16; 8] = if cfg!(target_endian = "little") { ++ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] ++ } else { ++ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] ++ }; ++ assert_eq!(y, e); ++ ++ let z = i8x16::from_slice_unaligned(unsafe { ++ slice::from_raw_parts(&y as *const _ as *const i8, 16) ++ }); ++ assert_eq!(z, x); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_array_union() { ++ union A { ++ data: [f32; 4], ++ vec: f32x4, ++ } ++ let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; ++ assert_eq!(x[0], 0_f32); ++ assert_eq!(x[1], 1_f32); ++ assert_eq!(x[2], 2_f32); ++ assert_eq!(x[3], 3_f32); ++ let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.]
}.vec }; ++ assert_eq!(y, f32x4::new(3., 2., 1., 0.)); ++ ++ union B { ++ data: [i8; 16], ++ vec: i8x16, ++ } ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: [i8; 16] = unsafe { B { vec: x }.data }; ++ ++ for i in 0..16 { ++ assert_eq!(x[i], i as i8); ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let y = [ ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ]; ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let e = i8x16::new( ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ); ++ let z = unsafe { B { data: y }.vec }; ++ assert_eq!(z, e); ++ ++ union C { ++ data: [i16; 8], ++ vec: i8x16, ++ } ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: [i16; 8] = unsafe { C { vec: x }.data }; ++ ++ let e: [i16; 8] = if cfg!(target_endian = "little") { ++ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] ++ } else { ++ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] ++ }; ++ assert_eq!(x, e); ++} ++ ++#[cfg_attr(not(target_arch = "wasm32"), test)] ++#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] ++fn endian_tuple_access() { ++ type F32x4T = (f32, f32, f32, f32); ++ union A { ++ data: F32x4T, ++ vec: f32x4, ++ } ++ let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; ++ assert_eq!(x.0, 0_f32); ++ assert_eq!(x.1, 1_f32); ++ assert_eq!(x.2, 2_f32); ++ assert_eq!(x.3, 3_f32); ++ let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) }.vec }; ++ assert_eq!(y, f32x4::new(3., 2., 1., 0.)); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); ++ union B { ++ data: I8x16T, ++ vec: i8x16, ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: I8x16T = unsafe { B { vec: x }.data }; ++ ++ assert_eq!(x.0, 0); ++ assert_eq!(x.1, 1); ++ assert_eq!(x.2, 2); ++ assert_eq!(x.3, 3); ++ assert_eq!(x.4, 4); ++ assert_eq!(x.5, 5); ++ assert_eq!(x.6, 6); ++ assert_eq!(x.7, 7); ++ assert_eq!(x.8, 8); ++ assert_eq!(x.9, 9); ++ assert_eq!(x.10, 10); ++ assert_eq!(x.11, 11); ++ assert_eq!(x.12, 12); ++ assert_eq!(x.13, 13); ++ assert_eq!(x.14, 14); ++ assert_eq!(x.15, 15); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let y = ( ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ); ++ let z: i8x16 = unsafe { B { data: y }.vec }; ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let e = i8x16::new( ++ 15, 14, 13, 12, 11, 10, 9, 8, ++ 7, 6, 5, 4, 3, 2, 1, 0 ++ ); ++ assert_eq!(e, z); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16); ++ union C { ++ data: I16x8T, ++ vec: i8x16, ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: I16x8T = unsafe { C { vec: x }.data }; ++ ++ let e: [i16; 8] = if cfg!(target_endian = "little") { ++ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] ++ } else { ++ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] ++ }; ++ assert_eq!(x.0, e[0]); ++ assert_eq!(x.1, e[1]); ++ assert_eq!(x.2, e[2]); ++ assert_eq!(x.3, e[3]); ++ assert_eq!(x.4, e[4]); ++ assert_eq!(x.5, e[5]); ++ assert_eq!(x.6, e[6]); ++ assert_eq!(x.7, e[7]); ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ #[repr(C)] ++ #[derive(Copy, Clone)] ++ pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16, ++ pub i8, pub i8, pub i16, pub i8,
pub i8, pub i16); ++ ++ union D { ++ data: Tup, ++ vec: i8x16, ++ } ++ ++ #[cfg_attr(rustfmt, rustfmt_skip)] ++ let x = i8x16::new( ++ 0, 1, 2, 3, 4, 5, 6, 7, ++ 8, 9, 10, 11, 12, 13, 14, 15, ++ ); ++ let x: Tup = unsafe { D { vec: x }.data }; ++ ++ let e: [i16; 12] = if cfg!(target_endian = "little") { ++ [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854] ++ } else { ++ [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599] ++ }; ++ assert_eq!(x.0 as i16, e[0]); ++ assert_eq!(x.1 as i16, e[1]); ++ assert_eq!(x.2 as i16, e[2]); ++ assert_eq!(x.3 as i16, e[3]); ++ assert_eq!(x.4 as i16, e[4]); ++ assert_eq!(x.5 as i16, e[5]); ++ assert_eq!(x.6 as i16, e[6]); ++ assert_eq!(x.7 as i16, e[7]); ++ assert_eq!(x.8 as i16, e[8]); ++ assert_eq!(x.9 as i16, e[9]); ++ assert_eq!(x.10 as i16, e[10]); ++ assert_eq!(x.11 as i16, e[11]); ++} +diff --git a/third_party/rust/simd/.cargo-checksum.json b/third_party/rust/simd/.cargo-checksum.json +deleted file mode 100644 +index 5e8c154cda72..000000000000 +--- a/third_party/rust/simd/.cargo-checksum.json ++++ /dev/null +@@ -1 +0,0 @@ +-{"files":{"Cargo.toml":"0c7a480c62d7b42604098fa1dd6453be79629112569c494efa75d7fd0998fd69","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6d3a9431e65e69c73a8923e6517b889d17549b23db406b9ec027710d16af701f","README.md":"f426ca32bb44fee39d83d51e481fe6b2640d4b78cb097c952cd75800b886f2fd","benches/mandelbrot.rs":"051b5199e66bca6cf7774e9024915fd4e1349ab39726a10a14e06b60d65d87a4","benches/matrix.rs":"048a21dacdb62365e0105d00d2c8cd6bd2396ac81134f2bff7eb4f7d095fb735","build.rs":"69c9c9029ca969a4bb3f11429bc1424fa75af46143eb0d853b4db3a512859b32","examples/axpy.rs":"4307626045d64ec08361c97c9c72c5dc8d361bdc88f64453b97ac0212041a1b2","examples/convert.rs":"8e658fde050f8a0d8b84ad7570446b10fcf544afbd551b940ca340474f324840","examples/dot-product.rs":"6fe2e007c147af5353804173a593c5b9d57dbccec156e1da37e9e32537363f91","examples/fannkuch-redux-nosimd.rs":"7b2fbde35e8666929d14d67328471cb0483d038a5325232f8db148b30865312b","examples/fannkuch-redux.rs":"ea21fdbd2274488a62cc984acad6e0b65d52f24fb4ff63b7057a3a667e9c8aae","examples/mandelbrot.rs":"71be242543c1e487145d7f16341c05d05d86109de4d9e94c5d6bc9a9c6ed9766","examples/matrix-inverse.rs":"93dbc55c66a72e5f7bc730072f35682523fa20dd362755d8443ad6982143cb5d","examples/nbody-nosimd.rs":"9cf46ea02e266c20f811318f1c5856d5afb9575b2d48d552fbd978f5c1856bdb","examples/nbody.rs":"a864311affab262024479d6348ff51af43d809e9ad332ec30ea4aacceaa2eae1","examples/ops.rs":"b08ea83583df71d0052895d677320a9888da5b6729c9b70636d31ede5128bb7f","examples/spectral-norm-nosimd.rs":"ffc8512ecde779078ea467f38f423a0ea623c63da7078193f9dd370200773f79","examples/spectral-norm.rs":"edb09c9d477f83939098cfb77a27cc298bc7a0c8a8e29cece0cccae0d70d890e","src/aarch64/mod.rs":"83f52775364c98de0cecb7e1509530c18972e932469f5f1522aa24a735d0fa37","src/aarch64/neon.rs":"3c05ea43b7261b9af9c0d904b37de01c2ba99caedcb464700f16617b672965a1","src/arm/mod.rs":"dcdd90bc0b39abaf86a0c8946d442b16313563fbae1ff03248628275c74d8617","src/arm/neon.rs":"71d0bb6dac5f58599bb825449701a05cf32f6eca1918e80d060b746e69751c37","src/common.rs":"c5a7b937c5cd8c3bccf0fb20d5d77770c0d9b0dd9fa06a661c6f2ddf118e65c0","src/lib.rs":"a24a207e65468de2189297380747e2f2f33ec2317f4b83f0665d34b1c09feb08","src/sixty_four.rs":"d168776d02acf943bda8044b24e644b7a9584197a223eba1a7c3024b205dc87d","src/v256.rs":"34bfde3676e23f6925db5d0408ae838e3aab7706128fd7c33e855b8579c69318","src/x86/avx.rs":"efcf2120a904a89b0adf2d3d3bdd0ca17df2ec058410af23fb7e81915873f808","src/x86/
avx2.rs":"3bcb3f391ad5f16f0a6da0bc1301329beb478ad6265bd3b2c9c124fc2e6198e5","src/x86/mod.rs":"0acc5a5e2672e2a0fddc11065663be8b8fa2da87320ea291fa86ff8c2f33edf5","src/x86/sse2.rs":"8807fb04bbfb404e17fcacf1e21d22616f8b377540a227b1fd03c121879122dd","src/x86/sse3.rs":"9bd01a4f08069ca4f445952e744d651efe887e3835b18872e757375f0d053bd2","src/x86/sse4_1.rs":"9ceb80dd70a7e7dfeef508cb935e1a2637175bc87a3b090f5dea691ff6aa0516","src/x86/sse4_2.rs":"c59321aed8decdce4d0d8570cff46aed02e1a8265647ef7702e9b180fc581254","src/x86/ssse3.rs":"2290f0269bae316b8e0491495645ee38a9bd73525c8572759c1328341c3bdb4c"},"package":"0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84"} +\ No newline at end of file +diff --git a/third_party/rust/simd/Cargo.toml b/third_party/rust/simd/Cargo.toml +deleted file mode 100644 +index 30279b93556c..000000000000 +--- a/third_party/rust/simd/Cargo.toml ++++ /dev/null +@@ -1,37 +0,0 @@ +-# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +-# +-# When uploading crates to the registry Cargo will automatically +-# "normalize" Cargo.toml files for maximal compatibility +-# with all versions of Cargo and also rewrite `path` dependencies +-# to registry (e.g. crates.io) dependencies +-# +-# If you believe there's an error in this file please file an +-# issue against the rust-lang/cargo repository. If you're +-# editing this file be aware that the upstream Cargo.toml +-# will likely look very different (and much more reasonable) +- +-[package] +-name = "simd" +-version = "0.2.3" +-authors = ["Huon Wilson "] +-description = "`simd` offers limited cross-platform access to SIMD instructions on\nCPUs, as well as raw interfaces to platform-specific instructions.\n(To be obsoleted by the `std::simd` implementation RFC 2366.)\n" +-documentation = "https://docs.rs/simd/" +-readme = "README.md" +-keywords = ["simd", "data-parallel"] +-license = "MIT/Apache-2.0" +-repository = "https://github.com/hsivonen/simd" +-[package.metadata.docs.rs] +-features = ["doc"] +-[dependencies.serde] +-version = "1.0" +-optional = true +- +-[dependencies.serde_derive] +-version = "1.0" +-optional = true +-[dev-dependencies.cfg-if] +-version = "0.1" +- +-[features] +-doc = [] +-with-serde = ["serde", "serde_derive"] +diff --git a/third_party/rust/simd/README.md b/third_party/rust/simd/README.md +deleted file mode 100644 +index 1c34f49bcd91..000000000000 +--- a/third_party/rust/simd/README.md ++++ /dev/null +@@ -1,11 +0,0 @@ +-# `simd` +- +-[![Build Status](https://travis-ci.org/hsivonen/simd.svg?branch=master)](https://travis-ci.org/hsivonen/simd) +-[![crates.io](https://meritbadge.herokuapp.com/simd)](https://crates.io/crates/simd) +-[![docs.rs](https://docs.rs/simd/badge.svg)](https://docs.rs/simd/) +- +-`simd` offers a basic interface to the SIMD functionality of CPUs. (Note: This crate fails to build unless the target is aarch64, x86_64, i686 (i.e. SSE2 enabled; not i586) or an ARMv7 target (thumb or not) with NEON enabled.) +- +-This crate is expected to become _obsolete_ once the implementation of [RFC 2366](https://github.com/rust-lang/rfcs/pull/2366) lands in the standard library. 
+- +-[Documentation](https://docs.rs/simd) +diff --git a/third_party/rust/simd/benches/mandelbrot.rs b/third_party/rust/simd/benches/mandelbrot.rs +deleted file mode 100755 +index 61061a4a301f..000000000000 +--- a/third_party/rust/simd/benches/mandelbrot.rs ++++ /dev/null +@@ -1,117 +0,0 @@ +-#![feature(test)] +-#![feature(cfg_target_feature)] +- +-extern crate simd; +-extern crate test; +- +-use test::black_box as bb; +-use test::Bencher as B; +-use simd::{f32x4, u32x4}; +-#[cfg(any(target_feature = "avx", target_feature = "avx2"))] +-use simd::x86::avx::{f32x8, u32x8}; +- +-fn naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 { +- let mut x = c_x; +- let mut y = c_y; +- let mut count = 0; +- while count < max_iter { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- if sum > 4.0 { +- break +- } +- count += 1; +- x = xx - yy + c_x; +- y = xy * 2.0 + c_y; +- } +- count +-} +- +-fn simd4(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { +- let mut x = c_x; +- let mut y = c_y; +- +- let mut count = u32x4::splat(0); +- for _ in 0..max_iter as usize { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- let mask = sum.lt(f32x4::splat(4.0)); +- +- if !mask.any() { break } +- count = count + mask.to_i().select(u32x4::splat(1), u32x4::splat(0)); +- +- x = xx - yy + c_x; +- y = xy + xy + c_y; +- } +- count +-} +- +-#[cfg(target_feature = "avx")] +-fn simd8(c_x: f32x8, c_y: f32x8, max_iter: u32) -> u32x8 { +- let mut x = c_x; +- let mut y = c_y; +- +- let mut count = u32x8::splat(0); +- for _ in 0..max_iter as usize { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- let mask = sum.lt(f32x8::splat(4.0)); +- +- if !mask.any() { break } +- count = count + mask.to_i().select(u32x8::splat(1), u32x8::splat(0)); +- +- x = xx - yy + c_x; +- y = xy + xy + c_y; +- } +- count +-} +- +-const SCALE: f32 = 3.0 / 100.0; +-const N: u32 = 100; +-#[bench] +-fn mandel_naive(b: &mut B) { +- b.iter(|| { +- for j in 0..100 { +- let y = -1.5 + (j as f32) * SCALE; +- for i in 0..100 { +- let x = -2.2 + (i as f32) * SCALE; +- bb(naive(x, y, N)); +- } +- } +- }) +-} +-#[bench] +-fn mandel_simd4(b: &mut B) { +- let tweak = u32x4::new(0, 1, 2, 3); +- b.iter(|| { +- for j in 0..100 { +- let y = f32x4::splat(-1.5) + f32x4::splat(SCALE) * u32x4::splat(j).to_f32(); +- for i in 0..25 { +- let i = u32x4::splat(i * 4) + tweak; +- let x = f32x4::splat(-2.2) + f32x4::splat(SCALE) * i.to_f32(); +- bb(simd4(x, y, N)); +- } +- } +- }) +-} +-#[cfg(any(target_feature = "avx", target_feature = "avx2"))] +-#[bench] +-fn mandel_simd8(b: &mut B) { +- let tweak = u32x8::new(0, 1, 2, 3, 4, 5, 6, 7); +- b.iter(|| { +- for j in 0..100 { +- let y = f32x8::splat(-1.5) + f32x8::splat(SCALE) * u32x8::splat(j).to_f32(); +- for i in 0..13 { // 100 not divisible by 8 :( +- let i = u32x8::splat(i * 8) + tweak; +- let x = f32x8::splat(-2.2) + f32x8::splat(SCALE) * i.to_f32(); +- bb(simd8(x, y, N)); +- } +- } +- }) +-} +diff --git a/third_party/rust/simd/benches/matrix.rs b/third_party/rust/simd/benches/matrix.rs +deleted file mode 100755 +index 36aa88237492..000000000000 +--- a/third_party/rust/simd/benches/matrix.rs ++++ /dev/null +@@ -1,485 +0,0 @@ +-#![feature(test)] +-#![feature(cfg_target_feature)] +-extern crate test; +-extern crate simd; +- +-use test::black_box as bb; +-use test::Bencher as B; +-use simd::f32x4; +-#[cfg(target_feature = "avx")] +-use simd::x86::avx::{f32x8, f64x4}; +-// #[cfg(target_feature = "avx2")] +-// use simd::x86::avx2::Avx2F32x8; +- 
+- +-#[bench] +-fn multiply_naive(b: &mut B) { +- let x = [[1.0_f32; 4]; 4]; +- let y = [[2.0; 4]; 4]; +- b.iter(|| { +- for _ in 0..100 { +- let (x, y) = bb((&x, &y)); +- +- bb(&[[x[0][0] * y[0][0] + x[1][0] * y[0][1] + x[2][0] * y[0][2] + x[3][0] * y[0][3], +- x[0][1] * y[0][0] + x[1][1] * y[0][1] + x[2][1] * y[0][2] + x[3][1] * y[0][3], +- x[0][2] * y[0][0] + x[1][2] * y[0][1] + x[2][2] * y[0][2] + x[3][2] * y[0][3], +- x[0][3] * y[0][0] + x[1][3] * y[0][1] + x[2][3] * y[0][2] + x[3][3] * y[0][3]], +- [x[0][0] * y[1][0] + x[1][0] * y[1][1] + x[2][0] * y[1][2] + x[3][0] * y[1][3], +- x[0][1] * y[1][0] + x[1][1] * y[1][1] + x[2][1] * y[1][2] + x[3][1] * y[1][3], +- x[0][2] * y[1][0] + x[1][2] * y[1][1] + x[2][2] * y[1][2] + x[3][2] * y[1][3], +- x[0][3] * y[1][0] + x[1][3] * y[1][1] + x[2][3] * y[1][2] + x[3][3] * y[1][3]], +- [x[0][0] * y[2][0] + x[1][0] * y[2][1] + x[2][0] * y[2][2] + x[3][0] * y[2][3], +- x[0][1] * y[2][0] + x[1][1] * y[2][1] + x[2][1] * y[2][2] + x[3][1] * y[2][3], +- x[0][2] * y[2][0] + x[1][2] * y[2][1] + x[2][2] * y[2][2] + x[3][2] * y[2][3], +- x[0][3] * y[2][0] + x[1][3] * y[2][1] + x[2][3] * y[2][2] + x[3][3] * y[2][3]], +- [x[0][0] * y[3][0] + x[1][0] * y[3][1] + x[2][0] * y[3][2] + x[3][0] * y[3][3], +- x[0][1] * y[3][0] + x[1][1] * y[3][1] + x[2][1] * y[3][2] + x[3][1] * y[3][3], +- x[0][2] * y[3][0] + x[1][2] * y[3][1] + x[2][2] * y[3][2] + x[3][2] * y[3][3], +- x[0][3] * y[3][0] + x[1][3] * y[3][1] + x[2][3] * y[3][2] + x[3][3] * y[3][3]], +- ]); +- } +- }) +-} +- +-#[bench] +-fn multiply_simd4_32(b: &mut B) { +- let x = [f32x4::splat(1.0_f32); 4]; +- let y = [f32x4::splat(2.0); 4]; +- b.iter(|| { +- for _ in 0..100 { +- let (x, y) = bb((&x, &y)); +- +- let y0 = y[0]; +- let y1 = y[1]; +- let y2 = y[2]; +- let y3 = y[3]; +- bb(&[f32x4::splat(y0.extract(0)) * x[0] + +- f32x4::splat(y0.extract(1)) * x[1] + +- f32x4::splat(y0.extract(2)) * x[2] + +- f32x4::splat(y0.extract(3)) * x[3], +- f32x4::splat(y1.extract(0)) * x[0] + +- f32x4::splat(y1.extract(1)) * x[1] + +- f32x4::splat(y1.extract(2)) * x[2] + +- f32x4::splat(y1.extract(3)) * x[3], +- f32x4::splat(y2.extract(0)) * x[0] + +- f32x4::splat(y2.extract(1)) * x[1] + +- f32x4::splat(y2.extract(2)) * x[2] + +- f32x4::splat(y2.extract(3)) * x[3], +- f32x4::splat(y3.extract(0)) * x[0] + +- f32x4::splat(y3.extract(1)) * x[1] + +- f32x4::splat(y3.extract(2)) * x[2] + +- f32x4::splat(y3.extract(3)) * x[3], +- ]); +- } +- }) +-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn multiply_simd4_64(b: &mut B) { +- let x = [f64x4::splat(1.0_f64); 4]; +- let y = [f64x4::splat(2.0); 4]; +- b.iter(|| { +- for _ in 0..100 { +- let (x, y) = bb((&x, &y)); +- +- let y0 = y[0]; +- let y1 = y[1]; +- let y2 = y[2]; +- let y3 = y[3]; +- bb(&[f64x4::splat(y0.extract(0)) * x[0] + +- f64x4::splat(y0.extract(1)) * x[1] + +- f64x4::splat(y0.extract(2)) * x[2] + +- f64x4::splat(y0.extract(3)) * x[3], +- f64x4::splat(y1.extract(0)) * x[0] + +- f64x4::splat(y1.extract(1)) * x[1] + +- f64x4::splat(y1.extract(2)) * x[2] + +- f64x4::splat(y1.extract(3)) * x[3], +- f64x4::splat(y2.extract(0)) * x[0] + +- f64x4::splat(y2.extract(1)) * x[1] + +- f64x4::splat(y2.extract(2)) * x[2] + +- f64x4::splat(y2.extract(3)) * x[3], +- f64x4::splat(y3.extract(0)) * x[0] + +- f64x4::splat(y3.extract(1)) * x[1] + +- f64x4::splat(y3.extract(2)) * x[2] + +- f64x4::splat(y3.extract(3)) * x[3], +- ]); +- } +- }) +-} +- +-#[bench] +-fn inverse_naive(b: &mut B) { +- let mut x = [[0_f32; 4]; 4]; +- for i in 0..4 { x[i][i] = 1.0 } +- +- b.iter(|| { +- for _ in 
0..100 { +- let x = bb(&x); +- +- let mut t = [[0_f32; 4]; 4]; +- for i in 0..4 { +- t[0][i] = x[i][0]; +- t[1][i] = x[i][1]; +- t[2][i] = x[i][2]; +- t[3][i] = x[i][3]; +- } +- +- let _0 = t[2][2] * t[3][3]; +- let _1 = t[2][3] * t[3][2]; +- let _2 = t[2][1] * t[3][3]; +- let _3 = t[2][3] * t[3][1]; +- let _4 = t[2][1] * t[3][2]; +- let _5 = t[2][2] * t[3][1]; +- let _6 = t[2][0] * t[3][3]; +- let _7 = t[2][3] * t[3][0]; +- let _8 = t[2][0] * t[3][2]; +- let _9 = t[2][2] * t[3][0]; +- let _10 = t[2][0] * t[3][1]; +- let _11 = t[2][1] * t[3][0]; +- +- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] - +- (_1 * t[1][1] + _2 * t[1][2] + _5 * t[1][3]); +- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - +- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); +- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - +- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); +- let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - +- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); +- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - +- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); +- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - +- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); +- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - +- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); +- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - +- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); +- +- let _0 = t[0][2] * t[1][3]; +- let _1 = t[0][3] * t[1][2]; +- let _2 = t[0][1] * t[1][3]; +- let _3 = t[0][3] * t[1][1]; +- let _4 = t[0][1] * t[1][2]; +- let _5 = t[0][2] * t[1][1]; +- let _6 = t[0][0] * t[1][3]; +- let _7 = t[0][3] * t[1][0]; +- let _8 = t[0][0] * t[1][2]; +- let _9 = t[0][2] * t[1][0]; +- let _10 = t[0][0] * t[1][1]; +- let _11 = t[0][1] * t[1][0]; +- +- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- +- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); +- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- +- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); +- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- +- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); +- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- +- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); +- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]- +- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); +- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- +- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); +- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- +- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); +- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- +- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); +- +- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; +- +- let det = 1.0 / det; +- let mut ret = [[d00, d01, d02, d03], +- [d10, d11, d12, d13], +- [d20, d21, d22, d23], +- [d30, d31, d32, d33]]; +- for i in 0..4 { +- for j in 0..4 { +- ret[i][j] *= det; +- } +- } +- bb(&ret); +- } +- }) +-} +- +-#[bench] +-fn inverse_simd4(b: &mut B) { +- let mut x = [f32x4::splat(0_f32); 4]; +- for i in 0..4 { x[i] = x[i].replace(i as u32, 1.0); } +- +- fn shuf0145(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(0), v.extract(1), +- w.extract(4 - 4), w.extract(5 - 4)) +- } +- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(0), v.extract(2), +- w.extract(4 - 4), w.extract(6 - 4)) +- } +- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(1), v.extract(3), +- w.extract(5 - 4), w.extract(7 - 4)) +- } +- fn shuf2367(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(2), v.extract(3), +- w.extract(6 - 4), w.extract(7 - 4)) +- } 
+- +- fn swiz1032(v: f32x4) -> f32x4 { +- f32x4::new(v.extract(1), v.extract(0), +- v.extract(3), v.extract(2)) +- } +- fn swiz2301(v: f32x4) -> f32x4 { +- f32x4::new(v.extract(2), v.extract(3), +- v.extract(0), v.extract(1)) +- } +- +- b.iter(|| { +- for _ in 0..100 { +- let src0; +- let src1; +- let src2; +- let src3; +- let mut tmp1; +- let row0; +- let mut row1; +- let mut row2; +- let mut row3; +- let mut minor0; +- let mut minor1; +- let mut minor2; +- let mut minor3; +- let mut det; +- +- let x = bb(&x); +- src0 = x[0]; +- src1 = x[1]; +- src2 = x[2]; +- src3 = x[3]; +- +- tmp1 = shuf0145(src0, src1); +- row1 = shuf0145(src2, src3); +- row0 = shuf0246(tmp1, row1); +- row1 = shuf1357(row1, tmp1); +- +- tmp1 = shuf2367(src0, src1); +- row3 = shuf2367(src2, src3); +- row2 = shuf0246(tmp1, row3); +- row3 = shuf0246(row3, tmp1); +- +- +- tmp1 = row2 * row3; +- tmp1 = swiz1032(tmp1); +- minor0 = row1 * tmp1; +- minor1 = row0 * tmp1; +- tmp1 = swiz2301(tmp1); +- minor0 = (row1 * tmp1) - minor0; +- minor1 = (row0 * tmp1) - minor1; +- minor1 = swiz2301(minor1); +- +- +- tmp1 = row1 * row2; +- tmp1 = swiz1032(tmp1); +- minor0 = (row3 * tmp1) + minor0; +- minor3 = row0 * tmp1; +- tmp1 = swiz2301(tmp1); +- +- minor0 = minor0 - row3 * tmp1; +- minor3 = row0 * tmp1 - minor3; +- minor3 = swiz2301(minor3); +- +- +- tmp1 = row3 * swiz2301(row1); +- tmp1 = swiz1032(tmp1); +- row2 = swiz2301(row2); +- minor0 = row2 * tmp1 + minor0; +- minor2 = row0 * tmp1; +- tmp1 = swiz2301(tmp1); +- minor0 = minor0 - row2 * tmp1; +- minor2 = row0 * tmp1 - minor2; +- minor2 = swiz2301(minor2); +- +- +- tmp1 = row0 * row1; +- tmp1 = swiz1032(tmp1); +- minor2 = minor2 + row3 * tmp1; +- minor3 = row2 * tmp1 - minor3; +- tmp1 = swiz2301(tmp1); +- minor2 = row3 * tmp1 - minor2; +- minor3 = minor3 - row2 * tmp1; +- +- +- +- tmp1 = row0 * row3; +- tmp1 = swiz1032(tmp1); +- minor1 = minor1 - row2 * tmp1; +- minor2 = row1 * tmp1 + minor2; +- tmp1 = swiz2301(tmp1); +- minor1 = row2 * tmp1 + minor1; +- minor2 = minor2 - row1 * tmp1; +- +- tmp1 = row0 * row2; +- tmp1 = swiz1032(tmp1); +- minor1 = row3 * tmp1 + minor1; +- minor3 = minor3 - row1 * tmp1; +- tmp1 = swiz2301(tmp1); +- minor1 = minor1 - row3 * tmp1; +- minor3 = row1 * tmp1 + minor3; +- +- det = row0 * minor0; +- det = swiz2301(det) + det; +- det = swiz1032(det) + det; +- //tmp1 = det.approx_reciprocal(); det = tmp1 * (f32x4::splat(2.0) - det * tmp1); +- det = f32x4::splat(1.0) / det; +- +- bb(&[minor0 * det, minor1 * det, minor2 * det, minor3 * det]); +- } +- }) +- +-} +- +-#[bench] +-fn transpose_naive(b: &mut B) { +- let x = [[0_f32; 4]; 4]; +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- bb(&[[x[0][0], x[1][0], x[2][0], x[3][0]], +- [x[0][1], x[1][1], x[2][1], x[3][1]], +- [x[0][2], x[1][2], x[2][2], x[3][2]], +- [x[0][3], x[1][3], x[2][3], x[3][3]]]); +- } +- }) +-} +- +-#[bench] +-fn transpose_simd4(b: &mut B) { +- let x = [f32x4::splat(0_f32); 4]; +- +- fn shuf0246(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(0), v.extract(2), +- w.extract(4 - 4), w.extract(6 - 4)) +- } +- fn shuf1357(v: f32x4, w: f32x4) -> f32x4 { +- f32x4::new(v.extract(1), v.extract(3), +- w.extract(5 - 4), w.extract(7 - 4)) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x0 = x[0]; +- let x1 = x[1]; +- let x2 = x[2]; +- let x3 = x[3]; +- +- let a0 = shuf0246(x0, x1); +- let a1 = shuf0246(x2, x3); +- let a2 = shuf1357(x0, x1); +- let a3 = shuf1357(x2, x3); +- +- let b0 = shuf0246(a0, a1); +- let b1 = shuf0246(a2, a3); +- let b2 = shuf1357(a0, a1); +- let 
b3 = shuf1357(a2, a3); +- bb(&[b0, b1, b2, b3]); +- } +- }) +-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn transpose_simd8_naive(b: &mut B) { +- let x = [f32x8::splat(0_f32); 2]; +- +- fn shuf0246(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), v.extract(2), v.extract(4), v.extract(6), +- w.extract(0), w.extract(2), w.extract(4), w.extract(6)) +- } +- fn shuf1357(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(1), v.extract(3), v.extract(5), v.extract(7), +- w.extract(1), w.extract(3), w.extract(5), w.extract(7),) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x01 = x[0]; +- let x23 = x[1]; +- +- let a01 = shuf0246(x01, x23); +- let a23 = shuf1357(x01, x23); +- +- let b01 = shuf0246(a01, a23); +- let b23 = shuf1357(a01, a23); +- bb(&[b01, b23]); +- } +- }) +-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn transpose_simd8_avx2_vpermps(b: &mut B) { +- let x = [f32x8::splat(0_f32); 2]; +- +- // efficient on AVX2 using vpermps +- fn perm04152637(v: f32x8) -> f32x8 { +- // broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12) +- // v.permutevar(i32x8::new(0, 4, 1, 5, 2, 6, 3, 7)) +- f32x8::new(v.extract(0), v.extract(4), v.extract(1), v.extract(5), +- v.extract(2), v.extract(6), v.extract(3), v.extract(7)) +- } +- fn shuf_lo(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), v.extract(1), w.extract(0), w.extract(1), +- v.extract(4), v.extract(5), w.extract(4), w.extract(5),) +- } +- fn shuf_hi(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(2), v.extract(3), w.extract(2), w.extract(3), +- v.extract(6), v.extract(7), w.extract(6), w.extract(7),) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x01 = x[0]; +- let x23 = x[1]; +- +- let a01 = perm04152637(x01); +- let a23 = perm04152637(x23); +- +- let b01 = shuf_lo(a01, a23); +- let b23 = shuf_hi(a01, a23); +- bb(&[b01, b23]); +- } +- }) +-} +- +-#[cfg(target_feature = "avx")] +-#[bench] +-fn transpose_simd8_avx2_vpermpd(b: &mut B) { +- let x = [f32x8::splat(0_f32); 2]; +- +- // efficient on AVX2 using vpermpd +- fn perm01452367(v: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), v.extract(1), v.extract(4), v.extract(5), +- v.extract(2), v.extract(3), v.extract(6), v.extract(7)) +- } +- fn shuf_lo_ps(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(0), w.extract(0), v.extract(1), w.extract(1), +- v.extract(4), w.extract(4), v.extract(5), w.extract(5),) +- } +- fn shuf_hi_ps(v: f32x8, w: f32x8) -> f32x8 { +- f32x8::new(v.extract(2), w.extract(2), v.extract(3), w.extract(3), +- v.extract(6), w.extract(6), v.extract(7), w.extract(7),) +- } +- b.iter(|| { +- for _ in 0..100 { +- let x = bb(&x); +- let x01 = x[0]; +- let x23 = x[1]; +- +- let a01 = perm01452367(x01); +- let a23 = perm01452367(x23); +- +- let b01 = shuf_lo_ps(a01, a23); +- let b23 = shuf_hi_ps(a01, a23); +- bb(&[b01, b23]); +- } +- }) +-} +diff --git a/third_party/rust/simd/build.rs b/third_party/rust/simd/build.rs +deleted file mode 100644 +index 61b5330a1846..000000000000 +--- a/third_party/rust/simd/build.rs ++++ /dev/null +@@ -1,3 +0,0 @@ +-fn main() { +- println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); +-} +diff --git a/third_party/rust/simd/examples/axpy.rs b/third_party/rust/simd/examples/axpy.rs +deleted file mode 100755 +index 7862721b254d..000000000000 +--- a/third_party/rust/simd/examples/axpy.rs ++++ /dev/null +@@ -1,65 +0,0 @@ +-#![feature(cfg_target_feature)] +-extern crate simd; +-use simd::f32x4; +-#[cfg(target_feature = "avx")] +-use simd::x86::avx::f32x8; +- +-#[inline(never)] +-pub 
fn axpy(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- assert_eq!(x.len(), z.len()); +- +- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); +- +- let mut i = 0; +- while i < len & !3 { +- let x = f32x4::load(x, i); +- let y = f32x4::load(y, i); +- (f32x4::splat(a) * x + y).store(z, i); +- i += 4 +- } +-} +- +-#[cfg(target_feature = "avx")] +-#[inline(never)] +-pub fn axpy8(z: &mut [f32], a: f32, x: &[f32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- assert_eq!(x.len(), z.len()); +- +- let len = std::cmp::min(std::cmp::min(x.len(), y.len()), z.len()); +- +- let mut i = 0; +- while i < len & !7 { +- let x = f32x8::load(x, i); +- let y = f32x8::load(y, i); +- (f32x8::splat(a) * x + y).store(z, i); +- i += 8 +- } +-} +- +- +-#[cfg(not(target_feature = "avx"))] +-pub fn axpy8(_: &mut [f32], _: f32, _: &[f32], _: &[f32]) { +- unimplemented!() +-} +- +- +-fn main() { +- let mut z = vec![0.; 4]; +- axpy(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- let mut z = vec![0.; 8]; +- axpy(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- +- if cfg!(target_feature = "avx") { +- let mut z = vec![0.; 4]; +- axpy8(&mut z, 2., &[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- let mut z = vec![0.; 8]; +- axpy8(&mut z, 3., &[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0]); +- println!("{:?}", z); +- } +-} +diff --git a/third_party/rust/simd/examples/convert.rs b/third_party/rust/simd/examples/convert.rs +deleted file mode 100644 +index 11823a4b50d2..000000000000 +--- a/third_party/rust/simd/examples/convert.rs ++++ /dev/null +@@ -1,38 +0,0 @@ +-extern crate simd; +-use simd::f32x4; +- +-#[inline(never)] +-pub fn convert_scalar(x: &mut [i32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- +- let mut i = 0; +- while i < x.len() & !3 { +- x[i] = y[i] as i32; +- i += 1; +- } +-} +- +-#[inline(never)] +-pub fn convert(x: &mut [i32], y: &[f32]) { +- assert_eq!(x.len(), y.len()); +- +- let mut i = 0; +- while i < x.len() & !3 { +- let v = f32x4::load(y, i); +- v.to_i32().store(x, i); +- i += 4 +- } +-} +- +-fn main() { +- let x = &mut [0; 12]; +- let y = [1.0; 12]; +- convert(x, &y); +- convert_scalar(x, &y); +- println!("{:?}", x); +- let x = &mut [0; 16]; +- let y = [1.0; 16]; +- convert(x, &y); +- convert_scalar(x, &y); +- println!("{:?}", x); +-} +diff --git a/third_party/rust/simd/examples/dot-product.rs b/third_party/rust/simd/examples/dot-product.rs +deleted file mode 100755 +index 9f0e1d35c799..000000000000 +--- a/third_party/rust/simd/examples/dot-product.rs ++++ /dev/null +@@ -1,60 +0,0 @@ +-#![feature(cfg_target_feature)] +-extern crate simd; +-use simd::f32x4; +-#[cfg(target_feature = "avx")] +-use simd::x86::avx::{f32x8, LowHigh128}; +- +-#[inline(never)] +-pub fn dot(x: &[f32], y: &[f32]) -> f32 { +- assert_eq!(x.len(), y.len()); +- +- let len = std::cmp::min(x.len(), y.len()); +- +- let mut sum = f32x4::splat(0.0); +- let mut i = 0; +- while i < len & !3 { +- let x = f32x4::load(x, i); +- let y = f32x4::load(y, i); +- sum = sum + x * y; +- i += 4 +- } +- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) +-} +- +-#[cfg(target_feature = "avx")] +-#[inline(never)] +-pub fn dot8(x: &[f32], y: &[f32]) -> f32 { +- assert_eq!(x.len(), y.len()); +- +- let len = std::cmp::min(x.len(), y.len()); +- +- let mut sum = f32x8::splat(0.0); +- let mut i = 0; +- 
while i < len & !7 { +- let x = f32x8::load(x, i); +- let y = f32x8::load(y, i); +- sum = sum + x * y; +- i += 8 +- } +- let sum = sum.low() + sum.high(); +- sum.extract(0) + sum.extract(1) + sum.extract(2) + sum.extract(3) +-} +- +- +-#[cfg(not(target_feature = "avx"))] +-pub fn dot8(_: &[f32], _: &[f32]) -> f32 { +- unimplemented!() +-} +- +- +-fn main() { +- println!("{}", dot(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); +- println!("{}", dot(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); +- +- if cfg!(target_feature = "avx") { +- println!("{}", dot8(&[1.0, 3.0, 5.0, 7.0], &[2.0, 4.0, 6.0, 8.0])); +- println!("{}", dot8(&[1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0], +- &[2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0])); +- } +-} +diff --git a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs b/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs +deleted file mode 100644 +index fa30b2283f93..000000000000 +--- a/third_party/rust/simd/examples/fannkuch-redux-nosimd.rs ++++ /dev/null +@@ -1,156 +0,0 @@ +-// The Computer Language Benchmarks Game +-// http://benchmarksgame.alioth.debian.org/ +-// +-// contributed by the Rust Project Developers +-// contributed by TeXitoi +- +-use std::{cmp, mem}; +-use std::thread; +- +-fn rotate(x: &mut [i32]) { +- let mut prev = x[0]; +- for place in x.iter_mut().rev() { +- prev = mem::replace(place, prev) +- } +-} +- +-fn next_permutation(perm: &mut [i32], count: &mut [i32]) { +- for i in 1..perm.len() { +- rotate(&mut perm[.. i + 1]); +- let count_i = &mut count[i]; +- if *count_i >= i as i32 { +- *count_i = 0; +- } else { +- *count_i += 1; +- break +- } +- } +-} +- +-#[derive(Clone, Copy)] +-struct P { +- p: [i32; 16], +-} +- +-#[derive(Clone, Copy)] +-struct Perm { +- cnt: [i32; 16], +- fact: [u32; 16], +- n: u32, +- permcount: u32, +- perm: P, +-} +- +-impl Perm { +- fn new(n: u32) -> Perm { +- let mut fact = [1; 16]; +- for i in 1 .. n as usize + 1 { +- fact[i] = fact[i - 1] * i as u32; +- } +- Perm { +- cnt: [0; 16], +- fact: fact, +- n: n, +- permcount: 0, +- perm: P { p: [0; 16 ] } +- } +- } +- +- fn get(&mut self, mut idx: i32) -> P { +- let mut pp = [0u8; 16]; +- self.permcount = idx as u32; +- for (i, place) in self.perm.p.iter_mut().enumerate() { +- *place = i as i32 + 1; +- } +- +- for i in (1 .. self.n as usize).rev() { +- let d = idx / self.fact[i] as i32; +- self.cnt[i] = d; +- idx %= self.fact[i] as i32; +- for (place, val) in pp.iter_mut().zip(self.perm.p[..(i+1)].iter()) { +- *place = (*val) as u8 +- } +- +- let d = d as usize; +- for j in 0 .. 
i + 1 { +- self.perm.p[j] = if j + d <= i {pp[j + d]} else {pp[j+d-i-1]} as i32; +- } +- } +- +- self.perm +- } +- +- fn count(&self) -> u32 { self.permcount } +- fn max(&self) -> u32 { self.fact[self.n as usize] } +- +- fn next(&mut self) -> P { +- next_permutation(&mut self.perm.p, &mut self.cnt); +- self.permcount += 1; +- +- self.perm +- } +-} +- +- +-fn reverse(tperm: &mut [i32], k: usize) { +- tperm[..k].reverse() +-} +- +-fn work(mut perm: Perm, n: usize, max: usize) -> (i32, i32) { +- let mut checksum = 0; +- let mut maxflips = 0; +- +- let mut p = perm.get(n as i32); +- +- while perm.count() < max as u32 { +- let mut flips = 0; +- +- while p.p[0] != 1 { +- let k = p.p[0] as usize; +- reverse(&mut p.p, k); +- flips += 1; +- } +- +- checksum += if perm.count() % 2 == 0 {flips} else {-flips}; +- maxflips = cmp::max(maxflips, flips); +- +- p = perm.next(); +- } +- +- (checksum, maxflips) +-} +- +-fn fannkuch(n: i32) -> (i32, i32) { +- let perm = Perm::new(n as u32); +- +- let n = 1; +- let mut futures = vec![]; +- let k = perm.max() / n; +- +- for j in (0..).map(|x| x * k).take_while(|&j| j < k * n) { +- let max = cmp::min(j+k, perm.max()); +- +- futures.push(thread::spawn(move|| { +- work(perm, j as usize, max as usize) +- })) +- } +- +- let mut checksum = 0; +- let mut maxflips = 0; +- for fut in futures.into_iter() { +- let (cs, mf) = fut.join().unwrap(); +- checksum += cs; +- maxflips = cmp::max(maxflips, mf); +- } +- (checksum, maxflips) +-} +- +-fn main() { +- let n = std::env::args_os().nth(1) +- .and_then(|s| s.into_string().ok()) +- .and_then(|n| n.parse().ok()) +- .unwrap_or(7); +- +- let (checksum, maxflips) = fannkuch(n); +- println!("{}\nPfannkuchen({}) = {}", checksum, n, maxflips); +-} +diff --git a/third_party/rust/simd/examples/fannkuch-redux.rs b/third_party/rust/simd/examples/fannkuch-redux.rs +deleted file mode 100755 +index 2e52ae721135..000000000000 +--- a/third_party/rust/simd/examples/fannkuch-redux.rs ++++ /dev/null +@@ -1,233 +0,0 @@ +-#![feature(cfg_target_feature)] +-extern crate simd; +-#[macro_use] extern crate cfg_if; +-use simd::u8x16; +- +-use std::{env, process}; +- +-cfg_if! 
{ +- if #[cfg(target_arch = "aarch64")] { +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- use simd::aarch64::neon::*; +- y.table_lookup_1(x) +- } +- } else if #[cfg(all(target_arch = "arm", +- target_feature = "neon"))] { +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- use simd::arm::neon::*; +- #[inline(always)] +- fn split(x: u8x16) -> (u8x8, u8x8) { +- unsafe {std::mem::transmute(x)} +- } +- fn join(x: u8x8, y: u8x8) -> u8x16 { +- unsafe {std::mem::transmute((x, y))} +- } +- +- let (t0, t1) = split(x); +- let (i0, i1) = split(y); +- join(i0.table_lookup_2(t0, t1), +- i1.table_lookup_2(t0, t1)) +- } +- } else if #[cfg(target_feature = "ssse3")] { +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- use simd::x86::ssse3::*; +- x.shuffle_bytes(y) +- } +- } else { +- // slow fallback, so tests work +- #[inline(always)] +- fn shuffle(x: u8x16, y: u8x16) -> u8x16 { +- u8x16::new(x.extract(y.extract(0) as u32), +- x.extract(y.extract(1) as u32), +- x.extract(y.extract(2) as u32), +- x.extract(y.extract(3) as u32), +- x.extract(y.extract(4) as u32), +- x.extract(y.extract(5) as u32), +- x.extract(y.extract(6) as u32), +- x.extract(y.extract(7) as u32), +- x.extract(y.extract(8) as u32), +- x.extract(y.extract(9) as u32), +- x.extract(y.extract(10) as u32), +- x.extract(y.extract(11) as u32), +- x.extract(y.extract(12) as u32), +- x.extract(y.extract(13) as u32), +- x.extract(y.extract(14) as u32), +- x.extract(y.extract(15) as u32)) +- } +- } +-} +-struct State { +- s: [u8; 16], +- flip_masks: [u8x16; 16], +- rotate_masks: [u8x16; 16], +- +- maxflips: i32, +- odd: u16, +- checksum: i32, +-} +-impl State { +- fn new() -> State { +- State { +- s: [0; 16], +- flip_masks: [u8x16::splat(0); 16], +- rotate_masks: [u8x16::splat(0); 16], +- +- maxflips: 0, +- odd: 0, +- checksum: 0, +- } +- } +- #[inline(never)] +- fn rotate_sisd(&mut self, n: usize) { +- let c = self.s[0]; +- for i in 1..(n + 1) { +- self.s[i - 1] = self.s[i]; +- } +- self.s[n] = c; +- } +- #[inline(never)] +- fn popmasks(&mut self) { +- let mut mask = [0_u8; 16]; +- for i in 0..16 { +- for j in 0..16 { mask[j] = j as u8; } +- +- for x in 0..(i+1)/2 { +- mask.swap(x, i - x); +- } +- +- self.flip_masks[i] = u8x16::load(&mask, 0); +- +- for j in 0..16 { self.s[j] = j as u8; } +- self.rotate_sisd(i); +- self.rotate_masks[i] = self.load_s(); +- } +- } +- fn rotate(&mut self, n: usize) { +- shuffle(self.load_s(), self.rotate_masks[n]).store(&mut self.s, 0) +- } +- +- fn load_s(&self) -> u8x16 { +- u8x16::load(&self.s, 0) +- } +- +- +- #[inline(never)] +- fn tk(&mut self, n: usize) { +- #[derive(Copy, Clone, Debug)] +- struct Perm { +- perm: u8x16, +- start: u8, +- odd: u16 +- } +- +- let mut perms = [Perm { perm: u8x16::splat(0), start: 0 , odd: 0 }; 60]; +- +- let mut i = 0; +- let mut c = [0_u8; 16]; +- let mut perm_max = 0; +- +- while i < n { +- while i < n && perm_max < 60 { +- self.rotate(i); +- if c[i] as usize >= i { +- c[i] = 0; +- i += 1; +- continue +- } +- +- c[i] += 1; +- i = 1; +- self.odd = !self.odd; +- if self.s[0] != 0 { +- if self.s[self.s[0] as usize] != 0 { +- perms[perm_max].perm = self.load_s(); +- perms[perm_max].start = self.s[0]; +- perms[perm_max].odd = self.odd; +- perm_max += 1; +- } else { +- if self.maxflips == 0 { self.maxflips = 1 } +- self.checksum += if self.odd != 0 { -1 } else { 1 }; +- } +- } +- } +- +- let mut k = 0; +- while k < std::cmp::max(1, perm_max) - 1 { +- let pk = &perms[k]; +- let pk1 = &perms[k + 1]; +- //println!("perm1 {:?}\nperm2 
{:?}", pk.perm, pk1.perm); +- let mut perm1 = pk.perm; +- let mut perm2 = pk1.perm; +- +- let mut f1 = 0; +- let mut f2 = 0; +- let mut toterm1 = pk.start; +- let mut toterm2 = pk1.start; +- +- while toterm1 != 0 && toterm2 != 0 { +- perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]); +- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); +- toterm1 = perm1.extract(0); +- toterm2 = perm2.extract(0); +- +- f1 += 1; f2 += 1; +- } +- while toterm1 != 0 { +- perm1 = shuffle(perm1, self.flip_masks[toterm1 as usize]); +- toterm1 = perm1.extract(0); +- f1 += 1; +- } +- while toterm2 != 0 { +- perm2 = shuffle(perm2, self.flip_masks[toterm2 as usize]); +- toterm2 = perm2.extract(0); +- f2 += 1; +- } +- +- if f1 > self.maxflips { self.maxflips = f1 } +- if f2 > self.maxflips { self.maxflips = f2 } +- self.checksum += if pk.odd != 0 { -f1 } else { f1 }; +- self.checksum += if pk1.odd != 0 { -f2 } else { f2 }; +- +- k += 2; +- } +- while k < perm_max { +- let pk = &perms[k]; +- let mut perm = pk.perm; +- let mut f = 0; +- let mut toterm = pk.start; +- while toterm != 0 { +- perm = shuffle(perm, self.flip_masks[toterm as usize]); +- toterm = perm.extract(0); +- f += 1; +- } +- if f > self.maxflips { self.maxflips = f } +- self.checksum += if pk.odd != 0 { -f } else { f }; +- k += 1 +- } +- perm_max = 0; +- } +- } +-} +- +-fn main() { +- let mut state = State::new(); +- state.popmasks(); +- +- let args = env::args().collect::>(); +- if args.len() < 2 { +- println!("usage: {} number", args[0]); +- process::exit(1) +- } +- let max_n = args[1].parse().unwrap(); +- if max_n < 3 || max_n > 15 { +- println!("range: must be 3 <= n <= 14"); +- process::exit(1); +- } +- for i in 0..max_n { state.s[i] = i as u8 } +- state.tk(max_n); +- +- println!("{}\nPfannkuchen({}) = {}", state.checksum, max_n, state.maxflips); +-} +diff --git a/third_party/rust/simd/examples/mandelbrot.rs b/third_party/rust/simd/examples/mandelbrot.rs +deleted file mode 100755 +index c6f1320a0784..000000000000 +--- a/third_party/rust/simd/examples/mandelbrot.rs ++++ /dev/null +@@ -1,125 +0,0 @@ +-#![feature(iterator_step_by, test)] +- +-extern crate test; +-extern crate simd; +-use simd::{f32x4, u32x4}; +-use std::io::prelude::*; +- +-#[inline(never)] +-fn mandelbrot_naive(c_x: f32, c_y: f32, max_iter: u32) -> u32 { +- let mut x = c_x; +- let mut y = c_y; +- let mut count = 0; +- while count < max_iter { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- if sum > 4.0 { +- break +- } +- count += 1; +- x = xx - yy + c_x; +- y = xy * 2.0 + c_y; +- } +- count +-} +- +-#[inline(never)] +-fn mandelbrot_vector(c_x: f32x4, c_y: f32x4, max_iter: u32) -> u32x4 { +- let mut x = c_x; +- let mut y = c_y; +- +- let mut count = u32x4::splat(0); +- for _ in 0..max_iter as usize { +- let xy = x * y; +- let xx = x * x; +- let yy = y * y; +- let sum = xx + yy; +- let mask = sum.lt(f32x4::splat(4.0)); +- +- if !mask.any() { break } +- count = count + mask.to_i().select(u32x4::splat(1), +- u32x4::splat(0)); +- +- x = xx - yy + c_x; +- y = xy + xy + c_y; +- } +- count +-} +- +-const COLOURS: &'static [(f32, f32, f32)] = &[(0.0, 7.0, 100.0), +- (32.0, 107.0, 203.0), +- (237.0, 255.0, 255.0), +- (255.0, 170.0, 0.0), +- (0.0, 2.0, 0.0)]; +-const SCALE: f32 = 12.0; +-const LIMIT: u32 = 100; +- +-#[inline(never)] +-fn output_one(buf: &mut [u8], val: u32) { +- let (r, g, b); +- if val == LIMIT { +- r = 0; +- g = 0; +- b = 0; +- } else { +- let val = (val as f32 % SCALE) * (COLOURS.len() as f32) / SCALE; +- let left = val as 
usize % COLOURS.len(); +- let right = (left + 1) % COLOURS.len(); +- +- let p = val - left as f32; +- let (r1, g1, b1) = COLOURS[left]; +- let (r2, g2, b2) = COLOURS[right]; +- r = (r1 + (r2 - r1) * p) as u8; +- g = (g1 + (g2 - g1) * p) as u8; +- b = (b1 + (b2 - b1) * p) as u8; +- } +- buf[0] = r; +- buf[1] = g; +- buf[2] = b; +-} +- +-fn main() { +- let mut args = std::env::args(); +- args.next(); +- let width = args.next().unwrap().parse().unwrap(); +- let height = args.next().unwrap().parse().unwrap(); +- +- let left = -2.2; +- let right = left + 3.0; +- let top = 1.0; +- let bottom = top - 2.0; +- +- let width_step: f32 = (right - left) / width as f32; +- let height_step: f32 = (bottom - top) / height as f32; +- +- let adjust = f32x4::splat(width_step) * f32x4::new(0., 1., 2., 3.); +- +- println!("P6 {} {} 255", width, height); +- let mut line = vec![0; width * 3]; +- +- if args.next().is_none() { +- for i in 0..height { +- let y = f32x4::splat(top + height_step * i as f32); +- for j in (0..width).step_by(4) { +- let x = f32x4::splat(left + width_step * j as f32) + adjust; +- let ret = mandelbrot_vector(x, y, LIMIT); +- test::black_box(ret); +- for k in 0..4 { let val = ret.extract(k as u32); output_one(&mut line[3*(j + k)..3*(j + k + 1)], val); } +- } +- ::std::io::stdout().write(&line).unwrap(); +- } +- } else { +- for i in 0..height { +- let y = top + height_step * i as f32; +- for j in 0..width { +- let x = left + width_step * j as f32; +- let val = mandelbrot_naive(x, y, LIMIT); +- test::black_box(val); +- output_one(&mut line[3*j..3*(j + 1)], val); +- } +- ::std::io::stdout().write(&line).unwrap(); +- } +- } +-} +diff --git a/third_party/rust/simd/examples/matrix-inverse.rs b/third_party/rust/simd/examples/matrix-inverse.rs +deleted file mode 100644 +index e6eb7ffc4655..000000000000 +--- a/third_party/rust/simd/examples/matrix-inverse.rs ++++ /dev/null +@@ -1,281 +0,0 @@ +-extern crate simd; +-use simd::f32x4; +- +-fn mul(x: &[f32x4; 4], y: &[f32x4; 4]) -> [f32x4; 4] { +- let y0 = y[0]; +- let y1 = y[1]; +- let y2 = y[2]; +- let y3 = y[3]; +- [f32x4::splat(y0.extract(0)) * x[0] + +- f32x4::splat(y0.extract(1)) * x[1] + +- f32x4::splat(y0.extract(2)) * x[2] + +- f32x4::splat(y0.extract(3)) * x[3], +- f32x4::splat(y1.extract(0)) * x[0] + +- f32x4::splat(y1.extract(1)) * x[1] + +- f32x4::splat(y1.extract(2)) * x[2] + +- f32x4::splat(y1.extract(3)) * x[3], +- f32x4::splat(y2.extract(0)) * x[0] + +- f32x4::splat(y2.extract(1)) * x[1] + +- f32x4::splat(y2.extract(2)) * x[2] + +- f32x4::splat(y2.extract(3)) * x[3], +- f32x4::splat(y3.extract(0)) * x[0] + +- f32x4::splat(y3.extract(1)) * x[1] + +- f32x4::splat(y3.extract(2)) * x[2] + +- f32x4::splat(y3.extract(3)) * x[3], +- ] +-} +- +-#[allow(dead_code)] +-fn inverse_naive(x: &[[f32; 4]; 4]) -> [[f32; 4]; 4] { +- let mut t = [[0_f32; 4]; 4]; +- for i in 0..4 { +- t[0][i] = x[i][0]; +- t[1][i] = x[i][1]; +- t[2][i] = x[i][2]; +- t[3][i] = x[i][3]; +- } +- println!("{:?}", t); +- +- let _0 = t[2][2] * t[3][3]; +- let _1 = t[2][3] * t[3][2]; +- let _2 = t[2][1] * t[3][3]; +- let _3 = t[2][3] * t[3][1]; +- let _4 = t[2][1] * t[3][2]; +- let _5 = t[2][2] * t[3][1]; +- let _6 = t[2][0] * t[3][3]; +- let _7 = t[2][3] * t[3][0]; +- let _8 = t[2][0] * t[3][2]; +- let _9 = t[2][2] * t[3][0]; +- let _10 = t[2][0] * t[3][1]; +- let _11 = t[2][1] * t[3][0]; +- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; +- println!("{:?}", v); +- +- let d00 = _0 * t[1][1] + _3 * t[1][2] + _4 * t[1][3] - +- (_1 * t[1][1] + _2 * t[1][2] + _5 * 
t[1][3]); +- let d01 = _1 * t[1][0] + _6 * t[1][2] + _9 * t[1][3] - +- (_0 * t[1][0] + _7 * t[1][2] + _8 * t[1][3]); +- let d02 = _2 * t[1][0] + _7 * t[1][1] + _10 * t[1][3] - +- (_3 * t[1][0] + _6 * t[1][1] + _11 * t[1][3]); +- let d03 = _5 * t[1][0] + _8 * t[1][1] + _11 * t[1][2] - +- (_4 * t[1][0] + _9 * t[1][1] + _10 * t[1][2]); +- let d10 = _1 * t[0][1] + _2 * t[0][2] + _5 * t[0][3] - +- (_0 * t[0][1] + _3 * t[0][2] + _4 * t[0][3]); +- let d11 = _0 * t[0][0] + _7 * t[0][2] + _8 * t[0][3] - +- (_1 * t[0][0] + _6 * t[0][2] + _9 * t[0][3]); +- let d12 = _3 * t[0][0] + _6 * t[0][1] + _11 * t[0][3] - +- (_2 * t[0][0] + _7 * t[0][1] + _10 * t[0][3]); +- let d13 = _4 * t[0][0] + _9 * t[0][1] + _10 * t[0][2] - +- (_5 * t[0][0] + _8 * t[0][1] + _11 * t[0][2]); +- +- println!("{:?}", [d00, d01, d02, d03, d10, d11, d12, d13]); +- +- let _0 = t[0][2] * t[1][3]; +- let _1 = t[0][3] * t[1][2]; +- let _2 = t[0][1] * t[1][3]; +- let _3 = t[0][3] * t[1][1]; +- let _4 = t[0][1] * t[1][2]; +- let _5 = t[0][2] * t[1][1]; +- let _6 = t[0][0] * t[1][3]; +- let _7 = t[0][3] * t[1][0]; +- let _8 = t[0][0] * t[1][2]; +- let _9 = t[0][2] * t[1][0]; +- let _10 = t[0][0] * t[1][1]; +- let _11 = t[0][1] * t[1][0]; +- let v = [_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11]; +- println!("{:?}", v); +- +- let d20 = _0*t[3][1] + _3*t[3][2] + _4*t[3][3]- +- (_1*t[3][1] + _2*t[3][2] + _5*t[3][3]); +- let d21 = _1*t[3][0] + _6*t[3][2] + _9*t[3][3]- +- (_0*t[3][0] + _7*t[3][2] + _8*t[3][3]); +- let d22 = _2*t[3][0] + _7*t[3][1] + _10*t[3][3]- +- (_3*t[3][0] + _6*t[3][1] + _11*t[3][3]); +- let d23 = _5*t[3][0] + _8*t[3][1] + _11*t[3][2]- +- (_4*t[3][0] + _9*t[3][1] + _10*t[3][2]); +- let d30 = _2*t[2][2] + _5*t[2][3] + _1*t[2][1]- +- (_4*t[2][3] + _0*t[2][1] + _3*t[2][2]); +- let d31 = _8*t[2][3] + _0*t[2][0] + _7*t[2][2]- +- (_6*t[2][2] + _9*t[2][3] + _1*t[2][0]); +- let d32 = _6*t[2][1] + _11*t[2][3] + _3*t[2][0]- +- (_10*t[2][3] + _2*t[2][0] + _7*t[2][1]); +- let d33 = _10*t[2][2] + _4*t[2][0] + _9*t[2][1]- +- (_8*t[2][1] + _11*t[2][2] + _5*t[2][0]); +- +- println!("{:?}", [d20, d21, d22, d23, d30, d31, d32, d33]); +- +- let det = t[0][0] * d00 + t[0][1] * d01 + t[0][2] * d02 + t[0][3] * d03; +- +- let det = 1.0 / det; +- let mut ret = [[d00, d01, d02, d03], +- [d10, d11, d12, d13], +- [d20, d21, d22, d23], +- [d30, d31, d32, d33]]; +- for i in 0..4 { +- for j in 0..4 { +- ret[i][j] *= det; +- } +- } +- ret +-} +- +-fn inverse_simd4(x: &[f32x4; 4]) -> [f32x4; 4] { +- let src0 = x[0]; +- let src1 = x[1]; +- let src2 = x[2]; +- let src3 = x[3]; +- +- let tmp1 = f32x4::new(src0.extract(0), src0.extract(1), +- src1.extract(4 - 4), src1.extract(5 - 4)); +- let row1 = f32x4::new(src2.extract(0), src2.extract(1), +- src3.extract(4 - 4), src3.extract(5 - 4)); +- let row0 = f32x4::new(tmp1.extract(0), tmp1.extract(2), +- row1.extract(4 - 4), row1.extract(6 - 4)); +- let row1 = f32x4::new(row1.extract(1), row1.extract(3), +- tmp1.extract(5 - 4), tmp1.extract(7 - 4)); +- +- let tmp1 = f32x4::new(src0.extract(2), src0.extract(3), +- src1.extract(6 - 4), src1.extract(7 - 4)); +- let row3 = f32x4::new(src2.extract(2), src2.extract(3), +- src3.extract(6 - 4), src3.extract(7 - 4)); +- let row2 = f32x4::new(tmp1.extract(0), tmp1.extract(2), +- row3.extract(4 - 4), row3.extract(6 - 4)); +- let row3 = f32x4::new(row3.extract(1), row3.extract(3), +- tmp1.extract(5 - 4), tmp1.extract(7 - 4)); +- +- +- let tmp1 = row2 * row3; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor0 = 
row1 * tmp1; +- let minor1 = row0 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor0 = (row1 * tmp1) - minor0; +- let minor1 = (row0 * tmp1) - minor1; +- let minor1 = f32x4::new(minor1.extract(2), minor1.extract(3), +- minor1.extract(0), minor1.extract(1)); +- //println!("{:?}", minor1); +- +- +- let tmp1 = row1 * row2; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor0 = (row3 * tmp1) + minor0; +- let minor3 = row0 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- +- let minor0 = minor0 - row3 * tmp1; +- let minor3 = row0 * tmp1 - minor3; +- let minor3 = f32x4::new(minor3.extract(2), minor3.extract(3), +- minor3.extract(0), minor3.extract(1)); +- //println!("{:?}", minor1); +- +- +- let tmp1 = row3 * f32x4::new(row1.extract(2), row1.extract(3), +- row1.extract(0), row1.extract(1)); +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let row2 = f32x4::new(row2.extract(2), row2.extract(3), +- row2.extract(0), row2.extract(1)); +- let minor0 = row2 * tmp1 + minor0; +- let minor2 = row0 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor0 = minor0 - row2 * tmp1; +- let minor2 = row0 * tmp1 - minor2; +- let minor2 = f32x4::new(minor2.extract(2), minor2.extract(3), +- minor2.extract(0), minor2.extract(1)); +- //println!("{:?}", minor1); +- +- +- let tmp1 = row0 * row1; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor2 = minor2 + row3 * tmp1; +- let minor3 = row2 * tmp1 - minor3; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor2 = row3 * tmp1 - minor2; +- let minor3 = minor3 - row2 * tmp1; +- //println!("{:?}", minor1); +- +- +- +- let tmp1 = row0 * row3; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor1 = minor1 - row2 * tmp1; +- let minor2 = row1 * tmp1 + minor2; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor1 = row2 * tmp1 + minor1; +- let minor2 = minor2 - row1 * tmp1; +- //println!("{:?}", minor1); +- +- let tmp1 = row0 * row2; +- let tmp1 = f32x4::new(tmp1.extract(1), tmp1.extract(0), +- tmp1.extract(3), tmp1.extract(2)); +- let minor1 = row3 * tmp1 + minor1; +- let minor3 = minor3 - row1 * tmp1; +- let tmp1 = f32x4::new(tmp1.extract(2), tmp1.extract(3), +- tmp1.extract(0), tmp1.extract(1)); +- let minor1 = minor1 - row3 * tmp1; +- let minor3 = row1 * tmp1 + minor3; +- //println!("{:?}", minor1); +- +- let det = row0 * minor0; +- let det = f32x4::new(det.extract(2), det.extract(3), +- det.extract(0), det.extract(1)) + det; +- let det = f32x4::new(det.extract(1), det.extract(0), +- det.extract(3), det.extract(2)) + det; +- let tmp1 = det.approx_reciprocal(); +- let det = tmp1 + tmp1 - det * tmp1 * tmp1; +- +-// let det = f32x4::splat(det.extract(0)); +- +- [minor0 * det, minor1 * det, minor2 * det, minor3 * det] +-} +- +-fn p(x: &[f32x4; 4]) { +- for xx in x { +- for i in 0..4 { +- let v = xx.extract(i); +- if v == 0.0 { +- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, ""); +- } else { +- print!("{}{:6.2}", if i > 0 {", "} else {"|"}, xx.extract(i)); +- } +- } +- println!(" |"); +- } +-} +- +-fn main() { +- let x = [f32x4::new(-100.0, 6.0, 100.0, 1.0), 
+- f32x4::new(3.0, 1.0, 0.0, 1.0), +- f32x4::new(2.0, 1.0, 1.0, 1.0), +- f32x4::new(-10.0, 1.0, 1.0, 1.0)]; +- +- /* let mut x_ = [[0.0; 4]; 4]; +- for i in 0..4 { +- for j in 0..4 { +- x_[i][j] = x[i].extract(j as u32) +- } +- } +- +- let ret = inverse_naive(&x_); +- let mut y = [f32x4::splat(0.0); 4]; +- for i in 0..4 { +- for j in 0..4 { +- y[i] = y[i].replace(j as u32, ret[i][j]) +- } +-}*/ +- let y = inverse_simd4(&x); +- p(&x); +- println!(""); +- p(&y); +- println!(""); +- p(&mul(&x, &y)) +-} +diff --git a/third_party/rust/simd/examples/nbody-nosimd.rs b/third_party/rust/simd/examples/nbody-nosimd.rs +deleted file mode 100644 +index d5f1bb422ff2..000000000000 +--- a/third_party/rust/simd/examples/nbody-nosimd.rs ++++ /dev/null +@@ -1,156 +0,0 @@ +-// The Computer Language Benchmarks Game +-// http://benchmarksgame.alioth.debian.org/ +-// +-// contributed by the Rust Project Developers +-// contributed by TeXitoi +- +-const PI: f64 = 3.141592653589793; +-const SOLAR_MASS: f64 = 4.0 * PI * PI; +-const YEAR: f64 = 365.24; +-const N_BODIES: usize = 5; +- +-static BODIES: [Planet;N_BODIES] = [ +- // Sun +- Planet { +- x: 0.0, y: 0.0, z: 0.0, +- vx: 0.0, vy: 0.0, vz: 0.0, +- mass: SOLAR_MASS, +- }, +- // Jupiter +- Planet { +- x: 4.84143144246472090e+00, +- y: -1.16032004402742839e+00, +- z: -1.03622044471123109e-01, +- vx: 1.66007664274403694e-03 * YEAR, +- vy: 7.69901118419740425e-03 * YEAR, +- vz: -6.90460016972063023e-05 * YEAR, +- mass: 9.54791938424326609e-04 * SOLAR_MASS, +- }, +- // Saturn +- Planet { +- x: 8.34336671824457987e+00, +- y: 4.12479856412430479e+00, +- z: -4.03523417114321381e-01, +- vx: -2.76742510726862411e-03 * YEAR, +- vy: 4.99852801234917238e-03 * YEAR, +- vz: 2.30417297573763929e-05 * YEAR, +- mass: 2.85885980666130812e-04 * SOLAR_MASS, +- }, +- // Uranus +- Planet { +- x: 1.28943695621391310e+01, +- y: -1.51111514016986312e+01, +- z: -2.23307578892655734e-01, +- vx: 2.96460137564761618e-03 * YEAR, +- vy: 2.37847173959480950e-03 * YEAR, +- vz: -2.96589568540237556e-05 * YEAR, +- mass: 4.36624404335156298e-05 * SOLAR_MASS, +- }, +- // Neptune +- Planet { +- x: 1.53796971148509165e+01, +- y: -2.59193146099879641e+01, +- z: 1.79258772950371181e-01, +- vx: 2.68067772490389322e-03 * YEAR, +- vy: 1.62824170038242295e-03 * YEAR, +- vz: -9.51592254519715870e-05 * YEAR, +- mass: 5.15138902046611451e-05 * SOLAR_MASS, +- }, +-]; +- +-#[derive(Clone, Copy)] +-struct Planet { +- x: f64, y: f64, z: f64, +- vx: f64, vy: f64, vz: f64, +- mass: f64, +-} +- +-fn advance(bodies: &mut [Planet;N_BODIES], dt: f64, steps: i32) { +- for _ in 0..steps { +- let mut b_slice: &mut [_] = bodies; +- loop { +- let bi = match shift_mut_ref(&mut b_slice) { +- Some(bi) => bi, +- None => break +- }; +- for bj in b_slice.iter_mut() { +- let dx = bi.x - bj.x; +- let dy = bi.y - bj.y; +- let dz = bi.z - bj.z; +- +- let d2 = dx * dx + dy * dy + dz * dz; +- let mag = dt / (d2 * d2.sqrt()); +- +- let massj_mag = bj.mass * mag; +- bi.vx -= dx * massj_mag; +- bi.vy -= dy * massj_mag; +- bi.vz -= dz * massj_mag; +- +- let massi_mag = bi.mass * mag; +- bj.vx += dx * massi_mag; +- bj.vy += dy * massi_mag; +- bj.vz += dz * massi_mag; +- } +- bi.x += dt * bi.vx; +- bi.y += dt * bi.vy; +- bi.z += dt * bi.vz; +- } +- } +-} +- +-fn energy(bodies: &[Planet;N_BODIES]) -> f64 { +- let mut e = 0.0; +- let mut bodies = bodies.iter(); +- loop { +- let bi = match bodies.next() { +- Some(bi) => bi, +- None => break +- }; +- e += (bi.vx * bi.vx + bi.vy * bi.vy + bi.vz * bi.vz) * bi.mass / 2.0; +- for bj in bodies.clone() 
{ +- let dx = bi.x - bj.x; +- let dy = bi.y - bj.y; +- let dz = bi.z - bj.z; +- let dist = (dx * dx + dy * dy + dz * dz).sqrt(); +- e -= bi.mass * bj.mass / dist; +- } +- } +- e +-} +- +-fn offset_momentum(bodies: &mut [Planet;N_BODIES]) { +- let mut px = 0.0; +- let mut py = 0.0; +- let mut pz = 0.0; +- for bi in bodies.iter() { +- px += bi.vx * bi.mass; +- py += bi.vy * bi.mass; +- pz += bi.vz * bi.mass; +- } +- let sun = &mut bodies[0]; +- sun.vx = - px / SOLAR_MASS; +- sun.vy = - py / SOLAR_MASS; +- sun.vz = - pz / SOLAR_MASS; +-} +- +-fn main() { +- let n = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- let mut bodies = BODIES; +- +- offset_momentum(&mut bodies); +- println!("{:.9}", energy(&bodies)); +- +- advance(&mut bodies, 0.01, n); +- +- println!("{:.9}", energy(&bodies)); +-} +- +-/// Pop a mutable reference off the head of a slice, mutating the slice to no +-/// longer contain the mutable reference. +-fn shift_mut_ref<'a, T>(r: &mut &'a mut [T]) -> Option<&'a mut T> { +- if r.len() == 0 { return None } +- let tmp = std::mem::replace(r, &mut []); +- let (h, t) = tmp.split_at_mut(1); +- *r = t; +- Some(&mut h[0]) +-} +diff --git a/third_party/rust/simd/examples/nbody.rs b/third_party/rust/simd/examples/nbody.rs +deleted file mode 100755 +index d6d4e88e3741..000000000000 +--- a/third_party/rust/simd/examples/nbody.rs ++++ /dev/null +@@ -1,170 +0,0 @@ +-#![feature(cfg_target_feature)] +- +-extern crate simd; +- +-#[cfg(target_feature = "sse2")] +-use simd::x86::sse2::*; +-#[cfg(target_arch = "aarch64")] +-use simd::aarch64::neon::*; +- +-const PI: f64 = 3.141592653589793; +-const SOLAR_MASS: f64 = 4.0 * PI * PI; +-const DAYS_PER_YEAR: f64 = 365.24; +- +-struct Body { +- x: [f64; 3], +- _fill: f64, +- v: [f64; 3], +- mass: f64, +-} +- +-impl Body { +- fn new(x0: f64, x1: f64, x2: f64, +- v0: f64, v1: f64, v2: f64, +- mass: f64) -> Body { +- Body { +- x: [x0, x1, x2], +- _fill: 0.0, +- v: [v0, v1, v2], +- mass: mass, +- } +- } +-} +- +-const N_BODIES: usize = 5; +-const N: usize = N_BODIES * (N_BODIES - 1) / 2; +-fn offset_momentum(bodies: &mut [Body; N_BODIES]) { +- let (sun, rest) = bodies.split_at_mut(1); +- let sun = &mut sun[0]; +- for body in rest { +- for k in 0..3 { +- sun.v[k] -= body.v[k] * body.mass / SOLAR_MASS; +- } +- } +-} +-fn advance(bodies: &mut [Body; N_BODIES], dt: f64) { +- let mut r = [[0.0; 4]; N]; +- let mut mag = [0.0; N]; +- +- let mut dx = [f64x2::splat(0.0); 3]; +- let mut dsquared; +- let mut distance; +- let mut dmag; +- +- let mut i = 0; +- for j in 0..N_BODIES { +- for k in j+1..N_BODIES { +- for m in 0..3 { +- r[i][m] = bodies[j].x[m] - bodies[k].x[m]; +- } +- i += 1; +- } +- } +- +- i = 0; +- while i < N { +- for m in 0..3 { +- dx[m] = f64x2::new(r[i][m], r[i+1][m]); +- } +- +- dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; +- distance = dsquared.to_f32().approx_rsqrt().to_f64(); +- for _ in 0..2 { +- distance = distance * f64x2::splat(1.5) - +- ((f64x2::splat(0.5) * dsquared) * distance) * (distance * distance) +- } +- dmag = f64x2::splat(dt) / dsquared * distance; +- dmag.store(&mut mag, i); +- +- i += 2; +- } +- +- i = 0; +- for j in 0..N_BODIES { +- for k in j+1..N_BODIES { +- for m in 0..3 { +- bodies[j].v[m] -= r[i][m] * bodies[k].mass * mag[i]; +- bodies[k].v[m] += r[i][m] * bodies[j].mass * mag[i]; +- } +- i += 1 +- } +- } +- for body in bodies { +- for m in 0..3 { +- body.x[m] += dt * body.v[m] +- } +- } +-} +- +-fn energy(bodies: &[Body; N_BODIES]) -> f64 { +- let mut e = 0.0; +- for i in 0..N_BODIES { +- 
let bi = &bodies[i]; +- e += bi.mass * (bi.v[0] * bi.v[0] + bi.v[1] * bi.v[1] + bi.v[2] * bi.v[2]) / 2.0; +- for j in i+1..N_BODIES { +- let bj = &bodies[j]; +- let mut dx = [0.0; 3]; +- for k in 0..3 { +- dx[k] = bi.x[k] - bj.x[k]; +- } +- let mut distance = 0.0; +- for &d in &dx { distance += d * d } +- e -= bi.mass * bj.mass / distance.sqrt() +- } +- } +- e +-} +- +-fn main() { +- let mut bodies: [Body; N_BODIES] = [ +- /* sun */ +- Body::new(0.0, 0.0, 0.0, +- 0.0, 0.0, 0.0, +- SOLAR_MASS), +- /* jupiter */ +- Body::new(4.84143144246472090e+00, +- -1.16032004402742839e+00, +- -1.03622044471123109e-01 , +- 1.66007664274403694e-03 * DAYS_PER_YEAR, +- 7.69901118419740425e-03 * DAYS_PER_YEAR, +- -6.90460016972063023e-05 * DAYS_PER_YEAR , +- 9.54791938424326609e-04 * SOLAR_MASS +- ), +- /* saturn */ +- Body::new(8.34336671824457987e+00, +- 4.12479856412430479e+00, +- -4.03523417114321381e-01 , +- -2.76742510726862411e-03 * DAYS_PER_YEAR, +- 4.99852801234917238e-03 * DAYS_PER_YEAR, +- 2.30417297573763929e-05 * DAYS_PER_YEAR , +- 2.85885980666130812e-04 * SOLAR_MASS +- ), +- /* uranus */ +- Body::new(1.28943695621391310e+01, +- -1.51111514016986312e+01, +- -2.23307578892655734e-01 , +- 2.96460137564761618e-03 * DAYS_PER_YEAR, +- 2.37847173959480950e-03 * DAYS_PER_YEAR, +- -2.96589568540237556e-05 * DAYS_PER_YEAR , +- 4.36624404335156298e-05 * SOLAR_MASS +- ), +- /* neptune */ +- Body::new(1.53796971148509165e+01, +- -2.59193146099879641e+01, +- 1.79258772950371181e-01 , +- 2.68067772490389322e-03 * DAYS_PER_YEAR, +- 1.62824170038242295e-03 * DAYS_PER_YEAR, +- -9.51592254519715870e-05 * DAYS_PER_YEAR , +- 5.15138902046611451e-05 * SOLAR_MASS +- ) +- ]; +- +- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- +- offset_momentum(&mut bodies); +- println!("{:.9}", energy(&bodies)); +- for _ in 0..n { +- advance(&mut bodies, 0.01); +- } +- println!("{:.9}", energy(&bodies)); +-} +diff --git a/third_party/rust/simd/examples/ops.rs b/third_party/rust/simd/examples/ops.rs +deleted file mode 100644 +index f8c919101e3c..000000000000 +--- a/third_party/rust/simd/examples/ops.rs ++++ /dev/null +@@ -1,10 +0,0 @@ +-extern crate simd; +- +-use simd::*; +- +-#[allow(unused_variables)] +-fn main() { +- let x = i32x4::splat(1_i32); +- let y = -x; +- let z = !x; +-} +diff --git a/third_party/rust/simd/examples/spectral-norm-nosimd.rs b/third_party/rust/simd/examples/spectral-norm-nosimd.rs +deleted file mode 100644 +index 919f9c61990f..000000000000 +--- a/third_party/rust/simd/examples/spectral-norm-nosimd.rs ++++ /dev/null +@@ -1,106 +0,0 @@ +-// The Computer Language Benchmarks Game +-// http://benchmarksgame.alioth.debian.org/ +-// +-// contributed by the Rust Project Developers +-// contributed by TeXitoi +- +-#![allow(non_snake_case)] +- +-use std::iter::repeat; +-//use std::thread; +- +-// As std::simd::f64x2 is unstable, we provide a similar interface, +-// expecting llvm to autovectorize its usage. 
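Note: packed_simd, which this patch vendors in place of the old simd crate, ships an f64x2 with Add and Div already implemented, so a stand-in struct like the one below is only needed in this no-SIMD variant. A minimal sketch of the same two-lane add-and-divide step on packed_simd types follows; dot_step is an illustrative helper under that assumption, not code from either crate.

// Sketch only: the same lane-wise step as the hand-rolled f64x2 below,
// expressed on packed_simd's f64x2 (assumes the packed_simd API as
// vendored by this patch; dot_step is a hypothetical helper).
use packed_simd::f64x2;

fn dot_step(sum: f64x2, chunk: &[f64], a0: f64, a1: f64) -> f64x2 {
    let top = f64x2::new(chunk[0], chunk[1]); // one f64 per lane
    let bot = f64x2::new(a0, a1);
    sum + top / bot // lane-wise Add and Div, as the struct below defines them
}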
+-#[allow(non_camel_case_types)] +-struct f64x2(f64, f64); +-impl std::ops::Add for f64x2 { +- type Output = Self; +- fn add(self, rhs: Self) -> Self { +- f64x2(self.0 + rhs.0, self.1 + rhs.1) +- } +-} +-impl std::ops::Div for f64x2 { +- type Output = Self; +- fn div(self, rhs: Self) -> Self { +- f64x2(self.0 / rhs.0, self.1 / rhs.1) +- } +-} +- +-fn main() { +- let n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- let answer = spectralnorm(n); +- println!("{:.9}", answer); +-} +- +-fn spectralnorm(n: usize) -> f64 { +- assert!(n % 2 == 0, "only even lengths are accepted"); +- let mut u = repeat(1.0).take(n).collect::<Vec<_>>(); +- let mut v = u.clone(); +- let mut tmp = v.clone(); +- for _ in 0..10 { +- mult_AtAv(&u, &mut v, &mut tmp); +- mult_AtAv(&v, &mut u, &mut tmp); +- } +- (dot(&u, &v) / dot(&v, &v)).sqrt() +-} +- +-fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { +- mult_Av(v, tmp); +- mult_Atv(tmp, out); +-} +- +-fn mult_Av(v: &[f64], out: &mut [f64]) { +- parallel(out, |start, out| mult(v, out, start, |i, j| A(i, j))); +-} +- +-fn mult_Atv(v: &[f64], out: &mut [f64]) { +- parallel(out, |start, out| mult(v, out, start, |i, j| A(j, i))); +-} +- +-fn mult<F>(v: &[f64], out: &mut [f64], start: usize, a: F) +- where F: Fn(usize, usize) -> f64 { +- for (i, slot) in out.iter_mut().enumerate().map(|(i, s)| (i + start, s)) { +- let mut sum = f64x2(0.0, 0.0); +- for (j, chunk) in v.chunks(2).enumerate().map(|(j, s)| (2 * j, s)) { +- let top = f64x2(chunk[0], chunk[1]); +- let bot = f64x2(a(i, j), a(i, j + 1)); +- sum = sum + top / bot; +- } +- let f64x2(a, b) = sum; +- *slot = a + b; +- } +-} +- +-fn A(i: usize, j: usize) -> f64 { +- ((i + j) * (i + j + 1) / 2 + i + 1) as f64 +-} +- +-fn dot(v: &[f64], u: &[f64]) -> f64 { +- v.iter().zip(u.iter()).map(|(a, b)| *a * *b).fold(0., |acc, i| acc + i) +-} +- +-//struct Racy<T>(T); +-//unsafe impl<T> Send for Racy<T> {} +- +-// Executes a closure in parallel over the given mutable slice. The closure `f` +-// is run in parallel and yielded the starting index within `v` as well as a +-// sub-slice of `v`. +-fn parallel<'a, T, F>(v: &mut [T], ref f: F) +- where T: 'static + Send + Sync, +-F: Fn(usize, &mut [T]) + Sync +-{ +- f(0, v); +- /*let size = v.len() / 4 + 1; +- let jhs = v.chunks_mut(size).enumerate().map(|(i, chunk)| { +- // Need to convert `f` and `chunk` to something that can cross the task +- // boundary.
+- let f = Racy(f as *const F as *const usize); +- let raw = Racy((&mut chunk[0] as *mut T, chunk.len())); +- thread::spawn(move|| { +- let f = f.0 as *const F; +- let raw = raw.0; +- unsafe { (*f)(i * size, std::slice::from_raw_parts_mut(raw.0, raw.1)) } +- }) +- }).collect::<Vec<_>>(); +- for jh in jhs { jh.join().unwrap(); }*/ +-} +diff --git a/third_party/rust/simd/examples/spectral-norm.rs b/third_party/rust/simd/examples/spectral-norm.rs +deleted file mode 100755 +index 656f52e4fad0..000000000000 +--- a/third_party/rust/simd/examples/spectral-norm.rs ++++ /dev/null +@@ -1,74 +0,0 @@ +-#![feature(cfg_target_feature)] +-#![allow(non_snake_case)] +- +-extern crate simd; +- +-#[cfg(target_feature = "sse2")] +-use simd::x86::sse2::f64x2; +-#[cfg(target_arch = "aarch64")] +-use simd::aarch64::neon::f64x2; +- +-fn A(i: usize, j: usize) -> f64 { +- ((i + j) * (i + j + 1) / 2 + i + 1) as f64 +-} +- +-fn dot(x: &[f64], y: &[f64]) -> f64 { +- x.iter().zip(y).map(|(&x, &y)| x * y).fold(0.0, |a, b| a + b) +-} +- +-fn mult_Av(v: &[f64], out: &mut [f64]) { +- assert!(v.len() == out.len()); +- assert!(v.len() % 2 == 0); +- +- for i in 0..v.len() { +- let mut sum = f64x2::splat(0.0); +- +- let mut j = 0; +- while j < v.len() { +- let b = f64x2::load(v, j); +- let a = f64x2::new(A(i, j), A(i, j + 1)); +- sum = sum + b / a; +- j += 2 +- } +- out[i] = sum.extract(0) + sum.extract(1); +- } +-} +- +-fn mult_Atv(v: &[f64], out: &mut [f64]) { +- assert!(v.len() == out.len()); +- assert!(v.len() % 2 == 0); +- +- for i in 0..v.len() { +- let mut sum = f64x2::splat(0.0); +- +- let mut j = 0; +- while j < v.len() { +- let b = f64x2::load(v, j); +- let a = f64x2::new(A(j, i), A(j + 1, i)); +- sum = sum + b / a; +- j += 2 +- } +- out[i] = sum.extract(0) + sum.extract(1); +- } +-} +- +-fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { +- mult_Av(v, tmp); +- mult_Atv(tmp, out); +-} +- +-fn main() { +- let mut n: usize = std::env::args().nth(1).expect("need one arg").parse().unwrap(); +- if n % 2 == 1 { n += 1 } +- +- let mut u = vec![1.0; n]; +- let mut v = u.clone(); +- let mut tmp = u.clone(); +- +- for _ in 0..10 { +- mult_AtAv(&u, &mut v, &mut tmp); +- mult_AtAv(&v, &mut u, &mut tmp); +- } +- +- println!("{:.9}", (dot(&u, &v) / dot(&v, &v)).sqrt()); +-} +diff --git a/third_party/rust/simd/src/aarch64/mod.rs b/third_party/rust/simd/src/aarch64/mod.rs +deleted file mode 100644 +index 5ba0a302b4d1..000000000000 +--- a/third_party/rust/simd/src/aarch64/mod.rs ++++ /dev/null +@@ -1,3 +0,0 @@ +-//! Features specific to AArch64 CPUs.
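Note: the deleted spectral-norm.rs above builds each row sum with f64x2::load and reduces with a pair of extracts. Under packed_simd the equivalent inner loop would use from_slice_unaligned and the sum() reduction instead; a sketch under that assumption follows (row_sum is an illustrative helper, not code from this patch).

// Sketch only: the inner loop of mult_Av from the deleted example above,
// on packed_simd's f64x2. from_slice_unaligned stands in for f64x2::load,
// and sum() performs the extract(0) + extract(1) horizontal reduction.
use packed_simd::f64x2;

fn row_sum(v: &[f64], i: usize, a: impl Fn(usize, usize) -> f64) -> f64 {
    let mut sum = f64x2::splat(0.0);
    let mut j = 0;
    while j < v.len() {
        let b = f64x2::from_slice_unaligned(&v[j..]); // load two lanes
        let d = f64x2::new(a(i, j), a(i, j + 1));
        sum += b / d;
        j += 2;
    }
    sum.sum() // horizontal add of both lanes
}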
+- +-pub mod neon; +diff --git a/third_party/rust/simd/src/aarch64/neon.rs b/third_party/rust/simd/src/aarch64/neon.rs +deleted file mode 100644 +index 0cca05a52788..000000000000 +--- a/third_party/rust/simd/src/aarch64/neon.rs ++++ /dev/null +@@ -1,681 +0,0 @@ +-use super::super::*; +-use {simd_cast, f32x2}; +- +-pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u32x2(u32, u32); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i32x2(i32, i32); +- +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u16x4(u16, u16, u16, u16); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i16x4(i16, i16, i16, i16); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u8x8(u8, u8, u8, u8, +- u8, u8, u8, u8); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i8x8(i8, i8, i8, i8, +- i8, i8, i8, i8); +- +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct i64x1(i64); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct u64x1(u64); +-#[repr(simd)] +-#[derive(Copy, Clone)] +-pub struct f64x1(f64); +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn aarch64_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vuqadd_s8(x: i8x16, y: u8x16) -> i8x16; +- fn aarch64_vuqadd_s16(x: i16x8, y: u16x8) -> i16x8; +- fn aarch64_vuqadd_s32(x: i32x4, y: u32x4) -> i32x4; +- fn aarch64_vuqadd_s64(x: i64x2, y: u64x2) -> i64x2; +- fn aarch64_vsqadd_u8(x: u8x16, y: 
i8x16) -> u8x16; +- fn aarch64_vsqadd_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vsqadd_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vsqadd_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn aarch64_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn aarch64_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn aarch64_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; +- fn aarch64_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2; +- fn aarch64_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn aarch64_vfmulx_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vfmulx_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vfmulxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vfmulxq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vfma_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vfma_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vfmaq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vmull_s8(x: i8x8, y: i8x8) -> i16x8; +- fn aarch64_vmull_u8(x: u8x8, y: u8x8) -> u16x8; +- fn aarch64_vmull_s16(x: i16x4, y: i16x4) -> i32x4; +- fn aarch64_vmull_u16(x: u16x4, y: u16x4) -> u32x4; +- fn aarch64_vmull_s32(x: i32x2, y: i32x2) -> i64x2; +- fn aarch64_vmull_u32(x: u32x2, y: u32x2) -> u64x2; +- fn aarch64_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; +- fn aarch64_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; +- fn aarch64_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn aarch64_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn aarch64_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn aarch64_vrsubhn_u32(x: u32x4, y: u32x4) 
-> u16x4; +- fn aarch64_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; +- fn aarch64_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn aarch64_vabd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vabd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vabd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vabd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vabd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vabd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vabd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vabd_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vabdq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vmax_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vmax_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vmaxq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vmin_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vminq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vminq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vmaxnm_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vminnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vminnm_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vminnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vminnmq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vshlq_s8(x: i8x16, 
y: i8x16) -> i8x16; +- fn aarch64_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vqshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vqshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn aarch64_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn aarch64_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn aarch64_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn aarch64_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn aarch64_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn aarch64_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn aarch64_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn aarch64_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn aarch64_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; 
+- fn aarch64_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn aarch64_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn aarch64_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn aarch64_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn aarch64_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn aarch64_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn aarch64_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn aarch64_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn aarch64_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn aarch64_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn aarch64_vsri_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vsri_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vsri_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vsri_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vsri_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vsri_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vsri_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vsri_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vsriq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vsli_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vsli_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vsli_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vsli_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vsli_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vsli_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vsli_s64(x: i64x1, y: i64x1) -> i64x1; +- fn aarch64_vsli_u64(x: u64x1, y: u64x1) -> u64x1; +- fn aarch64_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vvqmovn_s16(x: i16x8) -> i8x8; +- fn aarch64_vvqmovn_u16(x: u16x8) -> u8x8; +- fn aarch64_vvqmovn_s32(x: i32x4) -> i16x4; +- fn aarch64_vvqmovn_u32(x: u32x4) -> u16x4; +- fn aarch64_vvqmovn_s64(x: i64x2) -> i32x2; +- fn aarch64_vvqmovn_u64(x: u64x2) -> u32x2; +- fn aarch64_vabs_s8(x: i8x8) -> i8x8; +- fn aarch64_vabs_s16(x: i16x4) -> i16x4; +- fn aarch64_vabs_s32(x: i32x2) -> i32x2; +- fn aarch64_vabs_s64(x: i64x1) -> i64x1; +- fn aarch64_vabsq_s8(x: i8x16) -> i8x16; +- fn aarch64_vabsq_s16(x: i16x8) -> i16x8; +- fn aarch64_vabsq_s32(x: i32x4) -> i32x4; +- fn aarch64_vabsq_s64(x: i64x2) -> i64x2; +- fn aarch64_vabs_f32(x: f32x2) -> f32x2; +- fn aarch64_vabs_f64(x: f64x1) -> f64x1; +- fn aarch64_vabsq_f32(x: f32x4) -> f32x4; +- fn aarch64_vabsq_f64(x: f64x2) -> f64x2; +- fn aarch64_vqabs_s8(x: i8x8) -> i8x8; +- fn aarch64_vqabs_s16(x: i16x4) -> i16x4; +- fn aarch64_vqabs_s32(x: i32x2) -> i32x2; +- fn aarch64_vqabs_s64(x: i64x1) -> i64x1; +- fn aarch64_vqabsq_s8(x: i8x16) -> i8x16; +- fn aarch64_vqabsq_s16(x: i16x8) -> i16x8; +- fn aarch64_vqabsq_s32(x: i32x4) 
-> i32x4; +- fn aarch64_vqabsq_s64(x: i64x2) -> i64x2; +- fn aarch64_vqneg_s8(x: i8x8) -> i8x8; +- fn aarch64_vqneg_s16(x: i16x4) -> i16x4; +- fn aarch64_vqneg_s32(x: i32x2) -> i32x2; +- fn aarch64_vqneg_s64(x: i64x1) -> i64x1; +- fn aarch64_vqnegq_s8(x: i8x16) -> i8x16; +- fn aarch64_vqnegq_s16(x: i16x8) -> i16x8; +- fn aarch64_vqnegq_s32(x: i32x4) -> i32x4; +- fn aarch64_vqnegq_s64(x: i64x2) -> i64x2; +- fn aarch64_vclz_s8(x: i8x8) -> i8x8; +- fn aarch64_vclz_u8(x: u8x8) -> u8x8; +- fn aarch64_vclz_s16(x: i16x4) -> i16x4; +- fn aarch64_vclz_u16(x: u16x4) -> u16x4; +- fn aarch64_vclz_s32(x: i32x2) -> i32x2; +- fn aarch64_vclz_u32(x: u32x2) -> u32x2; +- fn aarch64_vclzq_s8(x: i8x16) -> i8x16; +- fn aarch64_vclzq_u8(x: u8x16) -> u8x16; +- fn aarch64_vclzq_s16(x: i16x8) -> i16x8; +- fn aarch64_vclzq_u16(x: u16x8) -> u16x8; +- fn aarch64_vclzq_s32(x: i32x4) -> i32x4; +- fn aarch64_vclzq_u32(x: u32x4) -> u32x4; +- fn aarch64_vcls_s8(x: i8x8) -> i8x8; +- fn aarch64_vcls_u8(x: u8x8) -> u8x8; +- fn aarch64_vcls_s16(x: i16x4) -> i16x4; +- fn aarch64_vcls_u16(x: u16x4) -> u16x4; +- fn aarch64_vcls_s32(x: i32x2) -> i32x2; +- fn aarch64_vcls_u32(x: u32x2) -> u32x2; +- fn aarch64_vclsq_s8(x: i8x16) -> i8x16; +- fn aarch64_vclsq_u8(x: u8x16) -> u8x16; +- fn aarch64_vclsq_s16(x: i16x8) -> i16x8; +- fn aarch64_vclsq_u16(x: u16x8) -> u16x8; +- fn aarch64_vclsq_s32(x: i32x4) -> i32x4; +- fn aarch64_vclsq_u32(x: u32x4) -> u32x4; +- fn aarch64_vcnt_s8(x: i8x8) -> i8x8; +- fn aarch64_vcnt_u8(x: u8x8) -> u8x8; +- fn aarch64_vcntq_s8(x: i8x16) -> i8x16; +- fn aarch64_vcntq_u8(x: u8x16) -> u8x16; +- fn aarch64_vrecpe_u32(x: u32x2) -> u32x2; +- fn aarch64_vrecpe_f32(x: f32x2) -> f32x2; +- fn aarch64_vrecpe_f64(x: f64x1) -> f64x1; +- fn aarch64_vrecpeq_u32(x: u32x4) -> u32x4; +- fn aarch64_vrecpeq_f32(x: f32x4) -> f32x4; +- fn aarch64_vrecpeq_f64(x: f64x2) -> f64x2; +- fn aarch64_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vrecps_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vrecpsq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vsqrt_f32(x: f32x2) -> f32x2; +- fn aarch64_vsqrt_f64(x: f64x1) -> f64x1; +- fn aarch64_vsqrtq_f32(x: f32x4) -> f32x4; +- fn aarch64_vsqrtq_f64(x: f64x2) -> f64x2; +- fn aarch64_vrsqrte_u32(x: u32x2) -> u32x2; +- fn aarch64_vrsqrte_f32(x: f32x2) -> f32x2; +- fn aarch64_vrsqrte_f64(x: f64x1) -> f64x1; +- fn aarch64_vrsqrteq_u32(x: u32x4) -> u32x4; +- fn aarch64_vrsqrteq_f32(x: f32x4) -> f32x4; +- fn aarch64_vrsqrteq_f64(x: f64x2) -> f64x2; +- fn aarch64_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vrsqrts_f64(x: f64x1, y: f64x1) -> f64x1; +- fn aarch64_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vrsqrtsq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vrbit_s8(x: i8x8) -> i8x8; +- fn aarch64_vrbit_u8(x: u8x8) -> u8x8; +- fn aarch64_vrbitq_s8(x: i8x16) -> i8x16; +- fn aarch64_vrbitq_u8(x: u8x16) -> u8x16; +- fn aarch64_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn 
aarch64_vpaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpaddq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpaddq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vpaddq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vpaddq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpaddl_s16(x: i8x8) -> i16x4; +- fn aarch64_vpaddl_u16(x: u8x8) -> u16x4; +- fn aarch64_vpaddl_s32(x: i16x4) -> i32x2; +- fn aarch64_vpaddl_u32(x: u16x4) -> u32x2; +- fn aarch64_vpaddl_s64(x: i32x2) -> i64x1; +- fn aarch64_vpaddl_u64(x: u32x2) -> u64x1; +- fn aarch64_vpaddlq_s16(x: i8x16) -> i16x8; +- fn aarch64_vpaddlq_u16(x: u8x16) -> u16x8; +- fn aarch64_vpaddlq_s32(x: i16x8) -> i32x4; +- fn aarch64_vpaddlq_u32(x: u16x8) -> u32x4; +- fn aarch64_vpaddlq_s64(x: i32x4) -> i64x2; +- fn aarch64_vpaddlq_u64(x: u32x4) -> u64x2; +- fn aarch64_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpmaxq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpmaxq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpmaxq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpmaxq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vpmaxq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpmaxq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpmaxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpmaxq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vpmaxq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vpmaxq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpminq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpminq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn aarch64_vpminq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vpminq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpmaxnm_s8(x: i8x8, y: i8x8) -> i8x8; +- fn aarch64_vpmaxnm_u8(x: u8x8, y: u8x8) -> u8x8; +- fn aarch64_vpmaxnm_s16(x: i16x4, y: i16x4) -> i16x4; +- fn aarch64_vpmaxnm_u16(x: u16x4, y: u16x4) -> u16x4; +- fn aarch64_vpmaxnm_s32(x: i32x2, y: i32x2) -> i32x2; +- fn aarch64_vpmaxnm_u32(x: u32x2, y: u32x2) -> u32x2; +- fn aarch64_vpmaxnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpmaxnmq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn aarch64_vpmaxnmq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vpmaxnmq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn aarch64_vpmaxnmq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn aarch64_vpmaxnmq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn aarch64_vpmaxnmq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn aarch64_vpmaxnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpmaxnmq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn 
aarch64_vpmaxnmq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn aarch64_vpmaxnmq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vpminnm_f32(x: f32x2, y: f32x2) -> f32x2; +- fn aarch64_vpminnmq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn aarch64_vpminnmq_f64(x: f64x2, y: f64x2) -> f64x2; +- fn aarch64_vaddv_s8(x: i8x8) -> i8; +- fn aarch64_vaddv_u8(x: u8x8) -> u8; +- fn aarch64_vaddv_s16(x: i16x4) -> i16; +- fn aarch64_vaddv_u16(x: u16x4) -> u16; +- fn aarch64_vaddv_s32(x: i32x2) -> i32; +- fn aarch64_vaddv_u32(x: u32x2) -> u32; +- fn aarch64_vaddv_f32(x: f32x2) -> f32; +- fn aarch64_vaddvq_s8(x: i8x16) -> i8; +- fn aarch64_vaddvq_u8(x: u8x16) -> u8; +- fn aarch64_vaddvq_s16(x: i16x8) -> i16; +- fn aarch64_vaddvq_u16(x: u16x8) -> u16; +- fn aarch64_vaddvq_s32(x: i32x4) -> i32; +- fn aarch64_vaddvq_u32(x: u32x4) -> u32; +- fn aarch64_vaddvq_f32(x: f32x4) -> f32; +- fn aarch64_vaddvq_s64(x: i64x2) -> i64; +- fn aarch64_vaddvq_u64(x: u64x2) -> u64; +- fn aarch64_vaddvq_f64(x: f64x2) -> f64; +- fn aarch64_vaddlv_s8(x: i8x8) -> i16; +- fn aarch64_vaddlv_u8(x: u8x8) -> u16; +- fn aarch64_vaddlv_s16(x: i16x4) -> i32; +- fn aarch64_vaddlv_u16(x: u16x4) -> u32; +- fn aarch64_vaddlv_s32(x: i32x2) -> i64; +- fn aarch64_vaddlv_u32(x: u32x2) -> u64; +- fn aarch64_vaddlvq_s8(x: i8x16) -> i16; +- fn aarch64_vaddlvq_u8(x: u8x16) -> u16; +- fn aarch64_vaddlvq_s16(x: i16x8) -> i32; +- fn aarch64_vaddlvq_u16(x: u16x8) -> u32; +- fn aarch64_vaddlvq_s32(x: i32x4) -> i64; +- fn aarch64_vaddlvq_u32(x: u32x4) -> u64; +- fn aarch64_vmaxv_s8(x: i8x8) -> i8; +- fn aarch64_vmaxv_u8(x: u8x8) -> u8; +- fn aarch64_vmaxv_s16(x: i16x4) -> i16; +- fn aarch64_vmaxv_u16(x: u16x4) -> u16; +- fn aarch64_vmaxv_s32(x: i32x2) -> i32; +- fn aarch64_vmaxv_u32(x: u32x2) -> u32; +- fn aarch64_vmaxv_f32(x: f32x2) -> f32; +- fn aarch64_vmaxvq_s8(x: i8x16) -> i8; +- fn aarch64_vmaxvq_u8(x: u8x16) -> u8; +- fn aarch64_vmaxvq_s16(x: i16x8) -> i16; +- fn aarch64_vmaxvq_u16(x: u16x8) -> u16; +- fn aarch64_vmaxvq_s32(x: i32x4) -> i32; +- fn aarch64_vmaxvq_u32(x: u32x4) -> u32; +- fn aarch64_vmaxvq_f32(x: f32x4) -> f32; +- fn aarch64_vmaxvq_f64(x: f64x2) -> f64; +- fn aarch64_vminv_s8(x: i8x8) -> i8; +- fn aarch64_vminv_u8(x: u8x8) -> u8; +- fn aarch64_vminv_s16(x: i16x4) -> i16; +- fn aarch64_vminv_u16(x: u16x4) -> u16; +- fn aarch64_vminv_s32(x: i32x2) -> i32; +- fn aarch64_vminv_u32(x: u32x2) -> u32; +- fn aarch64_vminv_f32(x: f32x2) -> f32; +- fn aarch64_vminvq_s8(x: i8x16) -> i8; +- fn aarch64_vminvq_u8(x: u8x16) -> u8; +- fn aarch64_vminvq_s16(x: i16x8) -> i16; +- fn aarch64_vminvq_u16(x: u16x8) -> u16; +- fn aarch64_vminvq_s32(x: i32x4) -> i32; +- fn aarch64_vminvq_u32(x: u32x4) -> u32; +- fn aarch64_vminvq_f32(x: f32x4) -> f32; +- fn aarch64_vminvq_f64(x: f64x2) -> f64; +- fn aarch64_vmaxnmv_f32(x: f32x2) -> f32; +- fn aarch64_vmaxnmvq_f32(x: f32x4) -> f32; +- fn aarch64_vmaxnmvq_f64(x: f64x2) -> f64; +- fn aarch64_vminnmv_f32(x: f32x2) -> f32; +- fn aarch64_vminnmvq_f32(x: f32x4) -> f32; +- fn aarch64_vminnmvq_f64(x: f64x2) -> f64; +- fn aarch64_vqtbl1_s8(x: i8x16, y: u8x8) -> i8x8; +- fn aarch64_vqtbl1_u8(x: u8x16, y: u8x8) -> u8x8; +- fn aarch64_vqtbl1q_s8(x: i8x16, y: u8x16) -> i8x16; +- fn aarch64_vqtbl1q_u8(x: u8x16, y: u8x16) -> u8x16; +- fn aarch64_vqtbx1_s8(x: i8x8, y: i8x16, z: u8x8) -> i8x8; +- fn aarch64_vqtbx1_u8(x: u8x8, y: u8x16, z: u8x8) -> u8x8; +- fn aarch64_vqtbx1q_s8(x: i8x16, y: i8x16, z: u8x16) -> i8x16; +- fn aarch64_vqtbx1q_u8(x: u8x16, y: u8x16, z: u8x16) -> u8x16; +- fn aarch64_vqtbl2_s8(x: (i8x16, i8x16), y: u8x8) -> 
i8x8; +- fn aarch64_vqtbl2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbl2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbl2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbx2_s8(x: (i8x16, i8x16), y: u8x8) -> i8x8; +- fn aarch64_vqtbx2_u8(x: (u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbx2q_s8(x: (i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbx2q_u8(x: (u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbl3_s8(x: (i8x16, i8x16, i8x16), y: u8x8) -> i8x8; +- fn aarch64_vqtbl3_u8(x: (u8x16, u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbl3q_s8(x: (i8x16, i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbl3q_u8(x: (u8x16, u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbx3_s8(x: i8x8, y: (i8x16, i8x16, i8x16), z: u8x8) -> i8x8; +- fn aarch64_vqtbx3_u8(x: u8x8, y: (u8x16, u8x16, u8x16), z: u8x8) -> u8x8; +- fn aarch64_vqtbx3q_s8(x: i8x16, y: (i8x16, i8x16, i8x16), z: u8x16) -> i8x16; +- fn aarch64_vqtbx3q_u8(x: u8x16, y: (u8x16, u8x16, u8x16), z: u8x16) -> u8x16; +- fn aarch64_vqtbl4_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x8) -> i8x8; +- fn aarch64_vqtbl4_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x8) -> u8x8; +- fn aarch64_vqtbl4q_s8(x: (i8x16, i8x16, i8x16, i8x16), y: u8x16) -> i8x16; +- fn aarch64_vqtbl4q_u8(x: (u8x16, u8x16, u8x16, u8x16), y: u8x16) -> u8x16; +- fn aarch64_vqtbx4_s8(x: i8x8, y: (i8x16, i8x16, i8x16, i8x16), z: u8x8) -> i8x8; +- fn aarch64_vqtbx4_u8(x: u8x8, y: (u8x16, u8x16, u8x16, u8x16), z: u8x8) -> u8x8; +- fn aarch64_vqtbx4q_s8(x: i8x16, y: (i8x16, i8x16, i8x16, i8x16), z: u8x16) -> i8x16; +- fn aarch64_vqtbx4q_u8(x: u8x16, y: (u8x16, u8x16, u8x16, u8x16), z: u8x16) -> u8x16; +-} +- +-pub trait Aarch64F32x4 { +- fn to_f64(self) -> f64x2; +-} +-impl Aarch64F32x4 for f32x4 { +- #[inline] +- fn to_f64(self) -> f64x2 { +- unsafe { +- simd_cast(f32x2(self.0, self.1)) +- } +- } +-} +- +-pub trait Aarch64U8x16 { +- fn table_lookup_1(self, t0: u8x16) -> u8x16; +-} +-impl Aarch64U8x16 for u8x16 { +- #[inline] +- fn table_lookup_1(self, t0: u8x16) -> u8x16 { +- unsafe {aarch64_vqtbl1q_u8(t0, self)} +- } +-} +-pub trait Aarch64I8x16 { +- fn table_lookup_1(self, t0: i8x16) -> i8x16; +-} +-impl Aarch64I8x16 for i8x16 { +- #[inline] +- fn table_lookup_1(self, t0: i8x16) -> i8x16 { +- unsafe {aarch64_vqtbl2q_s8((t0, t0), ::bitcast(self))} +- } +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::super::super::*; +- use core::mem; +- +- #[inline] +- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { +- unsafe {super::aarch64_vsqrtq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { +- unsafe {super::aarch64_vrsqrteq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { +- unsafe {super::aarch64_vrecpeq_f32(x)} +- } +- #[inline] +- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::aarch64_vmaxq_f32(x, y)} +- } +- #[inline] +- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::aarch64_vminq_f32(x, y)} +- } +- +- macro_rules! bools { +- ($($ty: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- super::$min(mem::transmute(x)) != 0 +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- super::$max(mem::transmute(x)) != 0 +- } +- } +- )* +- } +- } +- +- bools! 
{ +- bool32fx4, bool32fx4_all(aarch64_vminvq_u32), bool32fx4_any(aarch64_vmaxvq_u32); +- bool8ix16, bool8ix16_all(aarch64_vminvq_u8), bool8ix16_any(aarch64_vmaxvq_u8); +- bool16ix8, bool16ix8_all(aarch64_vminvq_u16), bool16ix8_any(aarch64_vmaxvq_u16); +- bool32ix4, bool32ix4_all(aarch64_vminvq_u32), bool32ix4_any(aarch64_vmaxvq_u32); +- } +-} +diff --git a/third_party/rust/simd/src/arm/mod.rs b/third_party/rust/simd/src/arm/mod.rs +deleted file mode 100644 +index 0d451103840b..000000000000 +--- a/third_party/rust/simd/src/arm/mod.rs ++++ /dev/null +@@ -1,4 +0,0 @@ +-//! Features specific to ARM CPUs. +- +-#[cfg(any(feature = "doc", target_feature = "neon"))] +-pub mod neon; +diff --git a/third_party/rust/simd/src/arm/neon.rs b/third_party/rust/simd/src/arm/neon.rs +deleted file mode 100644 +index 8c90a72bb0dc..000000000000 +--- a/third_party/rust/simd/src/arm/neon.rs ++++ /dev/null +@@ -1,622 +0,0 @@ +-use super::super::*; +-use sixty_four::{i64x2, u64x2}; +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u32x2(u32, u32); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct i32x2(i32, i32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32ix2(i32, i32); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct f32x2(f32, f32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32fx2(i32, i32); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u16x4(u16, u16, u16, u16); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct i16x4(i16, i16, i16, i16); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool16ix4(i16, i16, i16, i16); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u8x8(u8, u8, u8, u8, +- u8, u8, u8, u8); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct i8x8(i8, i8, i8, i8, +- i8, i8, i8, i8); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool8ix8(i8, i8, i8, i8, +- i8, i8, i8, i8); +- +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct i64x1(i64); +-#[repr(simd)] +-#[derive(Debug, Copy, Clone)] +-pub struct u64x1(u64); +- +-macro_rules! half_bools { +- ($($ty: ty, $as_u: ty, $elem: ty, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { +- $( +- impl $ty { +- #[inline] +- pub fn $all(self) -> bool { +- unsafe { +- let t: $as_u = bitcast(self); +- let y = $min(t, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 == 0xFFFFFFFF +- } +- } +- #[inline] +- pub fn $any(self) -> bool { +- unsafe { +- let t: $as_u = bitcast(self); +- let y = $max(t, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 != 0 +- } +- } +- } +- +- impl Clone for $ty { +- #[inline] fn clone(&self) -> Self { +- *self +- } +- } +- +- unsafe impl Simd for $ty { +- type Bool = $ty; +- type Elem = $elem; +- } +- +- )* +- } +-} +- +-half_bools! { +- bool32fx2, u32x2, i32, bool32fx2_all(arm_vpmin_u32), bool32fx2_any(arm_vpmax_u32); +- bool8ix8, u8x8, i8, bool8ix8_all(arm_vpmin_u8), bool8ix8_any(arm_vpmax_u8); +- bool16ix4, u16x4, i16, bool16ix4_all(arm_vpmin_u16), bool16ix4_any(arm_vpmax_u16); +- bool32ix2, u32x2, f32, bool32ix2_all(arm_vpmin_u32), bool32ix2_any(arm_vpmax_u32); +-} +- +-macro_rules! 
half_simd { +- ($($ty: ty, $elem: ty, $bool_ty: ty;)*) => { +- $( +- unsafe impl Simd for $ty { +- type Bool = $bool_ty; +- type Elem = $elem; +- } +- )* +- } +-} +- +-half_simd! { +- f32x2, f32, bool32fx2; +- u32x2, u32, bool32ix2; +- i32x2, i32, bool32ix2; +- u16x4, u16, bool16ix4; +- i16x4, i16, bool16ix4; +- u8x8, u8, bool8ix8; +- i8x8, i8, bool8ix8; +-} +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn arm_vhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vrhadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vrhadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vrhadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vrhadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vrhadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vrhadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vrhaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vrhaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vrhaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vrhaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vrhaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vrhaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vqadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vqadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vqadd_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqadd_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vqaddq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqaddq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vqaddq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqaddq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vqaddq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqaddq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqaddq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqaddq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vraddhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn arm_vraddhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn arm_vraddhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn arm_vraddhn_u32(x: u32x4, y: u32x4) -> u16x4; +- fn arm_vraddhn_s64(x: i64x2, y: i64x2) -> i32x2; +- fn arm_vraddhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn arm_vfma_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vfmaq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vqdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqrdmulh_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqrdmulh_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqrdmulhq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqrdmulhq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vmull_s8(x: i8x8, y: i8x8) -> i16x8; +- fn arm_vmull_u8(x: u8x8, y: u8x8) -> u16x8; +- fn arm_vmull_s16(x: i16x4, y: i16x4) -> i32x4; +- fn arm_vmull_u16(x: u16x4, y: u16x4) -> u32x4; +- fn arm_vmull_s32(x: i32x2, y: i32x2) -> i64x2; +- fn arm_vmull_u32(x: u32x2, y: u32x2) -> u64x2; +- fn 
arm_vqdmullq_s8(x: i8x8, y: i8x8) -> i16x8; +- fn arm_vqdmullq_s16(x: i16x4, y: i16x4) -> i32x4; +- fn arm_vhsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vhsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vhsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vhsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vhsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vhsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vhsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vhsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vhsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vhsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vhsubq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vhsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqsub_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqsub_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vqsub_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqsub_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vqsub_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqsub_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vqsub_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqsub_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vqsubq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqsubq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vqsubq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqsubq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vqsubq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqsubq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vqsubq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqsubq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vrsubhn_s16(x: i16x8, y: i16x8) -> i8x8; +- fn arm_vrsubhn_u16(x: u16x8, y: u16x8) -> u8x8; +- fn arm_vrsubhn_s32(x: i32x4, y: i32x4) -> i16x4; +- fn arm_vrsubhn_u32(x: u32x4, y: u32x4) -> u16x4; +- fn arm_vrsubhn_s64(x: i64x2, y: i64x2) -> i32x2; +- fn arm_vrsubhn_u64(x: u64x2, y: u64x2) -> u32x2; +- fn arm_vabd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vabd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vabd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vabd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vabd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vabd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vabd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vabdq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vabdq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vabdq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vabdq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vabdq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vabdq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vabdq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vmax_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vmaxq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vmaxq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vmaxq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vmaxq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vmaxq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vmaxq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vmaxq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vminq_s8(x: i8x16, y: i8x16) -> i8x16; +- 
fn arm_vminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn arm_vshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn arm_vqshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vqshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vqshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vqshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn arm_vqshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vqshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vqshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vqshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn arm_vrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vrshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn arm_vrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn arm_vqrshl_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vqrshl_u8(x: u8x8, y: i8x8) -> u8x8; +- fn arm_vqrshl_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vqrshl_u16(x: u16x4, y: i16x4) -> u16x4; +- fn arm_vqrshl_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vqrshl_u32(x: u32x2, y: i32x2) -> u32x2; +- fn arm_vqrshl_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vqrshl_u64(x: u64x1, y: i64x1) -> u64x1; +- fn arm_vqrshlq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vqrshlq_u8(x: u8x16, y: i8x16) -> u8x16; +- fn arm_vqrshlq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vqrshlq_u16(x: u16x8, y: i16x8) -> u16x8; +- fn arm_vqrshlq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vqrshlq_u32(x: u32x4, y: i32x4) -> u32x4; +- fn arm_vqrshlq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vqrshlq_u64(x: u64x2, y: i64x2) -> u64x2; +- fn arm_vqshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn 
arm_vqshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqshrun_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqrshrun_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vqrshrun_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqrshrun_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vqshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn arm_vqshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn arm_vqshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn arm_vrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn arm_vrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn arm_vrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn arm_vqrshrn_n_s16(x: i16x8, y: u32) -> i8x8; +- fn arm_vqrshrn_n_u16(x: u16x8, y: u32) -> u8x8; +- fn arm_vqrshrn_n_s32(x: i32x4, y: u32) -> i16x4; +- fn arm_vqrshrn_n_u32(x: u32x4, y: u32) -> u16x4; +- fn arm_vqrshrn_n_s64(x: i64x2, y: u32) -> i32x2; +- fn arm_vqrshrn_n_u64(x: u64x2, y: u32) -> u32x2; +- fn arm_vsri_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vsri_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vsri_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vsri_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vsri_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vsri_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vsri_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vsri_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vsriq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vsriq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vsriq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vsriq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vsriq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vsriq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vsriq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vsriq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vsli_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vsli_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vsli_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vsli_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vsli_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vsli_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vsli_s64(x: i64x1, y: i64x1) -> i64x1; +- fn arm_vsli_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vsliq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vsliq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vsliq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vsliq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vsliq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vsliq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vsliq_s64(x: i64x2, y: i64x2) -> i64x2; +- fn arm_vsliq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vvqmovn_s16(x: i16x8) -> i8x8; +- fn arm_vvqmovn_u16(x: u16x8) -> u8x8; +- fn arm_vvqmovn_s32(x: i32x4) -> i16x4; +- fn arm_vvqmovn_u32(x: u32x4) -> u16x4; +- fn arm_vvqmovn_s64(x: i64x2) -> i32x2; +- fn arm_vvqmovn_u64(x: u64x2) -> u32x2; +- fn arm_vabs_s8(x: i8x8) -> i8x8; +- fn arm_vabs_s16(x: i16x4) -> i16x4; +- fn arm_vabs_s32(x: i32x2) -> i32x2; +- fn arm_vabsq_s8(x: i8x16) -> i8x16; +- fn arm_vabsq_s16(x: i16x8) -> i16x8; +- fn arm_vabsq_s32(x: i32x4) -> i32x4; +- fn arm_vabs_f32(x: f32x2) -> f32x2; +- fn arm_vabsq_f32(x: f32x4) -> f32x4; +- fn arm_vqabs_s8(x: i8x8) -> i8x8; +- fn arm_vqabs_s16(x: i16x4) -> i16x4; +- fn arm_vqabs_s32(x: i32x2) -> i32x2; +- fn arm_vqabsq_s8(x: i8x16) -> i8x16; +- fn arm_vqabsq_s16(x: i16x8) -> i16x8; +- fn arm_vqabsq_s32(x: i32x4) -> i32x4; +- fn 
arm_vqneg_s8(x: i8x8) -> i8x8; +- fn arm_vqneg_s16(x: i16x4) -> i16x4; +- fn arm_vqneg_s32(x: i32x2) -> i32x2; +- fn arm_vqnegq_s8(x: i8x16) -> i8x16; +- fn arm_vqnegq_s16(x: i16x8) -> i16x8; +- fn arm_vqnegq_s32(x: i32x4) -> i32x4; +- fn arm_vclz_s8(x: i8x8) -> i8x8; +- fn arm_vclz_u8(x: u8x8) -> u8x8; +- fn arm_vclz_s16(x: i16x4) -> i16x4; +- fn arm_vclz_u16(x: u16x4) -> u16x4; +- fn arm_vclz_s32(x: i32x2) -> i32x2; +- fn arm_vclz_u32(x: u32x2) -> u32x2; +- fn arm_vclzq_s8(x: i8x16) -> i8x16; +- fn arm_vclzq_u8(x: u8x16) -> u8x16; +- fn arm_vclzq_s16(x: i16x8) -> i16x8; +- fn arm_vclzq_u16(x: u16x8) -> u16x8; +- fn arm_vclzq_s32(x: i32x4) -> i32x4; +- fn arm_vclzq_u32(x: u32x4) -> u32x4; +- fn arm_vcls_s8(x: i8x8) -> i8x8; +- fn arm_vcls_u8(x: u8x8) -> u8x8; +- fn arm_vcls_s16(x: i16x4) -> i16x4; +- fn arm_vcls_u16(x: u16x4) -> u16x4; +- fn arm_vcls_s32(x: i32x2) -> i32x2; +- fn arm_vcls_u32(x: u32x2) -> u32x2; +- fn arm_vclsq_s8(x: i8x16) -> i8x16; +- fn arm_vclsq_u8(x: u8x16) -> u8x16; +- fn arm_vclsq_s16(x: i16x8) -> i16x8; +- fn arm_vclsq_u16(x: u16x8) -> u16x8; +- fn arm_vclsq_s32(x: i32x4) -> i32x4; +- fn arm_vclsq_u32(x: u32x4) -> u32x4; +- fn arm_vcnt_s8(x: i8x8) -> i8x8; +- fn arm_vcnt_u8(x: u8x8) -> u8x8; +- fn arm_vcntq_s8(x: i8x16) -> i8x16; +- fn arm_vcntq_u8(x: u8x16) -> u8x16; +- fn arm_vrecpe_u32(x: u32x2) -> u32x2; +- fn arm_vrecpe_f32(x: f32x2) -> f32x2; +- fn arm_vrecpeq_u32(x: u32x4) -> u32x4; +- fn arm_vrecpeq_f32(x: f32x4) -> f32x4; +- fn arm_vrecps_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vrecpsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vsqrt_f32(x: f32x2) -> f32x2; +- fn arm_vsqrtq_f32(x: f32x4) -> f32x4; +- fn arm_vrsqrte_u32(x: u32x2) -> u32x2; +- fn arm_vrsqrte_f32(x: f32x2) -> f32x2; +- fn arm_vrsqrteq_u32(x: u32x4) -> u32x4; +- fn arm_vrsqrteq_f32(x: f32x4) -> f32x4; +- fn arm_vrsqrts_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vrsqrtsq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vbsl_s8(x: u8x8, y: i8x8) -> i8x8; +- fn arm_vbsl_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vbsl_s16(x: u16x4, y: i16x4) -> i16x4; +- fn arm_vbsl_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vbsl_s32(x: u32x2, y: i32x2) -> i32x2; +- fn arm_vbsl_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vbsl_s64(x: u64x1, y: i64x1) -> i64x1; +- fn arm_vbsl_u64(x: u64x1, y: u64x1) -> u64x1; +- fn arm_vbslq_s8(x: u8x16, y: i8x16) -> i8x16; +- fn arm_vbslq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vbslq_s16(x: u16x8, y: i16x8) -> i16x8; +- fn arm_vbslq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vbslq_s32(x: u32x4, y: i32x4) -> i32x4; +- fn arm_vbslq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vbslq_s64(x: u64x2, y: i64x2) -> i64x2; +- fn arm_vbslq_u64(x: u64x2, y: u64x2) -> u64x2; +- fn arm_vpadd_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vpadd_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vpadd_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vpadd_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vpadd_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vpadd_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vpadd_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vpaddl_s16(x: i8x8) -> i16x4; +- fn arm_vpaddl_u16(x: u8x8) -> u16x4; +- fn arm_vpaddl_s32(x: i16x4) -> i32x2; +- fn arm_vpaddl_u32(x: u16x4) -> u32x2; +- fn arm_vpaddl_s64(x: i32x2) -> i64x1; +- fn arm_vpaddl_u64(x: u32x2) -> u64x1; +- fn arm_vpaddlq_s16(x: i8x16) -> i16x8; +- fn arm_vpaddlq_u16(x: u8x16) -> u16x8; +- fn arm_vpaddlq_s32(x: i16x8) -> i32x4; +- fn arm_vpaddlq_u32(x: u16x8) -> u32x4; +- fn arm_vpaddlq_s64(x: i32x4) -> i64x2; +- fn arm_vpaddlq_u64(x: u32x4) -> 
u64x2; +- fn arm_vpadal_s16(x: i16x4, y: i8x8) -> i16x4; +- fn arm_vpadal_u16(x: u16x4, y: u8x8) -> u16x4; +- fn arm_vpadal_s32(x: i32x2, y: i16x4) -> i32x2; +- fn arm_vpadal_u32(x: u32x2, y: u16x4) -> u32x2; +- fn arm_vpadal_s64(x: i64x1, y: i32x2) -> i64x1; +- fn arm_vpadal_u64(x: u64x1, y: u32x2) -> u64x1; +- fn arm_vpadalq_s16(x: i16x8, y: i8x16) -> i16x8; +- fn arm_vpadalq_u16(x: u16x8, y: u8x16) -> u16x8; +- fn arm_vpadalq_s32(x: i32x4, y: i16x8) -> i32x4; +- fn arm_vpadalq_u32(x: u32x4, y: u16x8) -> u32x4; +- fn arm_vpadalq_s64(x: i64x2, y: i32x4) -> i64x2; +- fn arm_vpadalq_u64(x: u64x2, y: u32x4) -> u64x2; +- fn arm_vpmax_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vpmax_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vpmax_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vpmax_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vpmax_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vpmax_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vpmax_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vpmin_s8(x: i8x8, y: i8x8) -> i8x8; +- fn arm_vpmin_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vpmin_s16(x: i16x4, y: i16x4) -> i16x4; +- fn arm_vpmin_u16(x: u16x4, y: u16x4) -> u16x4; +- fn arm_vpmin_s32(x: i32x2, y: i32x2) -> i32x2; +- fn arm_vpmin_u32(x: u32x2, y: u32x2) -> u32x2; +- fn arm_vpmin_f32(x: f32x2, y: f32x2) -> f32x2; +- fn arm_vpminq_s8(x: i8x16, y: i8x16) -> i8x16; +- fn arm_vpminq_u8(x: u8x16, y: u8x16) -> u8x16; +- fn arm_vpminq_s16(x: i16x8, y: i16x8) -> i16x8; +- fn arm_vpminq_u16(x: u16x8, y: u16x8) -> u16x8; +- fn arm_vpminq_s32(x: i32x4, y: i32x4) -> i32x4; +- fn arm_vpminq_u32(x: u32x4, y: u32x4) -> u32x4; +- fn arm_vpminq_f32(x: f32x4, y: f32x4) -> f32x4; +- fn arm_vtbl1_s8(x: i8x8, y: u8x8) -> i8x8; +- fn arm_vtbl1_u8(x: u8x8, y: u8x8) -> u8x8; +- fn arm_vtbx1_s8(x: i8x8, y: i8x8, z: u8x8) -> i8x8; +- fn arm_vtbx1_u8(x: u8x8, y: u8x8, z: u8x8) -> u8x8; +- fn arm_vtbl2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; +- fn arm_vtbl2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbx2_s8(x: (i8x8, i8x8), y: u8x8) -> i8x8; +- fn arm_vtbx2_u8(x: (u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbl3_s8(x: (i8x8, i8x8, i8x8), y: u8x8) -> i8x8; +- fn arm_vtbl3_u8(x: (u8x8, u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbx3_s8(x: i8x8, y: (i8x8, i8x8, i8x8), z: u8x8) -> i8x8; +- fn arm_vtbx3_u8(x: u8x8, y: (u8x8, u8x8, u8x8), z: u8x8) -> u8x8; +- fn arm_vtbl4_s8(x: (i8x8, i8x8, i8x8, i8x8), y: u8x8) -> i8x8; +- fn arm_vtbl4_u8(x: (u8x8, u8x8, u8x8, u8x8), y: u8x8) -> u8x8; +- fn arm_vtbx4_s8(x: i8x8, y: (i8x8, i8x8, i8x8, i8x8), z: u8x8) -> i8x8; +- fn arm_vtbx4_u8(x: u8x8, y: (u8x8, u8x8, u8x8, u8x8), z: u8x8) -> u8x8; +-} +- +- +-impl u8x8 { +- #[inline] +- pub fn table_lookup_1(self, t0: u8x8) -> u8x8 { +- unsafe {arm_vtbl1_u8(t0, self)} +- } +- #[inline] +- pub fn table_lookup_2(self, t0: u8x8, t1: u8x8) -> u8x8 { +- unsafe {arm_vtbl2_u8((t0, t1), self)} +- } +- #[inline] +- pub fn table_lookup_3(self, t0: u8x8, t1: u8x8, t2: u8x8) -> u8x8 { +- unsafe {arm_vtbl3_u8((t0, t1, t2), self)} +- } +- #[inline] +- pub fn table_lookup_4(self, t0: u8x8, t1: u8x8, t2: u8x8, t3: u8x8) -> u8x8 { +- unsafe {arm_vtbl4_u8((t0, t1, t2, t3), self)} +- } +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::super::super::*; +- use super::*; +- use core::mem; +- +- #[inline] +- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { +- unsafe {super::arm_vsqrtq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { +- unsafe {super::arm_vrsqrteq_f32(x)} +- } +- #[inline] +- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { +- unsafe 
{super::arm_vrecpeq_f32(x)} +- } +- #[inline] +- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::arm_vmaxq_f32(x, y)} +- } +- #[inline] +- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::arm_vminq_f32(x, y)} +- } +- +- macro_rules! bools { +- ($($ty: ty, $as_u: ty, $shuffle_fn: ident, $lo_idxs: expr, $hi_idxs: expr, $all: ident ($min: ident), $any: ident ($max: ident);)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- let t: $as_u = bitcast(x); +- let lo = $shuffle_fn(t, t, $lo_idxs); +- let hi = $shuffle_fn(t, t, $hi_idxs); +- let x = super::$min(lo, hi); +- let y = super::$min(x, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 == 0xFFFFFFFF +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- let t: $as_u = bitcast(x); +- let lo = $shuffle_fn(t, t, $lo_idxs); +- let hi = $shuffle_fn(t, t, $hi_idxs); +- let x = super::$max(lo, hi); +- let y = super::$max(x, mem::uninitialized()); +- let y32: u32x2 = bitcast(y); +- y32.0 != 0 +- } +- } +- )* +- } +- } +- +- bools! { +- bool32fx4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32fx4_all(arm_vpmin_u32), bool32fx4_any(arm_vpmax_u32); +- bool8ix16, u8x16, simd_shuffle8, [0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15], bool8ix16_all(arm_vpmin_u8), bool8ix16_any(arm_vpmax_u8); +- bool16ix8, u16x8, simd_shuffle4, [0, 1, 2, 3], [4, 5, 6, 7], bool16ix8_all(arm_vpmin_u16), bool16ix8_any(arm_vpmax_u16); +- bool32ix4, u32x4, simd_shuffle2, [0, 1], [2, 3], bool32ix4_all(arm_vpmin_u32), bool32ix4_any(arm_vpmax_u32); +- } +-} +diff --git a/third_party/rust/simd/src/common.rs b/third_party/rust/simd/src/common.rs +deleted file mode 100644 +index 1052ae36959d..000000000000 +--- a/third_party/rust/simd/src/common.rs ++++ /dev/null +@@ -1,520 +0,0 @@ +-use super::*; +-#[allow(unused_imports)] +-use super::{ +- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, +- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, +- simd_insert, simd_extract, +- simd_cast, +- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, +- +- Unalign, bitcast, +-}; +-use core::{mem,ops}; +- +-#[cfg(any(target_arch = "x86", +- target_arch = "x86_64"))] +-use x86::sse2::common; +-#[cfg(any(target_arch = "arm"))] +-use arm::neon::common; +-#[cfg(any(target_arch = "aarch64"))] +-use aarch64::neon::common; +- +-macro_rules! basic_impls { +- ($( +- $name: ident: +- $elem: ident, $bool: ident, $shuffle: ident, $length: expr, $($first: ident),* | $($last: ident),*; +- )*) => { +- $(impl $name { +- /// Create a new instance. +- #[inline] +- pub const fn new($($first: $elem),*, $($last: $elem),*) -> $name { +- $name($($first),*, $($last),*) +- } +- +- /// Create a new instance where every lane has value `x`. +- #[inline] +- pub const fn splat(x: $elem) -> $name { +- $name($({ #[allow(dead_code)] struct $first; x }),*, +- $({ #[allow(dead_code)] struct $last; x }),*) +- } +- +- /// Compare for equality. +- #[inline] +- pub fn eq(self, other: Self) -> $bool { +- unsafe {simd_eq(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn ne(self, other: Self) -> $bool { +- unsafe {simd_ne(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn lt(self, other: Self) -> $bool { +- unsafe {simd_lt(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn le(self, other: Self) -> $bool { +- unsafe {simd_le(self, other)} +- } +- /// Compare for equality. 
+- #[inline] +- pub fn gt(self, other: Self) -> $bool { +- unsafe {simd_gt(self, other)} +- } +- /// Compare for equality. +- #[inline] +- pub fn ge(self, other: Self) -> $bool { +- unsafe {simd_ge(self, other)} +- } +- +- /// Extract the value of the `idx`th lane of `self`. +- /// +- /// # Panics +- /// +- /// `extract` will panic if `idx` is out of bounds. +- #[inline] +- pub fn extract(self, idx: u32) -> $elem { +- assert!(idx < $length); +- unsafe {simd_extract(self, idx)} +- } +- /// Return a new vector where the `idx`th lane is replaced +- /// by `elem`. +- /// +- /// # Panics +- /// +- /// `replace` will panic if `idx` is out of bounds. +- #[inline] +- pub fn replace(self, idx: u32, elem: $elem) -> Self { +- assert!(idx < $length); +- unsafe {simd_insert(self, idx, elem)} +- } +- +- /// Load a new value from the `idx`th position of `array`. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// Self::new(array[idx], array[idx + 1], ...) +- /// ``` +- /// +- /// # Panics +- /// +- /// `load` will panic if `idx` is out of bounds in +- /// `array`, or if `array[idx..]` is too short. +- #[inline] +- pub fn load(array: &[$elem], idx: usize) -> Self { +- let data = &array[idx..idx + $length]; +- let loaded = unsafe { +- *(data.as_ptr() as *const Unalign<Self>) +- }; +- loaded.0 +- } +- +- /// Store the elements of `self` to `array`, starting at +- /// the `idx`th position. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// array[i] = self.extract(0); +- /// array[i + 1] = self.extract(1); +- /// // ... +- /// ``` +- /// +- /// # Panics +- /// +- /// `store` will panic if `idx` is out of bounds in +- /// `array`, or if `array[idx...]` is too short. +- #[inline] +- pub fn store(self, array: &mut [$elem], idx: usize) { +- let place = &mut array[idx..idx + $length]; +- unsafe { +- *(place.as_mut_ptr() as *mut Unalign<Self>) = Unalign(self) +- } +- } +- })* +- } +-} +- +-basic_impls! { +- u32x4: u32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- i32x4: i32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- f32x4: f32, bool32fx4, simd_shuffle4, 4, x0, x1 | x2, x3; +- +- u16x8: u16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- i16x8: i16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- +- u8x16: u8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +- i8x16: i8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +-} +- +-macro_rules! bool_impls { +- ($( +- $name: ident: +- $elem: ident, $repr: ident, $repr_elem: ident, $length: expr, $all: ident, $any: ident, +- $($first: ident),* | $($last: ident),* +- [$(#[$cvt_meta: meta] $cvt: ident -> $cvt_to: ident),*]; +- )*) => { +- $(impl $name { +- /// Convert to integer representation. +- #[inline] +- pub fn to_repr(self) -> $repr { +- unsafe {mem::transmute(self)} +- } +- /// Convert from integer representation. +- #[inline] +- #[inline] +- pub fn from_repr(x: $repr) -> Self { +- unsafe {mem::transmute(x)} +- } +- +- /// Create a new instance. +- #[inline] +- pub fn new($($first: bool),*, $($last: bool),*) -> $name { +- unsafe { +- // negate everything together +- simd_sub($name::splat(false), +- $name($( ($first as $repr_elem) ),*, +- $( ($last as $repr_elem) ),*)) +- } +- } +- +- /// Create a new instance where every lane has value `x`.
+- #[allow(unused_variables)] +- #[inline] +- pub fn splat(x: bool) -> $name { +- let x = if x {!(0 as $repr_elem)} else {0}; +- $name($({ let $first = (); x}),*, +- $({ let $last = (); x}),*) +- } +- +- /// Extract the value of the `idx`th lane of `self`. +- /// +- /// # Panics +- /// +- /// `extract` will panic if `idx` is out of bounds. +- #[inline] +- pub fn extract(self, idx: u32) -> bool { +- assert!(idx < $length); +- unsafe {simd_extract(self.to_repr(), idx) != 0} +- } +- /// Return a new vector where the `idx`th lane is replaced +- /// by `elem`. +- /// +- /// # Panics +- /// +- /// `replace` will panic if `idx` is out of bounds. +- #[inline] +- pub fn replace(self, idx: u32, elem: bool) -> Self { +- assert!(idx < $length); +- let x = if elem {!(0 as $repr_elem)} else {0}; +- unsafe {Self::from_repr(simd_insert(self.to_repr(), idx, x))} +- } +- /// Select between elements of `then` and `else_`, based on +- /// the corresponding element of `self`. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// T::new(if self.extract(0) { then.extract(0) } else { else_.extract(0) }, +- /// if self.extract(1) { then.extract(1) } else { else_.extract(1) }, +- /// ...) +- /// ``` +- #[inline] +- pub fn select<T: Simd<Bool = Self>>(self, then: T, else_: T) -> T { +- let then: $repr = bitcast(then); +- let else_: $repr = bitcast(else_); +- bitcast((then & self.to_repr()) | (else_ & (!self).to_repr())) +- } +- +- /// Check if every element of `self` is true. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// self.extract(0) && self.extract(1) && ... +- /// ``` +- #[inline] +- pub fn all(self) -> bool { +- common::$all(self) +- } +- /// Check if any element of `self` is true. +- /// +- /// This is equivalent to the following, but is possibly +- /// more efficient: +- /// +- /// ```rust,ignore +- /// self.extract(0) || self.extract(1) || ... +- /// ``` +- #[inline] +- pub fn any(self) -> bool { +- common::$any(self) +- } +- +- $( +- #[$cvt_meta] +- #[inline] +- pub fn $cvt(self) -> $cvt_to { +- bitcast(self) +- } +- )* +- } +- impl ops::Not for $name { +- type Output = Self; +- +- #[inline] +- fn not(self) -> Self { +- Self::from_repr($repr::splat(!(0 as $repr_elem)) ^ self.to_repr()) +- } +- } +- )* +- } +-} +- +-bool_impls! { +- bool32ix4: bool32i, i32x4, i32, 4, bool32ix4_all, bool32ix4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. +- to_f -> bool32fx4]; +- bool32fx4: bool32f, i32x4, i32, 4, bool32fx4_all, bool32fx4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. +- to_i -> bool32ix4]; +- +- bool16ix8: bool16i, i16x8, i16, 8, bool16ix8_all, bool16ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 []; +- +- bool8ix16: bool8i, i8x16, i8, 16, bool8ix16_all, bool8ix16_any, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; +-} +- +-impl u32x4 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x4 { +- unsafe {simd_cast(self)} +- } +-} +-impl i32x4 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u32(self) -> u32x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float.
+- #[inline] +- pub fn to_f32(self) -> f32x4 { +- unsafe {simd_cast(self)} +- } +-} +-impl f32x4 { +- /// Compute the square root of each lane. +- #[inline] +- pub fn sqrt(self) -> Self { +- common::f32x4_sqrt(self) +- } +- /// Compute an approximation to the reciprocal of the square root +- /// of `self`, that is, `f32::splat(1.0) / self.sqrt()`. +- /// +- /// The accuracy of this approximation is platform dependent. +- #[inline] +- pub fn approx_rsqrt(self) -> Self { +- common::f32x4_approx_rsqrt(self) +- } +- /// Compute an approximation to the reciprocal of `self`, that is, +- /// `f32::splat(1.0) / self`. +- /// +- /// The accuracy of this approximation is platform dependent. +- #[inline] +- pub fn approx_reciprocal(self) -> Self { +- common::f32x4_approx_reciprocal(self) +- } +- /// Compute the lane-wise maximum of `self` and `other`. +- /// +- /// This is equivalent to the following, but is possibly more +- /// efficient: +- /// +- /// ```rust,ignore +- /// f32x4::new(self.extract(0).max(other.extract(0)), +- /// self.extract(1).max(other.extract(1)), +- /// ...) +- /// ``` +- #[inline] +- pub fn max(self, other: Self) -> Self { +- common::f32x4_max(self, other) +- } +- /// Compute the lane-wise minimum of `self` and `other`. +- /// +- /// This is equivalent to the following, but is possibly more +- /// efficient: +- /// +- /// ```rust,ignore +- /// f32x4::new(self.extract(0).min(other.extract(0)), +- /// self.extract(1).min(other.extract(1)), +- /// ...) +- /// ``` +- #[inline] +- pub fn min(self, other: Self) -> Self { +- common::f32x4_min(self, other) +- } +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u32(self) -> u32x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i16x8 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u16(self) -> u16x8 { +- unsafe {simd_cast(self)} +- } +-} +-impl u16x8 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i16(self) -> i16x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i8x16 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u8(self) -> u8x16 { +- unsafe {simd_cast(self)} +- } +-} +-impl u8x16 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i8(self) -> i8x16 { +- unsafe {simd_cast(self)} +- } +-} +- +- +-macro_rules! neg_impls { +- ($zero: expr, $($ty: ident,)*) => { +- $(impl ops::Neg for $ty { +- type Output = Self; +- fn neg(self) -> Self { +- $ty::splat($zero) - self +- } +- })* +- } +-} +-neg_impls!{ +- 0, +- i32x4, +- i16x8, +- i8x16, +-} +-neg_impls! { +- 0.0, +- f32x4, +-} +-macro_rules! not_impls { +- ($($ty: ident,)*) => { +- $(impl ops::Not for $ty { +- type Output = Self; +- fn not(self) -> Self { +- $ty::splat(!0) ^ self +- } +- })* +- } +-} +-not_impls! { +- i32x4, +- i16x8, +- i8x16, +- u32x4, +- u16x8, +- u8x16, +-} +- +-macro_rules! operators { +- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { +- $( +- $(impl ops::$trayt for $ty { +- type Output = Self; +- #[inline] +- fn $method(self, x: Self) -> Self { +- unsafe {$func(self, x)} +- } +- })* +- )* +- } +-} +-operators! 
{ +- Add (simd_add, add): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- f32x4; +- Sub (simd_sub, sub): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- f32x4; +- Mul (simd_mul, mul): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- f32x4; +- Div (simd_div, div): f32x4; +- +- BitAnd (simd_and, bitand): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- bool8ix16, bool16ix8, bool32ix4, +- bool32fx4; +- BitOr (simd_or, bitor): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- bool8ix16, bool16ix8, bool32ix4, +- bool32fx4; +- BitXor (simd_xor, bitxor): +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, +- bool8ix16, bool16ix8, bool32ix4, +- bool32fx4; +-} +- +-macro_rules! shift_one { +- ($ty: ident, $($by: ident),*) => { +- $( +- impl ops::Shl<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shl(self, other: $by) -> Self { +- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } +- } +- } +- impl ops::Shr<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shr(self, other: $by) -> Self { +- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} +- } +- } +- )* +- } +-} +- +-macro_rules! shift { +- ($($ty: ident),*) => { +- $(shift_one! { +- $ty, +- u8, u16, u32, u64, usize, +- i8, i16, i32, i64, isize +- })* +- } +-} +-shift! { +- i8x16, u8x16, i16x8, u16x8, i32x4, u32x4 +-} +diff --git a/third_party/rust/simd/src/lib.rs b/third_party/rust/simd/src/lib.rs +deleted file mode 100644 +index e8fb1b16f53b..000000000000 +--- a/third_party/rust/simd/src/lib.rs ++++ /dev/null +@@ -1,804 +0,0 @@ +-//! `simd` offers a basic interface to the SIMD functionality of CPUs. +-#![no_std] +- +-#![feature(cfg_target_feature, repr_simd, platform_intrinsics, const_fn)] +-#![allow(non_camel_case_types)] +- +-#[cfg(feature = "with-serde")] +-extern crate serde; +-#[cfg(feature = "with-serde")] +-#[macro_use] +-extern crate serde_derive; +- +-use core::mem; +- +-/// Boolean type for 8-bit integers. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool8i(i8); +-/// Boolean type for 16-bit integers. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool16i(i16); +-/// Boolean type for 32-bit integers. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool32i(i32); +-/// Boolean type for 32-bit floats. +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +-pub struct bool32f(i32); +- +-macro_rules! bool { +- ($($name: ident, $inner: ty;)*) => { +- $( +- impl From<bool> for $name { +- #[inline] +- fn from(b: bool) -> $name { +- $name(-(b as $inner)) +- } +- } +- impl From<$name> for bool { +- #[inline] +- fn from(b: $name) -> bool { +- b.0 != 0 +- } +- } +- )* +- } +-} +-bool! { +- bool8i, i8; +- bool16i, i16; +- bool32i, i32; +- bool32f, i32; +-} +- +-/// Types that are SIMD vectors. +-pub unsafe trait Simd { +- /// The corresponding boolean vector type. +- type Bool: Simd; +- /// The element that this vector stores. +- type Elem; +-} +- +-/// A SIMD vector of 4 `u32`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u32x4(u32, u32, u32, u32); +-/// A SIMD vector of 4 `i32`s.
+-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i32x4(i32, i32, i32, i32); +-/// A SIMD vector of 4 `f32`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f32x4(f32, f32, f32, f32); +-/// A SIMD boolean vector for length-4 vectors of 32-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32ix4(i32, i32, i32, i32); +-/// A SIMD boolean vector for length-4 vectors of 32-bit floats. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32fx4(i32, i32, i32, i32); +- +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct u32x2(u32, u32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct i32x2(i32, i32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct f32x2(f32, f32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct bool32ix2(i32, i32); +-#[allow(dead_code)] +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-struct bool32fx2(i32, i32); +- +-/// A SIMD vector of 8 `u16`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u16x8(u16, u16, u16, u16, +- u16, u16, u16, u16); +-/// A SIMD vector of 8 `i16`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i16x8(i16, i16, i16, i16, +- i16, i16, i16, i16); +-/// A SIMD boolean vector for length-8 vectors of 16-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool16ix8(i16, i16, i16, i16, +- i16, i16, i16, i16); +- +-/// A SIMD vector of 16 `u8`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u8x16(u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8); +-/// A SIMD vector of 16 `i8`s. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +-/// A SIMD boolean vector for length-16 vectors of 8-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool8ix16(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +- +- +-macro_rules! simd { +- ($($bool: ty: $($ty: ty = $elem: ty),*;)*) => { +- $($(unsafe impl Simd for $ty { +- type Bool = $bool; +- type Elem = $elem; +- } +- impl Clone for $ty { #[inline] fn clone(&self) -> Self { *self } } +- )*)*} +-} +-simd! 
{ +- bool8ix16: i8x16 = i8, u8x16 = u8, bool8ix16 = bool8i; +- bool16ix8: i16x8 = i16, u16x8 = u16, bool16ix8 = bool16i; +- bool32ix4: i32x4 = i32, u32x4 = u32, bool32ix4 = bool32i; +- bool32fx4: f32x4 = f32, bool32fx4 = bool32f; +- +- bool32ix2: i32x2 = i32, u32x2 = u32, bool32ix2 = bool32i; +- bool32fx2: f32x2 = f32, bool32fx2 = bool32f; +-} +- +-#[allow(dead_code)] +-#[inline] +-fn bitcast<T: Simd, U: Simd>(x: T) -> U { +- assert_eq!(mem::size_of::<T>(), +- mem::size_of::<U>()); +- unsafe {mem::transmute_copy(&x)} +-} +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn simd_eq<T: Simd<Bool = U>, U>(x: T, y: T) -> U; +- fn simd_ne<T: Simd<Bool = U>, U>(x: T, y: T) -> U; +- fn simd_lt<T: Simd<Bool = U>, U>(x: T, y: T) -> U; +- fn simd_le<T: Simd<Bool = U>, U>(x: T, y: T) -> U; +- fn simd_gt<T: Simd<Bool = U>, U>(x: T, y: T) -> U; +- fn simd_ge<T: Simd<Bool = U>, U>(x: T, y: T) -> U; +- +- fn simd_shuffle2<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 2]) -> U; +- fn simd_shuffle4<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 4]) -> U; +- fn simd_shuffle8<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 8]) -> U; +- fn simd_shuffle16<T: Simd, U: Simd<Elem = T::Elem>>(x: T, y: T, idx: [u32; 16]) -> U; +- +- fn simd_insert<T: Simd<Elem = U>, U>(x: T, idx: u32, val: U) -> T; +- fn simd_extract<T: Simd<Elem = U>, U>(x: T, idx: u32) -> U; +- +- fn simd_cast<T: Simd, U: Simd>(x: T) -> U; +- +- fn simd_add<T: Simd>(x: T, y: T) -> T; +- fn simd_sub<T: Simd>(x: T, y: T) -> T; +- fn simd_mul<T: Simd>(x: T, y: T) -> T; +- fn simd_div<T: Simd>(x: T, y: T) -> T; +- fn simd_shl<T: Simd>(x: T, y: T) -> T; +- fn simd_shr<T: Simd>(x: T, y: T) -> T; +- fn simd_and<T: Simd>(x: T, y: T) -> T; +- fn simd_or<T: Simd>(x: T, y: T) -> T; +- fn simd_xor<T: Simd>(x: T, y: T) -> T; +-} +-#[repr(packed)] +-#[derive(Copy)] +-struct Unalign<T>(T); +- +-impl<T: Copy> Clone for Unalign<T> { +- fn clone(&self) -> Unalign<T> { +- Unalign(unsafe { self.0.clone() }) +- } +-} +- +-#[macro_use] +-mod common; +-mod sixty_four; +-mod v256; +- +-#[cfg(any(feature = "doc", +- target_arch = "x86", +- target_arch = "x86_64"))] +-pub mod x86; +-#[cfg(any(feature = "doc", target_arch = "arm"))] +-pub mod arm; +-#[cfg(any(feature = "doc", target_arch = "aarch64"))] +-pub mod aarch64; +- +-#[cfg(test)] +-mod tests { +- +- use super::u8x16; +- use super::u16x8; +- use super::u32x4; +- use super::f32x4; +- +- #[test] +- fn test_u8x16_none_not_any() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_none_not_all() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_all_any() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_all_all() { +- let x1 = u8x16::splat(1); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_last_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_last_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_first_any() { +- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_first_not_all() { +- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_last_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn
test_u8x16_only_last_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_first_any() { +- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_only_first_not_all() { +- let x1 = u8x16::new(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_thirteenth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_thirteenth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_except_fifth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_except_fifth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_thirteenth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_only_thirteenth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u8x16_only_fifth_any() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u8x16_only_fifth_not_all() { +- let x1 = u8x16::new(2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u8x16::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_none_not_any() { +- let x1 = u16x8::splat(1); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_none_not_all() { +- let x1 = u16x8::splat(1); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_all_any() { +- let x1 = u16x8::splat(1); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_all_all() { +- let x1 = u16x8::splat(1); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_last_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_last_not_all() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_first_any() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_first_not_all() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_last_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_last_not_all() { +- let x1 = 
u16x8::new(2, 2, 2, 2, 2, 2, 2, 1); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_first_any() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_first_not_all() { +- let x1 = u16x8::new(1, 2, 2, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_sixth_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_sixth_not_all() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_except_third_any() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_except_third_not_all() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_sixth_any() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_sixth_not_all() { +- let x1 = u16x8::new(2, 2, 2, 2, 2, 1, 2, 2); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u16x8_only_third_any() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u16x8_only_third_not_all() { +- let x1 = u16x8::new(2, 2, 1, 2, 2, 2, 2, 2); +- let x2 = u16x8::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_none_not_any() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_none_not_all() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_all_any() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_all_all() { +- let x1 = u32x4::splat(1); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_except_last_any() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_last_not_all() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_except_first_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_first_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_last_any() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_last_not_all() { +- let x1 = u32x4::new(2, 2, 2, 1); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_first_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_first_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- 
} +- +- #[test] +- fn test_u32x4_except_second_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_second_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_except_third_any() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(2); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_except_third_not_all() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(2); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_second_any() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_second_not_all() { +- let x1 = u32x4::new(1, 2, 2, 2); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_u32x4_only_third_any() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(1); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_u32x4_only_third_not_all() { +- let x1 = u32x4::new(2, 2, 1, 2); +- let x2 = u32x4::splat(1); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_none_not_any() { +- let x1 = f32x4::splat(1.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_none_not_all() { +- let x1 = f32x4::splat(1.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_all_any() { +- let x1 = f32x4::splat(1.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_all_all() { +- let x1 = f32x4::splat(1.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_last_any() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_except_last_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_first_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_except_first_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_last_any() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_last_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 2.0, 1.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_first_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_first_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_second_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_except_second_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_except_third_any() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 
2.0); +- let x2 = f32x4::splat(2.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_except_third_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); +- let x2 = f32x4::splat(2.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_second_any() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_second_not_all() { +- let x1 = f32x4::new(1.0, 2.0, 2.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +- #[test] +- fn test_f32x4_only_third_any() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!((x1.eq(x2)).any()); +- } +- +- #[test] +- fn test_f32x4_only_third_not_all() { +- let x1 = f32x4::new(2.0, 2.0, 1.0, 2.0); +- let x2 = f32x4::splat(1.0); +- assert!(!(x1.eq(x2)).all()); +- } +- +-} +diff --git a/third_party/rust/simd/src/sixty_four.rs b/third_party/rust/simd/src/sixty_four.rs +deleted file mode 100644 +index a87f44a77ee7..000000000000 +--- a/third_party/rust/simd/src/sixty_four.rs ++++ /dev/null +@@ -1,228 +0,0 @@ +-#![allow(dead_code)] +-use super::*; +-#[allow(unused_imports)] +-use super::{ +- f32x2, +- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, +- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, +- simd_insert, simd_extract, +- simd_cast, +- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, +- +- Unalign, bitcast, +-}; +-use core::{mem,ops}; +- +-/// Boolean type for 64-bit integers. +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone)] +-pub struct bool64i(i64); +-/// Boolean type for 64-bit floats. +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy, Clone)] +-pub struct bool64f(i64); +-/// A SIMD vector of 2 `u64`s. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u64x2(u64, u64); +-/// A SIMD vector of 2 `i64`s. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i64x2(i64, i64); +-/// A SIMD vector of 2 `f64`s. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f64x2(f64, f64); +-/// A SIMD boolean vector for length-2 vectors of 64-bit integers. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64ix2(i64, i64); +-/// A SIMD boolean vector for length-2 vectors of 64-bit floats. +-#[repr(simd)] +-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64fx2(i64, i64); +- +-simd! { +- bool64ix2: i64x2 = i64, u64x2 = u64, bool64ix2 = bool64i; +- bool64fx2: f64x2 = f64, bool64fx2 = bool64f; +-} +-basic_impls! { +- u64x2: u64, bool64ix2, simd_shuffle2, 2, x0 | x1; +- i64x2: i64, bool64ix2, simd_shuffle2, 2, x0 | x1; +- f64x2: f64, bool64fx2, simd_shuffle2, 2, x0 | x1; +-} +- +-mod common { +- use super::*; +- // naive for now +- #[inline] +- pub fn bool64ix2_all(x: bool64ix2) -> bool { +- x.0 != 0 && x.1 != 0 +- } +- #[inline] +- pub fn bool64ix2_any(x: bool64ix2) -> bool { +- x.0 != 0 || x.1 != 0 +- } +- #[inline] +- pub fn bool64fx2_all(x: bool64fx2) -> bool { +- x.0 != 0 && x.1 != 0 +- } +- #[inline] +- pub fn bool64fx2_any(x: bool64fx2) -> bool { +- x.0 != 0 || x.1 != 0 +- } +-} +-bool_impls!
{ +- bool64ix2: bool64i, i64x2, i64, 2, bool64ix2_all, bool64ix2_any, x0 | x1 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. +- to_f -> bool64fx2]; +- +- bool64fx2: bool64f, i64x2, i64, 2, bool64fx2_all, bool64fx2_any, x0 | x1 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. +- to_i -> bool64ix2]; +-} +- +-impl u64x2 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x2 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x2 { +- unsafe {simd_cast(self)} +- } +-} +-impl i64x2 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u64(self) -> u64x2 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x2 { +- unsafe {simd_cast(self)} +- } +-} +-impl f64x2 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x2 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u64(self) -> u64x2 { +- unsafe {simd_cast(self)} +- } +- +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x4 { +- unsafe { +- let x: f32x2 = simd_cast(self); +- f32x4::new(x.0, x.1, 0.0, 0.0) +- } +- } +-} +- +-neg_impls!{ +- 0, +- i64x2, +-} +-neg_impls! { +- 0.0, +- f64x2, +-} +-macro_rules! not_impls { +- ($($ty: ident,)*) => { +- $(impl ops::Not for $ty { +- type Output = Self; +- fn not(self) -> Self { +- $ty::splat(!0) ^ self +- } +- })* +- } +-} +-not_impls! { +- i64x2, +- u64x2, +-} +- +-macro_rules! operators { +- ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { +- $( +- $(impl ops::$trayt for $ty { +- type Output = Self; +- #[inline] +- fn $method(self, x: Self) -> Self { +- unsafe {$func(self, x)} +- } +- })* +- )* +- } +-} +-operators! { +- Add (simd_add, add): +- i64x2, u64x2, +- f64x2; +- Sub (simd_sub, sub): +- i64x2, u64x2, +- f64x2; +- Mul (simd_mul, mul): +- i64x2, u64x2, +- f64x2; +- Div (simd_div, div): f64x2; +- +- BitAnd (simd_and, bitand): +- i64x2, u64x2, +- bool64ix2, +- bool64fx2; +- BitOr (simd_or, bitor): +- i64x2, u64x2, +- bool64ix2, +- bool64fx2; +- BitXor (simd_xor, bitxor): +- i64x2, u64x2, +- bool64ix2, +- bool64fx2; +-} +- +-macro_rules! shift_one { ($ty: ident, $($by: ident),*) => { +- $( +- impl ops::Shl<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shl(self, other: $by) -> Self { +- unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } +- } +- } +- impl ops::Shr<$by> for $ty { +- type Output = Self; +- #[inline] +- fn shr(self, other: $by) -> Self { +- unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} +- } +- } +- )* +- } +-} +- +-macro_rules! shift { +- ($($ty: ident),*) => { +- $(shift_one! { +- $ty, +- u8, u16, u32, u64, usize, +- i8, i16, i32, i64, isize +- })* +- } +-} +-shift! 
{ +- i64x2, u64x2 +-} +diff --git a/third_party/rust/simd/src/v256.rs b/third_party/rust/simd/src/v256.rs +deleted file mode 100644 +index 519eb14e7259..000000000000 +--- a/third_party/rust/simd/src/v256.rs ++++ /dev/null +@@ -1,436 +0,0 @@ +-#![allow(dead_code)] +-use core::{mem,ops}; +-#[allow(unused_imports)] +-use super::{ +- Simd, +- u32x4, i32x4, u16x8, i16x8, u8x16, i8x16, f32x4, +- bool32ix4, bool16ix8, bool8ix16, bool32fx4, +- simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, +- simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, +- simd_insert, simd_extract, +- simd_cast, +- simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, +- bool8i, bool16i, bool32i, bool32f, +- Unalign, bitcast, +-}; +-use super::sixty_four::*; +-#[cfg(all(target_feature = "avx"))] +-use super::x86::avx::common; +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u64x4(u64, u64, u64, u64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i64x4(i64, i64, i64, i64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f64x4(f64, f64, f64, f64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64ix4(i64, i64, i64, i64); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool64fx4(i64, i64, i64, i64); +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u32x8(u32, u32, u32, u32, +- u32, u32, u32, u32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i32x8(i32, i32, i32, i32, +- i32, i32, i32, i32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct f32x8(f32, f32, f32, f32, +- f32, f32, f32, f32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32ix8(i32, i32, i32, i32, +- i32, i32, i32, i32); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool32fx8(i32, i32, i32, i32, +- i32, i32, i32, i32); +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u16x16(u16, u16, u16, u16, u16, u16, u16, u16, +- u16, u16, u16, u16, u16, u16, u16, u16); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct i16x16(i16, i16, i16, i16, i16, i16, i16, i16, +- i16, i16, i16, i16, i16, i16, i16, i16); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool16ix16(i16, i16, i16, i16, i16, i16, i16, i16, +- i16, i16, i16, i16, i16, i16, i16, i16); +- +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct u8x32(u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8, +- u8, u8, u8, u8, u8, u8, u8, u8); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct
i8x32(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +-#[repr(simd)] +-#[cfg_attr(feature = "with-serde", derive(Serialize, Deserialize))] +-#[derive(Debug, Copy)] +-pub struct bool8ix32(i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8, +- i8, i8, i8, i8, i8, i8, i8, i8); +- +-simd! { +- bool8ix32: i8x32 = i8, u8x32 = u8, bool8ix32 = bool8i; +- bool16ix16: i16x16 = i16, u16x16 = u16, bool16ix16 = bool16i; +- bool32ix8: i32x8 = i32, u32x8 = u32, bool32ix8 = bool32i; +- bool64ix4: i64x4 = i64, u64x4 = u64, bool64ix4 = bool64i; +- +- bool32fx8: f32x8 = f32, bool32fx8 = bool32f; +- bool64fx4: f64x4 = f64, bool64fx4 = bool64f; +-} +- +-basic_impls! { +- u64x4: u64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- i64x4: i64, bool64ix4, simd_shuffle4, 4, x0, x1 | x2, x3; +- f64x4: f64, bool64fx4, simd_shuffle4, 4, x0, x1 | x2, x3; +- +- u32x8: u32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- i32x8: i32, bool32ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- f32x8: f32, bool32fx8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; +- +- u16x16: u16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +- i16x16: i16, bool16ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; +- +- u8x32: u8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | +- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; +- i8x32: i8, bool8ix32, simd_shuffle32, 32, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | +- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31; +-} +- +-#[cfg(all(not(target_feature = "avx")))] +-#[doc(hidden)] +-mod common { +- use super::*; +- // implementation via SSE vectors +- macro_rules! bools { +- ($($ty: ty, $all: ident, $any: ident;)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- x.low().all() && x.high().all() +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- x.low().any() || x.high().any() +- } +- )* +- } +- } +- +- bools! { +- bool64ix4, bool64ix4_all, bool64ix4_any; +- bool64fx4, bool64fx4_all, bool64fx4_any; +- bool32ix8, bool32ix8_all, bool32ix8_any; +- bool32fx8, bool32fx8_all, bool32fx8_any; +- bool16ix16, bool16ix16_all, bool16ix16_any; +- bool8ix32, bool8ix32_all, bool8ix32_any; +- } +- +-} +- +-bool_impls! { +- bool64ix4: bool64i, i64x4, i64, 4, bool64ix4_all, bool64ix4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. +- to_f -> bool64fx4]; +- +- bool64fx4: bool64f, i64x4, i64, 4, bool64fx4_all, bool64fx4_any, x0, x1 | x2, x3 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. +- to_i -> bool64ix4]; +- +- bool32ix8: bool32i, i32x8, i32, 8, bool32ix8_all, bool32ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 +- [/// Convert `self` to a boolean vector for interacting with floating point vectors. +- to_f -> bool32fx8]; +- +- bool32fx8: bool32f, i32x8, i32, 8, bool32fx8_all, bool32fx8_any, x0, x1, x2, x3 | x4, x5, x6, x7 +- [/// Convert `self` to a boolean vector for interacting with integer vectors. 
+- to_i -> bool32ix8]; +- +- bool16ix16: bool16i, i16x16, i16, 16, bool16ix16_all, bool16ix16_any, +- x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; +- +- bool8ix32: bool8i, i8x32, i8, 32, bool8ix32_all, bool8ix32_any, +- x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | +- x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 []; +-} +- +-pub trait LowHigh128 { +- type Half: Simd; +- /// Extract the low 128 bit part. +- fn low(self) -> Self::Half; +- /// Extract the high 128 bit part. +- fn high(self) -> Self::Half; +-} +- +-macro_rules! expr { ($x:expr) => ($x) } // HACK +-macro_rules! low_high_impls { +- ($( +- $name: ident, $half: ident, $($first: tt),+ ... $($last: tt),+; +- )*) => { +- $(impl LowHigh128 for $name { +- type Half = $half; +- #[inline] +- fn low(self) -> Self::Half { +- $half::new($( expr!(self.$first), )*) +- } +- +- #[inline] +- fn high(self) -> Self::Half { +- $half::new($( expr!(self.$last), )*) +- } +- })* +- } +-} +- +-low_high_impls! { +- u64x4, u64x2, 0, 1 ... 2, 3; +- i64x4, i64x2, 0, 1 ... 2, 3; +- f64x4, f64x2, 0, 1 ... 2, 3; +- +- u32x8, u32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; +- i32x8, i32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; +- f32x8, f32x4, 0, 1, 2, 3 ... 4, 5, 6, 7; +- +- u16x16, u16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; +- i16x16, i16x8, 0, 1, 2, 3, 4, 5, 6, 7 ... 8, 9, 10, 11, 12, 13, 14, 15; +- +- u8x32, u8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... +- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; +- i8x32, i8x16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ... +- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31; +- +-} +- +-macro_rules! bool_low_high_impls { +- ($( +- $name: ident: $half: ident; +- )*) => { +- $(impl LowHigh128 for $name { +- type Half = $half; +- /// Extract the low 128 bit part. +- #[inline] +- fn low(self) -> Self::Half { +- Self::Half::from_repr(self.to_repr().low()) +- } +- +- /// Extract the high 128 bit part. +- #[inline] +- fn high(self) -> Self::Half { +- Self::Half::from_repr(self.to_repr().high()) +- } +- })* +- } +-} +- +-bool_low_high_impls! { +- bool64fx4: bool64fx2; +- bool32fx8: bool32fx4; +- +- bool64ix4: bool64ix2; +- bool32ix8: bool32ix4; +- bool16ix16: bool16ix8; +- bool8ix32: bool8ix16; +-} +- +-impl u64x4 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i64x4 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u64(self) -> u64x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 64-bit float. +- #[inline] +- pub fn to_f64(self) -> f64x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl f64x4 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i64(self) -> i64x4 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u64(self) -> u64x4 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl u32x8 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x8 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i32x8 { +- /// Convert each lane to an unsigned integer. 
+- #[inline] +- pub fn to_u32(self) -> u32x8 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to a 32-bit float. +- #[inline] +- pub fn to_f32(self) -> f32x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl f32x8 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i32(self) -> i32x8 { +- unsafe {simd_cast(self)} +- } +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u32(self) -> u32x8 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i16x16 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u16(self) -> u16x16 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl u16x16 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i16(self) -> i16x16 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl i8x32 { +- /// Convert each lane to an unsigned integer. +- #[inline] +- pub fn to_u8(self) -> u8x32 { +- unsafe {simd_cast(self)} +- } +-} +- +-impl u8x32 { +- /// Convert each lane to a signed integer. +- #[inline] +- pub fn to_i8(self) -> i8x32 { +- unsafe {simd_cast(self)} +- } +-} +- +-operators! { +- Add (simd_add, add): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- f64x4, f32x8; +- Sub (simd_sub, sub): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- f64x4, f32x8; +- Mul (simd_mul, mul): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- f64x4, f32x8; +- Div (simd_div, div): f64x4, f32x8; +- +- BitAnd (simd_and, bitand): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- bool64ix4, bool32ix8, bool16ix16, +- bool64fx4, bool32fx8; +- BitOr (simd_or, bitor): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- bool64ix4, bool32ix8, bool16ix16, +- bool64fx4, bool32fx8; +- BitXor (simd_xor, bitxor): +- i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, i64x4, u64x4, +- bool64ix4, bool32ix8, bool16ix16, +- bool64fx4, bool32fx8; +-} +- +-neg_impls!{ +- 0, +- i64x4, +- i32x8, +- i16x16, +- i8x32, +-} +- +-neg_impls! { +- 0.0, +- f64x4, +- f32x8, +-} +- +-not_impls! { +- i64x4, +- u64x4, +- i32x8, +- u32x8, +- i16x16, +- u16x16, +- i8x32, +- u8x32, +-} +- +-shift! 
{ +- i64x4, +- u64x4, +- i32x8, +- u32x8, +- i16x16, +- u16x16, +- i8x32, +- u8x32 +-} +diff --git a/third_party/rust/simd/src/x86/avx.rs b/third_party/rust/simd/src/x86/avx.rs +deleted file mode 100644 +index 180247e36561..000000000000 +--- a/third_party/rust/simd/src/x86/avx.rs ++++ /dev/null +@@ -1,290 +0,0 @@ +-use super::super::*; +-use sixty_four::*; +- +-use super::super::bitcast; +- +-pub use v256::{ +- f64x4, bool64fx4, u64x4, i64x4, bool64ix4, +- f32x8, bool32fx8, u32x8, i32x8, bool32ix8, +- u16x16, i16x16, bool16ix16, +- u8x32, i8x32, bool8ix32, +- LowHigh128 +-}; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm256_addsub_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_addsub_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_dp_ps(x: f32x8, y: f32x8, z: i32) -> f32x8; +- fn x86_mm256_hadd_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_hadd_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_hsub_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_hsub_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_max_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_max_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_min_ps(x: f32x8, y: f32x8) -> f32x8; +- fn x86_mm256_min_pd(x: f64x4, y: f64x4) -> f64x4; +- fn x86_mm256_movemask_ps(x: f32x8) -> i32; +- fn x86_mm256_movemask_pd(x: f64x4) -> i32; +- fn x86_mm_permutevar_ps(x: f32x4, y: i32x4) -> f32x4; +- fn x86_mm_permutevar_pd(x: f64x2, y: i64x2) -> f64x2; +- fn x86_mm256_permutevar_ps(x: f32x8, y: i32x8) -> f32x8; +- fn x86_mm256_permutevar_pd(x: f64x4, y: i64x4) -> f64x4; +- fn x86_mm256_rcp_ps(x: f32x8) -> f32x8; +- fn x86_mm256_rsqrt_ps(x: f32x8) -> f32x8; +- fn x86_mm256_sqrt_ps(x: f32x8) -> f32x8; +- fn x86_mm256_sqrt_pd(x: f64x4) -> f64x4; +- fn x86_mm_testc_ps(x: f32x4, y: f32x4) -> i32; +- fn x86_mm256_testc_ps(x: f32x8, y: f32x8) -> i32; +- fn x86_mm_testc_pd(x: f64x2, y: f64x2) -> i32; +- fn x86_mm256_testc_pd(x: f64x4, y: f64x4) -> i32; +- fn x86_mm256_testc_si256(x: u64x4, y: u64x4) -> i32; +- fn x86_mm_testnzc_ps(x: f32x4, y: f32x4) -> i32; +- fn x86_mm256_testnzc_ps(x: f32x8, y: f32x8) -> i32; +- fn x86_mm_testnzc_pd(x: f64x2, y: f64x2) -> i32; +- fn x86_mm256_testnzc_pd(x: f64x4, y: f64x4) -> i32; +- fn x86_mm256_testnzc_si256(x: u64x4, y: u64x4) -> i32; +- fn x86_mm_testz_ps(x: f32x4, y: f32x4) -> i32; +- fn x86_mm256_testz_ps(x: f32x8, y: f32x8) -> i32; +- fn x86_mm_testz_pd(x: f64x2, y: f64x2) -> i32; +- fn x86_mm256_testz_pd(x: f64x4, y: f64x4) -> i32; +- fn x86_mm256_testz_si256(x: u64x4, y: u64x4) -> i32; +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::*; +- use core::mem; +- +- macro_rules! bools { +- ($($ty: ty, $all: ident, $any: ident, $testc: ident, $testz: ident;)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- super::$testc(mem::transmute(x), mem::transmute(<$ty>::splat(true))) != 0 +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- super::$testz(mem::transmute(x), mem::transmute(x)) == 0 +- } +- } +- )* +- } +- } +- +- bools! 
{ +- bool32fx8, bool32fx8_all, bool32fx8_any, x86_mm256_testc_ps, x86_mm256_testz_ps; +- bool64fx4, bool64fx4_all, bool64fx4_any, x86_mm256_testc_pd, x86_mm256_testz_pd; +- bool8ix32, bool8ix32_all, bool8ix32_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- bool16ix16, bool16ix16_all, bool16ix16_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- bool32ix8, bool32ix8_all, bool32ix8_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- bool64ix4, bool64ix4_all, bool64ix4_any, x86_mm256_testc_si256, x86_mm256_testz_si256; +- } +-} +- +-// 128-bit vectors: +- +-// 32 bit floats +- +-pub trait AvxF32x4 { +- fn permutevar(self, other: i32x4) -> f32x4; +-} +-impl AvxF32x4 for f32x4 { +- fn permutevar(self, other: i32x4) -> f32x4 { +- unsafe { x86_mm_permutevar_ps(self, other) } +- } +-} +- +-pub trait AvxF64x4 { +- fn sqrt(self) -> Self; +- fn addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn move_mask(self) -> u32; +-} +- +-impl AvxF64x4 for f64x4 { +- #[inline] +- fn sqrt(self) -> Self { +- unsafe { x86_mm256_sqrt_pd(self) } +- } +- +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm256_addsub_pd(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm256_hadd_pd(self, other) } +- } +- +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm256_hsub_pd(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm256_max_pd(self, other) } +- } +- +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm256_min_pd(self, other) } +- } +- +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm256_movemask_pd(self) as u32 } +- } +-} +- +-pub trait AvxBool64fx4 { +- fn move_mask(self) -> u32; +-} +-impl AvxBool64fx4 for bool64fx4 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm256_movemask_pd(bitcast(self)) as u32 } +- } +-} +- +-pub trait AvxF32x8 { +- fn sqrt(self) -> Self; +- fn addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn move_mask(self) -> u32; +- /// Compute an approximation to the reciprocal of the square root +- /// of `self`, that is, `f32x8::splat(1.0) / self.sqrt()`. +- /// +- /// The accuracy of this approximation is platform dependent. +- fn approx_rsqrt(self) -> Self; +- /// Compute an approximation to the reciprocal of `self`, that is, +- /// `f32x8::splat(1.0) / self`. +- /// +- /// The accuracy of this approximation is platform dependent. 
+- fn approx_reciprocal(self) -> Self; +-} +- +-impl AvxF32x8 for f32x8 { +- #[inline] +- fn sqrt(self) -> Self { +- unsafe { x86_mm256_sqrt_ps(self) } +- } +- +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm256_addsub_ps(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm256_hadd_ps(self, other) } +- } +- +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm256_hsub_ps(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm256_max_ps(self, other) } +- } +- +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm256_min_ps(self, other) } +- } +- +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm256_movemask_ps(self) as u32 } +- } +- +- #[inline] +- fn approx_reciprocal(self) -> Self { +- unsafe { x86_mm256_rcp_ps(self) } +- } +- +- #[inline] +- fn approx_rsqrt(self) -> Self { +- unsafe { x86_mm256_rsqrt_ps(self) } +- } +-} +- +-pub trait AvxBool32fx8 { +- fn move_mask(self) -> u32; +-} +-impl AvxBool32fx8 for bool32fx8 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm256_movemask_ps(bitcast(self)) as u32 } +- } +-} +- +-pub trait AvxBool32fx4 {} +-impl AvxBool32fx4 for bool32fx4 {} +- +-// 64 bit floats +- +-pub trait AvxF64x2 { +- fn permutevar(self, other: i64x2) -> f64x2; +-} +-impl AvxF64x2 for f64x2 { +- fn permutevar(self, other: i64x2) -> f64x2 { +- unsafe { x86_mm_permutevar_pd(self, other) } +- } +-} +- +-pub trait AvxBool64fx2 {} +-impl AvxBool64fx2 for bool64fx2 {} +- +-// 64 bit integers +- +-pub trait AvxU64x2 {} +-impl AvxU64x2 for u64x2 {} +-pub trait AvxI64x2 {} +-impl AvxI64x2 for i64x2 {} +- +-pub trait AvxBool64ix2 {} +-impl AvxBool64ix2 for bool64ix2 {} +- +-// 32 bit integers +- +-pub trait AvxU32x4 {} +-impl AvxU32x4 for u32x4 {} +-pub trait AvxI32x4 {} +-impl AvxI32x4 for i32x4 {} +- +-pub trait AvxBool32ix4 {} +-impl AvxBool32ix4 for bool32ix4 {} +- +-// 16 bit integers +- +-pub trait AvxU16x8 {} +-impl AvxU16x8 for u16x8 {} +-pub trait AvxI16x8 {} +-impl AvxI16x8 for i16x8 {} +- +-pub trait AvxBool16ix8 {} +-impl AvxBool16ix8 for bool16ix8 {} +- +-// 8 bit integers +- +-pub trait AvxU8x16 {} +-impl AvxU8x16 for u8x16 {} +-pub trait AvxI8x16 {} +-impl AvxI8x16 for i8x16 {} +- +-pub trait AvxBool8ix16 {} +-impl AvxBool8ix16 for bool8ix16 {} +diff --git a/third_party/rust/simd/src/x86/avx2.rs b/third_party/rust/simd/src/x86/avx2.rs +deleted file mode 100644 +index e86a33d3b5bb..000000000000 +--- a/third_party/rust/simd/src/x86/avx2.rs ++++ /dev/null +@@ -1,65 +0,0 @@ +-use x86::avx::*; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm256_abs_epi8(x: i8x32) -> i8x32; +- fn x86_mm256_abs_epi16(x: i16x16) -> i16x16; +- fn x86_mm256_abs_epi32(x: i32x8) -> i32x8; +- fn x86_mm256_adds_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_adds_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_adds_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_adds_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_avg_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_avg_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_hadd_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_hadd_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_hadds_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_hsub_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_hsub_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_hsubs_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_madd_epi16(x: i16x16, y: 
i16x16) -> i32x8; +- fn x86_mm256_maddubs_epi16(x: i8x32, y: i8x32) -> i16x16; +- fn x86_mm256_max_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_max_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_max_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_max_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_max_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_max_epu32(x: u32x8, y: u32x8) -> u32x8; +- fn x86_mm256_min_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_min_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_min_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_min_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_min_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_min_epu32(x: u32x8, y: u32x8) -> u32x8; +- fn x86_mm256_mul_epi64(x: i32x8, y: i32x8) -> i64x4; +- fn x86_mm256_mul_epu64(x: u32x8, y: u32x8) -> u64x4; +- fn x86_mm256_mulhi_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_mulhi_epu16(x: u16x16, y: u16x16) -> u16x16; +- fn x86_mm256_mulhrs_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_packs_epi16(x: i16x16, y: i16x16) -> i8x32; +- fn x86_mm256_packus_epi16(x: i16x16, y: i16x16) -> u8x32; +- fn x86_mm256_packs_epi32(x: i32x8, y: i32x8) -> i16x16; +- fn x86_mm256_packus_epi32(x: i32x8, y: i32x8) -> u16x16; +- fn x86_mm256_permutevar8x32_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_permutevar8x32_ps(x: f32x8, y: i32x8) -> f32x8; +- fn x86_mm256_sad_epu8(x: u8x32, y: u8x32) -> u64x4; +- fn x86_mm256_shuffle_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_sign_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_sign_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_sign_epi32(x: i32x8, y: i32x8) -> i32x8; +- fn x86_mm256_subs_epi8(x: i8x32, y: i8x32) -> i8x32; +- fn x86_mm256_subs_epu8(x: u8x32, y: u8x32) -> u8x32; +- fn x86_mm256_subs_epi16(x: i16x16, y: i16x16) -> i16x16; +- fn x86_mm256_subs_epu16(x: u16x16, y: u16x16) -> u16x16; +-} +- +-// broken on rustc 1.7.0-nightly (1ddaf8bdf 2015-12-12) +-// pub trait Avx2F32x8 { +-// fn permutevar(self, other: i32x8) -> f32x8; +-// } +-// +-// impl Avx2F32x8 for f32x8 { +-// fn permutevar(self, other: i32x8) -> f32x8 { +-// unsafe { x86_mm256_permutevar8x32_ps(self, other) } +-// } +-// } +diff --git a/third_party/rust/simd/src/x86/mod.rs b/third_party/rust/simd/src/x86/mod.rs +deleted file mode 100644 +index 8763fb16ccfd..000000000000 +--- a/third_party/rust/simd/src/x86/mod.rs ++++ /dev/null +@@ -1,16 +0,0 @@ +-//! Features specific to x86 and x86-64 CPUs. +- +-#[cfg(any(feature = "doc", target_feature = "sse2"))] +-pub mod sse2; +-#[cfg(any(feature = "doc", target_feature = "sse3"))] +-pub mod sse3; +-#[cfg(any(feature = "doc", target_feature = "ssse3"))] +-pub mod ssse3; +-#[cfg(any(feature = "doc", target_feature = "sse4.1"))] +-pub mod sse4_1; +-#[cfg(any(feature = "doc", target_feature = "sse4.2"))] +-pub mod sse4_2; +-#[cfg(any(feature = "doc", target_feature = "avx"))] +-pub mod avx; +-#[cfg(any(feature = "doc", target_feature = "avx2"))] +-pub mod avx2; +diff --git a/third_party/rust/simd/src/x86/sse2.rs b/third_party/rust/simd/src/x86/sse2.rs +deleted file mode 100644 +index 5cbc853694d5..000000000000 +--- a/third_party/rust/simd/src/x86/sse2.rs ++++ /dev/null +@@ -1,359 +0,0 @@ +-use super::super::*; +-use {bitcast, simd_cast, f32x2}; +- +-pub use sixty_four::{f64x2, i64x2, u64x2, bool64ix2, bool64fx2}; +- +-//pub use super::{u64x2, i64x2, f64x2, bool64ix2, bool64fx2}; +- +-// strictly speaking, these are SSE instructions, not SSE2. 
+-extern "platform-intrinsic" { +- fn x86_mm_movemask_ps(x: f32x4) -> i32; +- fn x86_mm_max_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_min_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_rsqrt_ps(x: f32x4) -> f32x4; +- fn x86_mm_rcp_ps(x: f32x4) -> f32x4; +- fn x86_mm_sqrt_ps(x: f32x4) -> f32x4; +-} +- +-extern "platform-intrinsic" { +- fn x86_mm_adds_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_adds_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_adds_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_adds_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_avg_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_avg_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_madd_epi16(x: i16x8, y: i16x8) -> i32x4; +- fn x86_mm_max_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_max_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_max_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_min_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_min_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_min_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_movemask_pd(x: f64x2) -> i32; +- fn x86_mm_movemask_epi8(x: i8x16) -> i32; +- fn x86_mm_mul_epu32(x: u32x4, y: u32x4) -> u64x2; +- fn x86_mm_mulhi_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_mulhi_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_packs_epi16(x: i16x8, y: i16x8) -> i8x16; +- fn x86_mm_packs_epi32(x: i32x4, y: i32x4) -> i16x8; +- fn x86_mm_packus_epi16(x: i16x8, y: i16x8) -> u8x16; +- fn x86_mm_sad_epu8(x: u8x16, y: u8x16) -> u64x2; +- fn x86_mm_sqrt_pd(x: f64x2) -> f64x2; +- fn x86_mm_subs_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_subs_epu8(x: u8x16, y: u8x16) -> u8x16; +- fn x86_mm_subs_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_subs_epu16(x: u16x8, y: u16x8) -> u16x8; +-} +- +-#[doc(hidden)] +-pub mod common { +- use super::super::super::*; +- use core::mem; +- +- #[inline] +- pub fn f32x4_sqrt(x: f32x4) -> f32x4 { +- unsafe {super::x86_mm_sqrt_ps(x)} +- } +- #[inline] +- pub fn f32x4_approx_rsqrt(x: f32x4) -> f32x4 { +- unsafe {super::x86_mm_rsqrt_ps(x)} +- } +- #[inline] +- pub fn f32x4_approx_reciprocal(x: f32x4) -> f32x4 { +- unsafe {super::x86_mm_rcp_ps(x)} +- } +- #[inline] +- pub fn f32x4_max(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::x86_mm_max_ps(x, y)} +- } +- #[inline] +- pub fn f32x4_min(x: f32x4, y: f32x4) -> f32x4 { +- unsafe {super::x86_mm_min_ps(x, y)} +- } +- +- macro_rules! bools { +- ($($ty: ty, $all: ident, $any: ident, $movemask: ident, $width: expr;)*) => { +- $( +- #[inline] +- pub fn $all(x: $ty) -> bool { +- unsafe { +- super::$movemask(mem::transmute(x)) == (1 << $width) - 1 +- } +- } +- #[inline] +- pub fn $any(x: $ty) -> bool { +- unsafe { +- super::$movemask(mem::transmute(x)) != 0 +- } +- } +- )* +- } +- } +- +- bools! 
{ +- bool32fx4, bool32fx4_all, bool32fx4_any, x86_mm_movemask_ps, 4; +- bool8ix16, bool8ix16_all, bool8ix16_any, x86_mm_movemask_epi8, 16; +- bool16ix8, bool16ix8_all, bool16ix8_any, x86_mm_movemask_epi8, 16; +- bool32ix4, bool32ix4_all, bool32ix4_any, x86_mm_movemask_epi8, 16; +- } +-} +- +-// 32 bit floats +- +-pub trait Sse2F32x4 { +- fn to_f64(self) -> f64x2; +- fn move_mask(self) -> u32; +-} +-impl Sse2F32x4 for f32x4 { +- #[inline] +- fn to_f64(self) -> f64x2 { +- unsafe { +- simd_cast(f32x2(self.0, self.1)) +- } +- } +- fn move_mask(self) -> u32 { +- unsafe {x86_mm_movemask_ps(self) as u32} +- } +-} +-pub trait Sse2Bool32fx4 { +- fn move_mask(self) -> u32; +-} +-impl Sse2Bool32fx4 for bool32fx4 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_ps(bitcast(self)) as u32} +- } +-} +- +-// 64 bit floats +- +-pub trait Sse2F64x2 { +- fn move_mask(self) -> u32; +- fn sqrt(self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +-} +-impl Sse2F64x2 for f64x2 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} +- } +- +- #[inline] +- fn sqrt(self) -> Self { +- unsafe { x86_mm_sqrt_pd(self) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_pd(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_pd(self, other) } +- } +-} +- +-pub trait Sse2Bool64fx2 { +- fn move_mask(self) -> u32; +-} +-impl Sse2Bool64fx2 for bool64fx2 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_pd(bitcast(self)) as u32} +- } +-} +- +-// 64 bit ints +- +-pub trait Sse2U64x2 {} +-impl Sse2U64x2 for u64x2 {} +- +-pub trait Sse2I64x2 {} +-impl Sse2I64x2 for i64x2 {} +- +-pub trait Sse2Bool64ix2 {} +-impl Sse2Bool64ix2 for bool64ix2 {} +- +-// 32 bit ints +- +-pub trait Sse2U32x4 { +- fn low_mul(self, other: Self) -> u64x2; +-} +-impl Sse2U32x4 for u32x4 { +- #[inline] +- fn low_mul(self, other: Self) -> u64x2 { +- unsafe { x86_mm_mul_epu32(self, other) } +- } +-} +- +-pub trait Sse2I32x4 { +- fn packs(self, other: Self) -> i16x8; +-} +-impl Sse2I32x4 for i32x4 { +- #[inline] +- fn packs(self, other: Self) -> i16x8 { +- unsafe { x86_mm_packs_epi32(self, other) } +- } +-} +- +-pub trait Sse2Bool32ix4 {} +-impl Sse2Bool32ix4 for bool32ix4 {} +- +-// 16 bit ints +- +-pub trait Sse2U16x8 { +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +- fn avg(self, other: Self) -> Self; +- fn mulhi(self, other: Self) -> Self; +-} +-impl Sse2U16x8 for u16x8 { +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epu16(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { x86_mm_subs_epu16(self, other) } +- } +- +- #[inline] +- fn avg(self, other: Self) -> Self { +- unsafe { x86_mm_avg_epu16(self, other) } +- } +- +- #[inline] +- fn mulhi(self, other: Self) -> Self { +- unsafe { x86_mm_mulhi_epu16(self, other) } +- } +-} +- +-pub trait Sse2I16x8 { +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +- fn madd(self, other: Self) -> i32x4; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn mulhi(self, other: Self) -> Self; +- fn packs(self, other: Self) -> i8x16; +- fn packus(self, other: Self) -> u8x16; +-} +-impl Sse2I16x8 for i16x8 { +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epi16(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { 
x86_mm_subs_epi16(self, other) } +- } +- +- #[inline] +- fn madd(self, other: Self) -> i32x4 { +- unsafe { x86_mm_madd_epi16(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epi16(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epi16(self, other) } +- } +- +- #[inline] +- fn mulhi(self, other: Self) -> Self { +- unsafe { x86_mm_mulhi_epi16(self, other) } +- } +- +- #[inline] +- fn packs(self, other: Self) -> i8x16 { +- unsafe { x86_mm_packs_epi16(self, other) } +- } +- #[inline] +- fn packus(self, other: Self) -> u8x16 { +- unsafe { x86_mm_packus_epi16(self, other) } +- } +-} +- +-pub trait Sse2Bool16ix8 {} +-impl Sse2Bool16ix8 for bool16ix8 {} +- +-// 8 bit ints +- +-pub trait Sse2U8x16 { +- fn move_mask(self) -> u32; +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +- fn avg(self, other: Self) -> Self; +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn sad(self, other: Self) -> u64x2; +-} +-impl Sse2U8x16 for u8x16 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} +- } +- +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epu8(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { x86_mm_subs_epu8(self, other) } +- } +- +- #[inline] +- fn avg(self, other: Self) -> Self { +- unsafe { x86_mm_avg_epu8(self, other) } +- } +- +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epu8(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epu8(self, other) } +- } +- +- #[inline] +- fn sad(self, other: Self) -> u64x2 { +- unsafe { x86_mm_sad_epu8(self, other) } +- } +-} +- +-pub trait Sse2I8x16 { +- fn move_mask(self) -> u32; +- fn adds(self, other: Self) -> Self; +- fn subs(self, other: Self) -> Self; +-} +-impl Sse2I8x16 for i8x16 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} +- } +- +- #[inline] +- fn adds(self, other: Self) -> Self { +- unsafe { x86_mm_adds_epi8(self, other) } +- } +- #[inline] +- fn subs(self, other: Self) -> Self { +- unsafe { x86_mm_subs_epi8(self, other) } +- } +-} +- +-pub trait Sse2Bool8ix16 { +- fn move_mask(self) -> u32; +-} +-impl Sse2Bool8ix16 for bool8ix16 { +- #[inline] +- fn move_mask(self) -> u32 { +- unsafe { x86_mm_movemask_epi8(bitcast(self)) as u32} +- } +-} +diff --git a/third_party/rust/simd/src/x86/sse3.rs b/third_party/rust/simd/src/x86/sse3.rs +deleted file mode 100644 +index bd70b569f9c0..000000000000 +--- a/third_party/rust/simd/src/x86/sse3.rs ++++ /dev/null +@@ -1,57 +0,0 @@ +-use sixty_four::*; +-use super::super::*; +- +-extern "platform-intrinsic" { +- fn x86_mm_addsub_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_addsub_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_hadd_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_hadd_pd(x: f64x2, y: f64x2) -> f64x2; +- fn x86_mm_hsub_ps(x: f32x4, y: f32x4) -> f32x4; +- fn x86_mm_hsub_pd(x: f64x2, y: f64x2) -> f64x2; +-} +- +-pub trait Sse3F32x4 { +- fn addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +- +-impl Sse3F32x4 for f32x4 { +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm_addsub_ps(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_ps(self, other) } +- } +- +- #[inline] +- fn hsub(self, 
other: Self) -> Self { +- unsafe { x86_mm_hsub_ps(self, other) } +- } +-} +- +-pub trait Sse3F64x2 { +- fn addsub(self, other: Self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +- +-impl Sse3F64x2 for f64x2 { +- #[inline] +- fn addsub(self, other: Self) -> Self { +- unsafe { x86_mm_addsub_pd(self, other) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_pd(self, other) } +- } +- +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm_hsub_pd(self, other) } +- } +-} +diff --git a/third_party/rust/simd/src/x86/sse4_1.rs b/third_party/rust/simd/src/x86/sse4_1.rs +deleted file mode 100644 +index fa44678a0584..000000000000 +--- a/third_party/rust/simd/src/x86/sse4_1.rs ++++ /dev/null +@@ -1,155 +0,0 @@ +-use super::super::*; +-use x86::sse2::*; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm_dp_ps(x: f32x4, y: f32x4, z: i32) -> f32x4; +- fn x86_mm_dp_pd(x: f64x2, y: f64x2, z: i32) -> f64x2; +- fn x86_mm_max_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_max_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_max_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_max_epu32(x: u32x4, y: u32x4) -> u32x4; +- fn x86_mm_min_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_min_epu16(x: u16x8, y: u16x8) -> u16x8; +- fn x86_mm_min_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_min_epu32(x: u32x4, y: u32x4) -> u32x4; +- fn x86_mm_minpos_epu16(x: u16x8) -> u16x8; +- fn x86_mm_mpsadbw_epu8(x: u8x16, y: u8x16, z: i32) -> u16x8; +- fn x86_mm_mul_epi32(x: i32x4, y: i32x4) -> i64x2; +- fn x86_mm_packus_epi32(x: i32x4, y: i32x4) -> u16x8; +- fn x86_mm_testc_si128(x: u64x2, y: u64x2) -> i32; +- fn x86_mm_testnzc_si128(x: u64x2, y: u64x2) -> i32; +- fn x86_mm_testz_si128(x: u64x2, y: u64x2) -> i32; +-} +- +-// 32 bit floats +- +-pub trait Sse41F32x4 {} +-impl Sse41F32x4 for f32x4 {} +- +-// 64 bit floats +- +-pub trait Sse41F64x2 {} +-impl Sse41F64x2 for f64x2 {} +- +-// 64 bit integers +- +-pub trait Sse41U64x2 { +- fn testc(self, other: Self) -> i32; +- fn testnzc(self, other: Self) -> i32; +- fn testz(self, other: Self) -> i32; +-} +-impl Sse41U64x2 for u64x2 { +- #[inline] +- fn testc(self, other: Self) -> i32 { +- unsafe { x86_mm_testc_si128(self, other) } +- } +- #[inline] +- fn testnzc(self, other: Self) -> i32 { +- unsafe { x86_mm_testnzc_si128(self, other) } +- } +- #[inline] +- fn testz(self, other: Self) -> i32 { +- unsafe { x86_mm_testz_si128(self, other) } +- } +-} +-pub trait Sse41I64x2 {} +-impl Sse41I64x2 for i64x2 {} +- +-pub trait Sse41Bool64ix2 {} +-impl Sse41Bool64ix2 for bool64ix2 {} +- +-// 32 bit integers +- +-pub trait Sse41U32x4 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +-} +-impl Sse41U32x4 for u32x4 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epu32(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epu32(self, other) } +- } +-} +-pub trait Sse41I32x4 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn low_mul(self, other: Self) -> i64x2; +- fn packus(self, other: Self) -> u16x8; +-} +-impl Sse41I32x4 for i32x4 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epi32(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epi32(self, other) } +- } +- +- #[inline] +- fn low_mul(self, other: Self) -> i64x2 { +- unsafe { x86_mm_mul_epi32(self, other) } +- } +- 
#[inline] +- fn packus(self, other: Self) -> u16x8 { +- unsafe { x86_mm_packus_epi32(self, other) } +- } +-} +- +-pub trait Sse41Bool32ix4 {} +-impl Sse41Bool32ix4 for bool32ix4 {} +- +-// 16 bit integers +- +-pub trait Sse41U16x8 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +- fn minpos(self) -> Self; +-} +-impl Sse41U16x8 for u16x8 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epu16(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epu16(self, other) } +- } +- +- #[inline] +- fn minpos(self) -> Self { +- unsafe { x86_mm_minpos_epu16(self) } +- } +-} +-pub trait Sse41I16x8 {} +-impl Sse41I16x8 for i16x8 {} +- +-pub trait Sse41Bool16ix8 {} +-impl Sse41Bool16ix8 for bool16ix8 {} +- +-// 8 bit integers +- +-pub trait Sse41U8x16 {} +-impl Sse41U8x16 for u8x16 {} +-pub trait Sse41I8x16 { +- fn max(self, other: Self) -> Self; +- fn min(self, other: Self) -> Self; +-} +-impl Sse41I8x16 for i8x16 { +- #[inline] +- fn max(self, other: Self) -> Self { +- unsafe { x86_mm_max_epi8(self, other) } +- } +- #[inline] +- fn min(self, other: Self) -> Self { +- unsafe { x86_mm_min_epi8(self, other) } +- } +-} +- +-pub trait Sse41Bool8ix16 {} +-impl Sse41Bool8ix16 for bool8ix16 {} +diff --git a/third_party/rust/simd/src/x86/sse4_2.rs b/third_party/rust/simd/src/x86/sse4_2.rs +deleted file mode 100644 +index 5afe4583cf71..000000000000 +--- a/third_party/rust/simd/src/x86/sse4_2.rs ++++ /dev/null +@@ -1,19 +0,0 @@ +-use i8x16; +- +-#[allow(dead_code)] +-extern "platform-intrinsic" { +- fn x86_mm_cmpestra(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrc(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestri(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrm(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i8x16; +- fn x86_mm_cmpestro(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrs(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpestrz(x: i8x16, y: i32, z: i8x16, w: i32, a: i32) -> i32; +- fn x86_mm_cmpistra(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrc(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistri(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrm(x: i8x16, y: i8x16, z: i32) -> i8x16; +- fn x86_mm_cmpistro(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrs(x: i8x16, y: i8x16, z: i32) -> i32; +- fn x86_mm_cmpistrz(x: i8x16, y: i8x16, z: i32) -> i32; +-} +diff --git a/third_party/rust/simd/src/x86/ssse3.rs b/third_party/rust/simd/src/x86/ssse3.rs +deleted file mode 100644 +index aa22a08a68a4..000000000000 +--- a/third_party/rust/simd/src/x86/ssse3.rs ++++ /dev/null +@@ -1,172 +0,0 @@ +-use super::super::*; +-use bitcast; +- +-macro_rules! 
bitcast { +- ($func: ident($a: ident, $b: ident)) => { +- bitcast($func(bitcast($a), bitcast($b))) +- } +-} +- +-extern "platform-intrinsic" { +- fn x86_mm_abs_epi8(x: i8x16) -> i8x16; +- fn x86_mm_abs_epi16(x: i16x8) -> i16x8; +- fn x86_mm_abs_epi32(x: i32x4) -> i32x4; +- fn x86_mm_hadd_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_hadd_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_hadds_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_hsub_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_hsub_epi32(x: i32x4, y: i32x4) -> i32x4; +- fn x86_mm_hsubs_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_maddubs_epi16(x: u8x16, y: i8x16) -> i16x8; +- fn x86_mm_mulhrs_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_shuffle_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_sign_epi8(x: i8x16, y: i8x16) -> i8x16; +- fn x86_mm_sign_epi16(x: i16x8, y: i16x8) -> i16x8; +- fn x86_mm_sign_epi32(x: i32x4, y: i32x4) -> i32x4; +-} +- +-// 32 bit integers +- +-pub trait Ssse3I32x4 { +- fn abs(self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn sign(self, other: Self) -> Self; +-} +-impl Ssse3I32x4 for i32x4 { +- #[inline] +- fn abs(self) -> Self { +- unsafe { x86_mm_abs_epi32(self) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_epi32(self, other) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm_hsub_epi32(self, other) } +- } +- +- #[inline] +- fn sign(self, other: Self) -> Self { +- unsafe { x86_mm_sign_epi32(self, other) } +- } +-} +- +-pub trait Ssse3U32x4 { +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +-impl Ssse3U32x4 for u32x4 { +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hadd_epi32(self, other)) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hsub_epi32(self, other)) } +- } +-} +- +-// 16 bit integers +- +-pub trait Ssse3I16x8 { +- fn abs(self) -> Self; +- fn hadd(self, other: Self) -> Self; +- fn hadds(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +- fn hsubs(self, other: Self) -> Self; +- fn sign(self, other: Self) -> Self; +- fn mulhrs(self, other: Self) -> Self; +-} +-impl Ssse3I16x8 for i16x8 { +- #[inline] +- fn abs(self) -> Self { +- unsafe { x86_mm_abs_epi16(self) } +- } +- +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { x86_mm_hadd_epi16(self, other) } +- } +- #[inline] +- fn hadds(self, other: Self) -> Self { +- unsafe { x86_mm_hadds_epi16(self, other) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { x86_mm_hsub_epi16(self, other) } +- } +- #[inline] +- fn hsubs(self, other: Self) -> Self { +- unsafe { x86_mm_hsubs_epi16(self, other) } +- } +- +- #[inline] +- fn sign(self, other: Self) -> Self { +- unsafe { x86_mm_sign_epi16(self, other) } +- } +- +- #[inline] +- fn mulhrs(self, other: Self) -> Self { +- unsafe { x86_mm_mulhrs_epi16(self, other) } +- } +-} +- +-pub trait Ssse3U16x8 { +- fn hadd(self, other: Self) -> Self; +- fn hsub(self, other: Self) -> Self; +-} +-impl Ssse3U16x8 for u16x8 { +- #[inline] +- fn hadd(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hadd_epi16(self, other)) } +- } +- #[inline] +- fn hsub(self, other: Self) -> Self { +- unsafe { bitcast!(x86_mm_hsub_epi16(self, other)) } +- } +-} +- +- +-// 8 bit integers +- +-pub trait Ssse3U8x16 { +- fn shuffle_bytes(self, indices: Self) -> Self; +- fn maddubs(self, other: i8x16) -> i16x8; +-} +- +-impl Ssse3U8x16 for u8x16 { 
+- #[inline] +- fn shuffle_bytes(self, indices: Self) -> Self { +- unsafe {bitcast!(x86_mm_shuffle_epi8(self, indices))} +- } +- +- fn maddubs(self, other: i8x16) -> i16x8 { +- unsafe { x86_mm_maddubs_epi16(self, other) } +- } +-} +- +-pub trait Ssse3I8x16 { +- fn abs(self) -> Self; +- fn shuffle_bytes(self, indices: Self) -> Self; +- fn sign(self, other: Self) -> Self; +-} +-impl Ssse3I8x16 for i8x16 { +- #[inline] +- fn abs(self) -> Self { +- unsafe {x86_mm_abs_epi8(self)} +- } +- #[inline] +- fn shuffle_bytes(self, indices: Self) -> Self { +- unsafe { +- x86_mm_shuffle_epi8(self, indices) +- } +- } +- +- #[inline] +- fn sign(self, other: Self) -> Self { +- unsafe { x86_mm_sign_epi8(self, other) } +- } +-} +diff --git a/toolkit/moz.configure b/toolkit/moz.configure +index c2b3c768cba9..c3f3de62f09a 100644 +--- a/toolkit/moz.configure ++++ b/toolkit/moz.configure +@@ -696,14 +696,11 @@ set_config('MOZ_ENABLE_WEBRENDER', webrender.enable) + option('--enable-rust-simd', env='MOZ_RUST_SIMD', + help='Enable explicit SIMD in Rust code.') + +-@depends('--enable-rust-simd', target, rustc_info) +-def rust_simd(value, target, rustc_info): +- # As of 2018-06-05, the simd crate only works on aarch64, +- # armv7, x86 and x86_64. ++@depends('--enable-rust-simd', target) ++def rust_simd(value, target): ++ # As of 2019-03-04, the simd-accel feature of encoding_rs has not ++ # been properly set up outside aarch64, armv7, x86 and x86_64. + if target.cpu in ('aarch64', 'arm', 'x86', 'x86_64') and value: +- if rustc_info and rustc_info.version >= Version('1.33.0'): +- die('--enable-rust-simd does not work with Rust 1.33 or later. ' +- 'See https://bugzilla.mozilla.org/show_bug.cgi?id=1521249 .') + return True + + set_config('MOZ_RUST_SIMD', rust_simd) +-- +2.21.0 + -- cgit v1.2.1
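
A note on the simd -> packed_simd migration this patch completes: the deleted crate modeled comparison results as per-width boolean vector types (bool8ix16, bool32fx4, and so on) whose all()/any() reductions the removed tests exercised. packed_simd keeps the same overall shape of API but returns dedicated mask types from lane-wise comparisons. The sketch below is not part of the patch; it re-expresses the deleted test_u8x16_only_last_* cases, assuming packed_simd 0.3 and, as with the old simd crate, a nightly Rust toolchain:

    // Minimal sketch of the eq/any/all pattern from the removed tests,
    // rewritten against packed_simd.
    use packed_simd::u8x16;

    fn main() {
        let x1 = u8x16::splat(1);
        let x2 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1);

        // A lane-wise compare now yields a mask type (m8x16) rather than
        // the old bool8ix16 wrapper; the reductions keep their names.
        let mask = x1.eq(x2);
        assert!(mask.any());  // only the last lane matches
        assert!(!mask.all()); // so the all-lanes reduction is false
    }

The moz.configure hunk fits the same picture: --enable-rust-simd now only toggles the simd-accel feature of encoding_rs, which is what pulls in the vendored packed_simd, and the guard that refused Rust 1.33 or later is dropped because the build no longer depends on the unmaintained simd crate that broke on those compilers.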