ppv_lite86/
types.rs

1#![allow(non_camel_case_types)]
2use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};
3
/// Binary "and-not" combination of two values of the same type.
///
/// This declaration does not say which operand is complemented; that is fixed by the
/// implementations, which live outside this module.
pub trait AndNot {
    /// Result type of [`AndNot::andnot`].
    type Output;

    /// Combines `self` with `rhs`, consuming both operands.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Swap operation that maps a value to another value of the same type.
///
/// The name suggests a byte-order reversal; the granularity (per word, per vector, ...) is
/// decided by each implementation.
pub trait BSwap {
    /// Consumes `self` and returns the swapped value.
    fn bswap(self) -> Self;
}
11/// Ops that depend on word size
12pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
13/// Ops that are independent of word size and endian
14pub trait BitOps0:
15    BitAnd<Output = Self>
16    + BitOr<Output = Self>
17    + BitXor<Output = Self>
18    + BitXorAssign
19    + Not<Output = Self>
20    + AndNot<Output = Self>
21    + Sized
22    + Copy
23    + Clone
24{
25}
26
27pub trait BitOps32: BitOps0 + RotateEachWord32 {}
28pub trait BitOps64: BitOps32 + RotateEachWord64 {}
29pub trait BitOps128: BitOps64 + RotateEachWord128 {}
30
/// Fixed-distance rotations applied to every word of a value.
///
/// Each method name ends in its rotation distance (7, 8, 11, 12, 16, 20, 24 or 25). Going by
/// the names, every word is rotated right by that many bits; the exact semantics are set by the
/// implementations, which are not part of this module.
pub trait RotateEachWord32 {
    /// Rotation by 7, per the method name.
    fn rotate_each_word_right7(self) -> Self;
    /// Rotation by 8, per the method name.
    fn rotate_each_word_right8(self) -> Self;
    /// Rotation by 11, per the method name.
    fn rotate_each_word_right11(self) -> Self;
    /// Rotation by 12, per the method name.
    fn rotate_each_word_right12(self) -> Self;
    /// Rotation by 16, per the method name.
    fn rotate_each_word_right16(self) -> Self;
    /// Rotation by 20, per the method name.
    fn rotate_each_word_right20(self) -> Self;
    /// Rotation by 24, per the method name.
    fn rotate_each_word_right24(self) -> Self;
    /// Rotation by 25, per the method name.
    fn rotate_each_word_right25(self) -> Self;
}
41
/// The additional fixed rotation needed once words are 64 bits wide.
///
/// As with [`RotateEachWord32`], the distance (32) is taken from the method name; the exact
/// semantics are defined by the implementations.
pub trait RotateEachWord64 {
    /// Rotation by 32, per the method name.
    fn rotate_each_word_right32(self) -> Self;
}
45
/// Marker trait with no methods; it exists so that [`BitOps128`] can require it.
pub trait RotateEachWord128 {}
47
48// Vector type naming scheme:
49// uN[xP]xL
// Unsigned; N-bit words * P words per lane * L lanes
51//
52// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
53// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
54// slow inter-lane operations.
55
56use crate::arch::{vec128_storage, vec256_storage, vec512_storage};
57
/// Conversion from `T` that is only valid under conditions the compiler cannot check.
///
/// The preconditions are specific to each implementation and are not spelled out here, which is
/// why the clippy lint about missing safety documentation is silenced.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Builds `Self` out of `t`; the caller is responsible for the implementation's
    /// preconditions.
    unsafe fn unsafe_from(t: T) -> Self;
}
62
/// A two-element vector; each element is either a word or a vector in its own right.
pub trait Vec2<W> {
    /// Returns the element selected by `i`.
    ///
    /// How an out-of-range `i` is treated is left to the implementation.
    fn extract(self, i: u32) -> W;

    /// Returns the vector with `w` placed at the position selected by `i`.
    fn insert(self, w: W, i: u32) -> Self;
}
68
/// A four-element vector; each element is either a word or a vector in its own right.
pub trait Vec4<W> {
    /// Returns the element selected by `i`.
    ///
    /// How an out-of-range `i` is treated is left to the implementation.
    fn extract(self, i: u32) -> W;

    /// Returns the vector with `w` placed at the position selected by `i`.
    fn insert(self, w: W, i: u32) -> Self;
}
/// Extra [`Vec4`] functionality that not every `Vec4` type may implement yet.
///
/// NOTE: any function here may migrate into `Vec4` in a patch release. To stay unaffected,
/// import `Vec4Ext` only alongside `Vec4` and call its methods unqualified.
pub trait Vec4Ext<W> {
    /// Takes four vectors and returns four vectors.
    ///
    /// Going by the name, the inputs are treated as the rows of a 4x4 arrangement and the
    /// outputs are its columns; confirm against the implementations.
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized;
}
/// A vector that can be converted into a scalar representation `T`.
pub trait Vector<T> {
    /// Consumes the vector and returns it as `T`.
    fn to_scalars(self) -> T;
}
83
// TODO: multiples of 4 should inherit this
/// A vector of four words. Depending on the word size, an operation may cross lanes.
///
/// The digits in each method name identify the permutation. The convention (which digit names
/// the source and which the destination) is defined by the implementations.
pub trait Words4 {
    /// Word permutation `1230`, per the method name.
    fn shuffle1230(self) -> Self;
    /// Word permutation `2301`, per the method name.
    fn shuffle2301(self) -> Self;
    /// Word permutation `3012`, per the method name.
    fn shuffle3012(self) -> Self;
}
91
/// A vector composed of one or more lanes, each composed of four words.
///
/// The digits in each method name identify the permutation of words within a lane. The
/// convention is defined by the implementations.
pub trait LaneWords4 {
    /// Per-lane word permutation `1230`, per the method name.
    fn shuffle_lane_words1230(self) -> Self;
    /// Per-lane word permutation `2301`, per the method name.
    fn shuffle_lane_words2301(self) -> Self;
    /// Per-lane word permutation `3012`, per the method name.
    fn shuffle_lane_words3012(self) -> Self;
}
98
// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size.
///
/// The number in each method name is the range size in bits: 1, 2, 4, 8, 16, 32 or 64.
pub trait Swap64 {
    /// Exchange with range size 1.
    fn swap1(self) -> Self;
    /// Exchange with range size 2.
    fn swap2(self) -> Self;
    /// Exchange with range size 4.
    fn swap4(self) -> Self;
    /// Exchange with range size 8.
    fn swap8(self) -> Self;
    /// Exchange with range size 16.
    fn swap16(self) -> Self;
    /// Exchange with range size 32.
    fn swap32(self) -> Self;
    /// Exchange with range size 64.
    fn swap64(self) -> Self;
}
110
111pub trait u32x4<M: Machine>:
112    BitOps32
113    + Store<vec128_storage>
114    + ArithOps
115    + Vec4<u32>
116    + Words4
117    + LaneWords4
118    + StoreBytes
119    + MultiLane<[u32; 4]>
120    + Into<vec128_storage>
121{
122}
123pub trait u64x2<M: Machine>:
124    BitOps64
125    + Store<vec128_storage>
126    + ArithOps
127    + Vec2<u64>
128    + MultiLane<[u64; 2]>
129    + Into<vec128_storage>
130{
131}
132pub trait u128x1<M: Machine>:
133    BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
134{
135}
136
137pub trait u32x4x2<M: Machine>:
138    BitOps32
139    + Store<vec256_storage>
140    + Vec2<M::u32x4>
141    + MultiLane<[M::u32x4; 2]>
142    + ArithOps
143    + Into<vec256_storage>
144{
145}
146pub trait u64x2x2<M: Machine>:
147    BitOps64
148    + Store<vec256_storage>
149    + Vec2<M::u64x2>
150    + MultiLane<[M::u64x2; 2]>
151    + ArithOps
152    + StoreBytes
153    + Into<vec256_storage>
154{
155}
156pub trait u64x4<M: Machine>:
157    BitOps64
158    + Store<vec256_storage>
159    + Vec4<u64>
160    + MultiLane<[u64; 4]>
161    + ArithOps
162    + Words4
163    + StoreBytes
164    + Into<vec256_storage>
165{
166}
167pub trait u128x2<M: Machine>:
168    BitOps128
169    + Store<vec256_storage>
170    + Vec2<M::u128x1>
171    + MultiLane<[M::u128x1; 2]>
172    + Swap64
173    + Into<vec256_storage>
174{
175}
176
177pub trait u32x4x4<M: Machine>:
178    BitOps32
179    + Store<vec512_storage>
180    + Vec4<M::u32x4>
181    + Vec4Ext<M::u32x4>
182    + Vector<[u32; 16]>
183    + MultiLane<[M::u32x4; 4]>
184    + ArithOps
185    + LaneWords4
186    + Into<vec512_storage>
187{
188}
189pub trait u64x2x4<M: Machine>:
190    BitOps64
191    + Store<vec512_storage>
192    + Vec4<M::u64x2>
193    + MultiLane<[M::u64x2; 4]>
194    + ArithOps
195    + Into<vec512_storage>
196{
197}
198// TODO: Words4
199pub trait u128x4<M: Machine>:
200    BitOps128
201    + Store<vec512_storage>
202    + Vec4<M::u128x1>
203    + MultiLane<[M::u128x1; 4]>
204    + Swap64
205    + Into<vec512_storage>
206{
207}
208
/// A vector made up of several 128-bit lanes, convertible to and from `Lanes`.
pub trait MultiLane<Lanes> {
    /// Breaks the multi-lane vector apart into its single-lane vectors.
    fn to_lanes(self) -> Lanes;

    /// Assembles a multi-lane vector out of the given individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
}
216
/// Joins single vectors into one multi-lane vector of type `V`.
pub trait VZip<V> {
    /// Consumes `self` and returns the combined vector.
    fn vzip(self) -> V;
}
221
222impl<V, T> VZip<V> for T
223where
224    V: MultiLane<T>,
225{
226    #[inline(always)]
227    fn vzip(self) -> V {
228        V::from_lanes(self)
229    }
230}
231
232pub trait Machine: Sized + Copy {
233    type u32x4: u32x4<Self>;
234    type u64x2: u64x2<Self>;
235    type u128x1: u128x1<Self>;
236
237    type u32x4x2: u32x4x2<Self>;
238    type u64x2x2: u64x2x2<Self>;
239    type u64x4: u64x4<Self>;
240    type u128x2: u128x2<Self>;
241
242    type u32x4x4: u32x4x4<Self>;
243    type u64x2x4: u64x2x4<Self>;
244    type u128x4: u128x4<Self>;
245
246    #[inline(always)]
247    fn unpack<S, V: Store<S>>(self, s: S) -> V {
248        unsafe { V::unpack(s) }
249    }
250
251    #[inline(always)]
252    fn vec<V, A>(self, a: A) -> V
253    where
254        V: MultiLane<A>,
255    {
256        V::from_lanes(a)
257    }
258
259    #[inline(always)]
260    fn read_le<V>(self, input: &[u8]) -> V
261    where
262        V: StoreBytes,
263    {
264        unsafe { V::unsafe_read_le(input) }
265    }
266
267    #[inline(always)]
268    fn read_be<V>(self, input: &[u8]) -> V
269    where
270        V: StoreBytes,
271    {
272        unsafe { V::unsafe_read_be(input) }
273    }
274
275    /// # Safety
276    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
277    /// environment.
278    unsafe fn instance() -> Self;
279}
280
/// Construction of a vector from a storage value of type `S`.
pub trait Store<S> {
    /// Turns the storage value `p` into `Self`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}
287
/// Conversion between a vector and byte slices, in two variants named `le` and `be`.
///
/// The suffixes presumably mean little- and big-endian. This declaration does not state how
/// long `input` or `out` must be; check the implementation before relying on a length.
pub trait StoreBytes {
    /// Builds `Self` from `input` (the `le` variant).
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;

    /// Builds `Self` from `input` (the `be` variant).
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;

    /// Writes `self` into `out` (the `le` variant).
    fn write_le(self, out: &mut [u8]);

    /// Writes `self` into `out` (the `be` variant).
    fn write_be(self, out: &mut [u8]);
}
ppv_lite86/types.rs

ppv_lite86/
types.rs