ppv_lite86/
soft.rs

//! Implement 256- and 512-bit vectors in terms of 128-bit vectors, for machines without native wide SIMD.
2
3use crate::types::*;
4use crate::{vec128_storage, vec256_storage, vec512_storage};
5use core::marker::PhantomData;
6use core::ops::*;
7
/// Two values of type `W` handled together as one wider vector.
///
/// `G` is a phantom type parameter: it is recorded in the type only, and no
/// value of type `G` is ever stored.
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x2<W, G>(pub [W; 2], PhantomData<G>);

impl<W, G> x2<W, G> {
    /// Wraps the two elements of `xs`, keeping their order.
    #[inline(always)]
    pub fn new(xs: [W; 2]) -> Self {
        Self(xs, PhantomData)
    }
}
// Implements the binary operator trait `$op` for `x2` by calling `$method`
// on each pair of corresponding elements (element 0 first, then element 1).
macro_rules! fwd_binop_x2 {
    ($op:ident, $method:ident) => {
        impl<W: $op + Copy, G> $op for x2<W, G> {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $method(self, rhs: Self) -> Self::Output {
                let [a0, a1] = self.0;
                let [b0, b1] = rhs.0;
                x2::new([a0.$method(b0), a1.$method(b1)])
            }
        }
    };
}
// Implements the compound-assignment operator trait `$op` for `x2` by calling
// `$method` on each element in place with the matching element of `rhs`.
macro_rules! fwd_binop_assign_x2 {
    ($op:ident, $method:ident) => {
        impl<W: $op + Copy, G> $op for x2<W, G> {
            #[inline(always)]
            fn $method(&mut self, rhs: Self) {
                let [r0, r1] = rhs.0;
                let [l0, l1] = &mut self.0;
                l0.$method(r0);
                l1.$method(r1);
            }
        }
    };
}
// Expands to a method `$method(self) -> Self` that calls `$method` on both
// elements and rewraps the results. Meant to be used inside an `impl` block
// for `x2`.
macro_rules! fwd_unop_x2 {
    ($method:ident) => {
        #[inline(always)]
        fn $method(self) -> Self {
            let [first, second] = self.0;
            x2::new([first.$method(), second.$method()])
        }
    };
}
47impl<W, G> RotateEachWord32 for x2<W, G>
48where
49    W: Copy + RotateEachWord32,
50{
51    fwd_unop_x2!(rotate_each_word_right7);
52    fwd_unop_x2!(rotate_each_word_right8);
53    fwd_unop_x2!(rotate_each_word_right11);
54    fwd_unop_x2!(rotate_each_word_right12);
55    fwd_unop_x2!(rotate_each_word_right16);
56    fwd_unop_x2!(rotate_each_word_right20);
57    fwd_unop_x2!(rotate_each_word_right24);
58    fwd_unop_x2!(rotate_each_word_right25);
59}
60impl<W, G> RotateEachWord64 for x2<W, G>
61where
62    W: Copy + RotateEachWord64,
63{
64    fwd_unop_x2!(rotate_each_word_right32);
65}
66impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
67impl<W, G> BitOps0 for x2<W, G>
68where
69    W: BitOps0,
70    G: Copy,
71{
72}
73impl<W, G> BitOps32 for x2<W, G>
74where
75    W: BitOps32 + BitOps0,
76    G: Copy,
77{
78}
79impl<W, G> BitOps64 for x2<W, G>
80where
81    W: BitOps64 + BitOps0,
82    G: Copy,
83{
84}
85impl<W, G> BitOps128 for x2<W, G>
86where
87    W: BitOps128 + BitOps0,
88    G: Copy,
89{
90}
91fwd_binop_x2!(BitAnd, bitand);
92fwd_binop_x2!(BitOr, bitor);
93fwd_binop_x2!(BitXor, bitxor);
94fwd_binop_x2!(AndNot, andnot);
95fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
96fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
97fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
98impl<W, G> ArithOps for x2<W, G>
99where
100    W: ArithOps,
101    G: Copy,
102{
103}
104fwd_binop_x2!(Add, add);
105fwd_binop_assign_x2!(AddAssign, add_assign);
106impl<W: Not + Copy, G> Not for x2<W, G> {
107    type Output = x2<W::Output, G>;
108    #[inline(always)]
109    fn not(self) -> Self::Output {
110        x2::new([self.0[0].not(), self.0[1].not()])
111    }
112}
113impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
114    #[inline(always)]
115    unsafe fn unsafe_from(xs: [W; 2]) -> Self {
116        x2::new(xs)
117    }
118}
119impl<W: Copy, G> Vec2<W> for x2<W, G> {
120    #[inline(always)]
121    fn extract(self, i: u32) -> W {
122        self.0[i as usize]
123    }
124    #[inline(always)]
125    fn insert(mut self, w: W, i: u32) -> Self {
126        self.0[i as usize] = w;
127        self
128    }
129}
130impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
131    #[inline(always)]
132    unsafe fn unpack(p: vec256_storage) -> Self {
133        let p = p.split128();
134        x2::new([W::unpack(p[0]), W::unpack(p[1])])
135    }
136}
137impl<W, G> From<x2<W, G>> for vec256_storage
138where
139    W: Copy,
140    vec128_storage: From<W>,
141{
142    #[inline(always)]
143    fn from(x: x2<W, G>) -> Self {
144        vec256_storage::new128([x.0[0].into(), x.0[1].into()])
145    }
146}
147impl<W, G> Swap64 for x2<W, G>
148where
149    W: Swap64 + Copy,
150{
151    fwd_unop_x2!(swap1);
152    fwd_unop_x2!(swap2);
153    fwd_unop_x2!(swap4);
154    fwd_unop_x2!(swap8);
155    fwd_unop_x2!(swap16);
156    fwd_unop_x2!(swap32);
157    fwd_unop_x2!(swap64);
158}
159impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
160    #[inline(always)]
161    fn to_lanes(self) -> [W; 2] {
162        self.0
163    }
164    #[inline(always)]
165    fn from_lanes(lanes: [W; 2]) -> Self {
166        x2::new(lanes)
167    }
168}
169impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
170    #[inline(always)]
171    fn bswap(self) -> Self {
172        x2::new([self.0[0].bswap(), self.0[1].bswap()])
173    }
174}
175impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
176    #[inline(always)]
177    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
178        let input = input.split_at(input.len() / 2);
179        x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
180    }
181    #[inline(always)]
182    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
183        let input = input.split_at(input.len() / 2);
184        x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)])
185    }
186    #[inline(always)]
187    fn write_le(self, out: &mut [u8]) {
188        let out = out.split_at_mut(out.len() / 2);
189        self.0[0].write_le(out.0);
190        self.0[1].write_le(out.1);
191    }
192    #[inline(always)]
193    fn write_be(self, out: &mut [u8]) {
194        let out = out.split_at_mut(out.len() / 2);
195        self.0[0].write_be(out.0);
196        self.0[1].write_be(out.1);
197    }
198}
199impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
200    #[inline(always)]
201    fn shuffle_lane_words2301(self) -> Self {
202        Self::new([
203            self.0[0].shuffle_lane_words2301(),
204            self.0[1].shuffle_lane_words2301(),
205        ])
206    }
207    #[inline(always)]
208    fn shuffle_lane_words1230(self) -> Self {
209        Self::new([
210            self.0[0].shuffle_lane_words1230(),
211            self.0[1].shuffle_lane_words1230(),
212        ])
213    }
214    #[inline(always)]
215    fn shuffle_lane_words3012(self) -> Self {
216        Self::new([
217            self.0[0].shuffle_lane_words3012(),
218            self.0[1].shuffle_lane_words3012(),
219        ])
220    }
221}
222
/// Four values of type `W` handled together as one wider vector.
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x4<W>(pub [W; 4]);

impl<W> x4<W> {
    /// Wraps the four elements of `xs`, keeping their order.
    #[inline(always)]
    pub fn new(xs: [W; 4]) -> Self {
        Self(xs)
    }
}
// Implements the binary operator trait `$op` for `x4` by calling `$method`
// on each pair of corresponding elements, in index order.
macro_rules! fwd_binop_x4 {
    ($op:ident, $method:ident) => {
        impl<W: $op + Copy> $op for x4<W> {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $method(self, rhs: Self) -> Self::Output {
                let [a0, a1, a2, a3] = self.0;
                let [b0, b1, b2, b3] = rhs.0;
                x4([
                    a0.$method(b0),
                    a1.$method(b1),
                    a2.$method(b2),
                    a3.$method(b3),
                ])
            }
        }
    };
}
// Implements the compound-assignment operator trait `$op` for `x4` by calling
// `$method` on each element in place with the matching element of `rhs`.
macro_rules! fwd_binop_assign_x4 {
    ($op:ident, $method:ident) => {
        impl<W: $op + Copy> $op for x4<W> {
            #[inline(always)]
            fn $method(&mut self, rhs: Self) {
                let [r0, r1, r2, r3] = rhs.0;
                let [l0, l1, l2, l3] = &mut self.0;
                l0.$method(r0);
                l1.$method(r1);
                l2.$method(r2);
                l3.$method(r3);
            }
        }
    };
}
// Expands to a method `$method(self) -> Self` that calls `$method` on all
// four elements and rewraps the results. Meant to be used inside an `impl`
// block for `x4`.
macro_rules! fwd_unop_x4 {
    ($method:ident) => {
        #[inline(always)]
        fn $method(self) -> Self {
            let [e0, e1, e2, e3] = self.0;
            x4([e0.$method(), e1.$method(), e2.$method(), e3.$method()])
        }
    };
}
274impl<W> RotateEachWord32 for x4<W>
275where
276    W: Copy + RotateEachWord32,
277{
278    fwd_unop_x4!(rotate_each_word_right7);
279    fwd_unop_x4!(rotate_each_word_right8);
280    fwd_unop_x4!(rotate_each_word_right11);
281    fwd_unop_x4!(rotate_each_word_right12);
282    fwd_unop_x4!(rotate_each_word_right16);
283    fwd_unop_x4!(rotate_each_word_right20);
284    fwd_unop_x4!(rotate_each_word_right24);
285    fwd_unop_x4!(rotate_each_word_right25);
286}
287impl<W> RotateEachWord64 for x4<W>
288where
289    W: Copy + RotateEachWord64,
290{
291    fwd_unop_x4!(rotate_each_word_right32);
292}
293impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
294impl<W> BitOps0 for x4<W> where W: BitOps0 {}
295impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
296impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
297impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
298fwd_binop_x4!(BitAnd, bitand);
299fwd_binop_x4!(BitOr, bitor);
300fwd_binop_x4!(BitXor, bitxor);
301fwd_binop_x4!(AndNot, andnot);
302fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
303fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
304fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
305impl<W> ArithOps for x4<W> where W: ArithOps {}
306fwd_binop_x4!(Add, add);
307fwd_binop_assign_x4!(AddAssign, add_assign);
308impl<W: Not + Copy> Not for x4<W> {
309    type Output = x4<W::Output>;
310    #[inline(always)]
311    fn not(self) -> Self::Output {
312        x4([
313            self.0[0].not(),
314            self.0[1].not(),
315            self.0[2].not(),
316            self.0[3].not(),
317        ])
318    }
319}
320impl<W> UnsafeFrom<[W; 4]> for x4<W> {
321    #[inline(always)]
322    unsafe fn unsafe_from(xs: [W; 4]) -> Self {
323        x4(xs)
324    }
325}
326impl<W: Copy> Vec4<W> for x4<W> {
327    #[inline(always)]
328    fn extract(self, i: u32) -> W {
329        self.0[i as usize]
330    }
331    #[inline(always)]
332    fn insert(mut self, w: W, i: u32) -> Self {
333        self.0[i as usize] = w;
334        self
335    }
336}
337impl<W: Copy> Vec4Ext<W> for x4<W> {
338    #[inline(always)]
339    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
340    where
341        Self: Sized,
342    {
343        (
344            x4([a.0[0], b.0[0], c.0[0], d.0[0]]),
345            x4([a.0[1], b.0[1], c.0[1], d.0[1]]),
346            x4([a.0[2], b.0[2], c.0[2], d.0[2]]),
347            x4([a.0[3], b.0[3], c.0[3], d.0[3]]),
348        )
349    }
350}
351impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
352    #[inline(always)]
353    unsafe fn unpack(p: vec512_storage) -> Self {
354        let p = p.split128();
355        x4([
356            W::unpack(p[0]),
357            W::unpack(p[1]),
358            W::unpack(p[2]),
359            W::unpack(p[3]),
360        ])
361    }
362}
363impl<W> From<x4<W>> for vec512_storage
364where
365    W: Copy,
366    vec128_storage: From<W>,
367{
368    #[inline(always)]
369    fn from(x: x4<W>) -> Self {
370        vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
371    }
372}
373impl<W> Swap64 for x4<W>
374where
375    W: Swap64 + Copy,
376{
377    fwd_unop_x4!(swap1);
378    fwd_unop_x4!(swap2);
379    fwd_unop_x4!(swap4);
380    fwd_unop_x4!(swap8);
381    fwd_unop_x4!(swap16);
382    fwd_unop_x4!(swap32);
383    fwd_unop_x4!(swap64);
384}
385impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
386    #[inline(always)]
387    fn to_lanes(self) -> [W; 4] {
388        self.0
389    }
390    #[inline(always)]
391    fn from_lanes(lanes: [W; 4]) -> Self {
392        x4(lanes)
393    }
394}
395impl<W: BSwap + Copy> BSwap for x4<W> {
396    #[inline(always)]
397    fn bswap(self) -> Self {
398        x4([
399            self.0[0].bswap(),
400            self.0[1].bswap(),
401            self.0[2].bswap(),
402            self.0[3].bswap(),
403        ])
404    }
405}
406impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
407    #[inline(always)]
408    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
409        let n = input.len() / 4;
410        x4([
411            W::unsafe_read_le(&input[..n]),
412            W::unsafe_read_le(&input[n..n * 2]),
413            W::unsafe_read_le(&input[n * 2..n * 3]),
414            W::unsafe_read_le(&input[n * 3..]),
415        ])
416    }
417    #[inline(always)]
418    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
419        let n = input.len() / 4;
420        x4([
421            W::unsafe_read_be(&input[..n]),
422            W::unsafe_read_be(&input[n..n * 2]),
423            W::unsafe_read_be(&input[n * 2..n * 3]),
424            W::unsafe_read_be(&input[n * 3..]),
425        ])
426    }
427    #[inline(always)]
428    fn write_le(self, out: &mut [u8]) {
429        let n = out.len() / 4;
430        self.0[0].write_le(&mut out[..n]);
431        self.0[1].write_le(&mut out[n..n * 2]);
432        self.0[2].write_le(&mut out[n * 2..n * 3]);
433        self.0[3].write_le(&mut out[n * 3..]);
434    }
435    #[inline(always)]
436    fn write_be(self, out: &mut [u8]) {
437        let n = out.len() / 4;
438        self.0[0].write_be(&mut out[..n]);
439        self.0[1].write_be(&mut out[n..n * 2]);
440        self.0[2].write_be(&mut out[n * 2..n * 3]);
441        self.0[3].write_be(&mut out[n * 3..]);
442    }
443}
444impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
445    #[inline(always)]
446    fn shuffle_lane_words2301(self) -> Self {
447        x4([
448            self.0[0].shuffle_lane_words2301(),
449            self.0[1].shuffle_lane_words2301(),
450            self.0[2].shuffle_lane_words2301(),
451            self.0[3].shuffle_lane_words2301(),
452        ])
453    }
454    #[inline(always)]
455    fn shuffle_lane_words1230(self) -> Self {
456        x4([
457            self.0[0].shuffle_lane_words1230(),
458            self.0[1].shuffle_lane_words1230(),
459            self.0[2].shuffle_lane_words1230(),
460            self.0[3].shuffle_lane_words1230(),
461        ])
462    }
463    #[inline(always)]
464    fn shuffle_lane_words3012(self) -> Self {
465        x4([
466            self.0[0].shuffle_lane_words3012(),
467            self.0[1].shuffle_lane_words3012(),
468            self.0[2].shuffle_lane_words3012(),
469            self.0[3].shuffle_lane_words3012(),
470        ])
471    }
472}