pyo3/conversions/std/
osstr.rs

1use crate::conversion::IntoPyObject;
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::Bound;
4use crate::types::PyString;
5use crate::{ffi, FromPyObject, PyAny, PyResult, Python};
6use std::borrow::Cow;
7use std::convert::Infallible;
8use std::ffi::{OsStr, OsString};
9
10impl<'py> IntoPyObject<'py> for &OsStr {
11    type Target = PyString;
12    type Output = Bound<'py, Self::Target>;
13    type Error = Infallible;
14
15    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
16        // If the string is UTF-8, take the quick and easy shortcut
17        if let Some(valid_utf8_path) = self.to_str() {
18            return valid_utf8_path.into_pyobject(py);
19        }
20
21        // All targets besides windows support the std::os::unix::ffi::OsStrExt API:
22        // https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59
23        #[cfg(not(windows))]
24        {
25            #[cfg(target_os = "wasi")]
26            let bytes = std::os::wasi::ffi::OsStrExt::as_bytes(self);
27            #[cfg(not(target_os = "wasi"))]
28            let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self);
29
30            let ptr = bytes.as_ptr().cast();
31            let len = bytes.len() as ffi::Py_ssize_t;
32            unsafe {
33                // DecodeFSDefault automatically chooses an appropriate decoding mechanism to
34                // parse os strings losslessly (i.e. surrogateescape most of the time)
35                Ok(ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len)
36                    .assume_owned(py)
37                    .cast_into_unchecked::<PyString>())
38            }
39        }
40
41        #[cfg(windows)]
42        {
43            let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect();
44
45            unsafe {
46                // This will not panic because the data from encode_wide is well-formed Windows
47                // string data
48
49                Ok(
50                    ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t)
51                        .assume_owned(py)
52                        .cast_into_unchecked::<PyString>(),
53                )
54            }
55        }
56    }
57}
58
59impl<'py> IntoPyObject<'py> for &&OsStr {
60    type Target = PyString;
61    type Output = Bound<'py, Self::Target>;
62    type Error = Infallible;
63
64    #[inline]
65    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
66        (*self).into_pyobject(py)
67    }
68}
69
// There is no FromPyObject implementation for &OsStr: although one would be possible on Unix,
// it cannot be implemented on Windows, so it is omitted on every platform.
72
73impl FromPyObject<'_> for OsString {
74    fn extract_bound(ob: &Bound<'_, PyAny>) -> PyResult<Self> {
75        let pystring = ob.cast::<PyString>()?;
76
77        #[cfg(not(windows))]
78        {
79            // Decode from Python's lossless bytes string representation back into raw bytes
80            let fs_encoded_bytes = unsafe {
81                crate::Py::<crate::types::PyBytes>::from_owned_ptr(
82                    ob.py(),
83                    ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()),
84                )
85            };
86
87            // Create an OsStr view into the raw bytes from Python
88            #[cfg(target_os = "wasi")]
89            let os_str: &OsStr =
90                std::os::wasi::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py()));
91            #[cfg(not(target_os = "wasi"))]
92            let os_str: &OsStr =
93                std::os::unix::ffi::OsStrExt::from_bytes(fs_encoded_bytes.as_bytes(ob.py()));
94
95            Ok(os_str.to_os_string())
96        }
97
98        #[cfg(windows)]
99        {
100            use crate::types::string::PyStringMethods;
101
102            // Take the quick and easy shortcut if UTF-8
103            if let Ok(utf8_string) = pystring.to_cow() {
104                return Ok(utf8_string.into_owned().into());
105            }
106
107            // Get an owned allocated wide char buffer from PyString, which we have to deallocate
108            // ourselves
109            let size =
110                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) };
111            crate::err::error_on_minusone(ob.py(), size)?;
112
113            let mut buffer = vec![0; size as usize];
114            let bytes_read =
115                unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) };
116            assert_eq!(bytes_read, size);
117
118            // Copy wide char buffer into OsString
119            let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer);
120
121            Ok(os_string)
122        }
123    }
124}
125
126impl<'py> IntoPyObject<'py> for Cow<'_, OsStr> {
127    type Target = PyString;
128    type Output = Bound<'py, Self::Target>;
129    type Error = Infallible;
130
131    #[inline]
132    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
133        (*self).into_pyobject(py)
134    }
135}
136
137impl<'py> IntoPyObject<'py> for &Cow<'_, OsStr> {
138    type Target = PyString;
139    type Output = Bound<'py, Self::Target>;
140    type Error = Infallible;
141
142    #[inline]
143    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
144        (&**self).into_pyobject(py)
145    }
146}
147
148impl<'py> IntoPyObject<'py> for OsString {
149    type Target = PyString;
150    type Output = Bound<'py, Self::Target>;
151    type Error = Infallible;
152
153    #[inline]
154    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
155        self.as_os_str().into_pyobject(py)
156    }
157}
158
159impl<'py> IntoPyObject<'py> for &OsString {
160    type Target = PyString;
161    type Output = Bound<'py, Self::Target>;
162    type Error = Infallible;
163
164    #[inline]
165    fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
166        self.as_os_str().into_pyobject(py)
167    }
168}
169
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyString, PyStringMethods};
    use crate::{BoundObject, IntoPyObject, Python};
    use std::fmt::Debug;
    use std::{
        borrow::Cow,
        ffi::{OsStr, OsString},
    };

    // Bytes that are not valid UTF-8 must survive an OsStr -> Python str -> OsString round
    // trip unchanged. Not run on Windows, where an `OsStr` cannot be built from raw bytes.
    #[test]
    #[cfg(not(windows))]
    fn test_non_utf8_conversion() {
        #[cfg(not(target_os = "wasi"))]
        use std::os::unix::ffi::OsStrExt;
        #[cfg(target_os = "wasi")]
        use std::os::wasi::ffi::OsStrExt;

        Python::attach(|py| {
            // Deliberately not valid UTF-8.
            let raw: &[u8] = &[250, 251, 252, 253, 254, 255, 0, 255];
            let original = OsStr::from_bytes(raw);

            // Rust -> Python -> Rust has to give back the very same OS string.
            let as_python = original.into_pyobject(py).unwrap();
            let back: OsString = as_python.extract().unwrap();
            assert_eq!(original, back);
        });
    }

    // Every `IntoPyObject` flavour of an OS string must produce a Python `str` with the same
    // text, and extracting that `str` must reproduce the original value.
    #[test]
    fn test_intopyobject_roundtrip() {
        // Converts `value` to Python, compares the textual content, then extracts an
        // `OsString` back out and compares it with the input.
        fn check_roundtrip<'py, T>(py: Python<'py>, value: T)
        where
            T: IntoPyObject<'py> + AsRef<OsStr> + Debug + Clone,
            T::Error: Debug,
        {
            let expected: &OsStr = value.as_ref();

            let any = value.clone().into_pyobject(py).unwrap().into_any();
            let py_string = any.as_borrowed().cast::<PyString>().unwrap();
            assert_eq!(py_string.to_string_lossy(), expected.to_string_lossy());

            let extracted: OsString = py_string.extract().unwrap();
            assert_eq!(expected, extracted.as_os_str());
        }

        Python::attach(|py| {
            // Contains an embedded NUL, a newline and non-ASCII characters.
            // NOTE(review): the non-ASCII tail looks like mojibake of a different character
            // (possibly an emoji) -- confirm against the upstream file.
            let sample = OsStr::new("Hello\0\nšŸ");

            check_roundtrip::<&OsStr>(py, sample);
            check_roundtrip::<Cow<'_, OsStr>>(py, Cow::Borrowed(sample));
            check_roundtrip::<Cow<'_, OsStr>>(py, Cow::Owned(sample.to_os_string()));
            check_roundtrip::<OsString>(py, sample.to_os_string());
        });
    }
}
221}