1use anyhow::{anyhow, Result};
15use std::str;
16
17pub fn sanitize_url(resource: &str, allow_double_slashes: bool) -> Result<String> {
18 if resource == "*" || resource.is_empty() {
19 return Ok(resource.to_string());
20 }
21
22 let mut sanitized = String::with_capacity(resource.len());
23
24 for &ch in resource.as_bytes() {
26 if ch != b'\0' {
27 sanitized.push(ch as char);
28 }
29 }
30
31 let bytes = sanitized.as_bytes();
33 let mut i = 0;
34 while i < bytes.len() {
35 if bytes[i] == b'%' {
36 if i + 2 >= bytes.len() {
37 return Err(anyhow!("URI malformed"));
38 }
39 let hex = &bytes[i + 1..i + 3];
40 if !hex[0].is_ascii_hexdigit() || !hex[1].is_ascii_hexdigit() {
41 return Err(anyhow!("URI malformed"));
42 }
43 let value = u8::from_str_radix(str::from_utf8(hex)?, 16)?;
44 if value == 0xc0 || value == 0xc1 || value >= 0xfe {
45 return Err(anyhow!("URI malformed"));
46 }
47 }
48 i += 1;
49 }
50
51 let mut decoded = String::with_capacity(sanitized.len());
53 let bytes = sanitized.as_bytes();
54 let mut i = 0;
55 while i < bytes.len() {
56 if bytes[i] == b'%' && i + 2 < bytes.len() {
57 let hex = &bytes[i + 1..i + 3];
58 if let Ok(value) = u8::from_str_radix(str::from_utf8(hex)?, 16) {
59 if value != 0 {
60 let decoded_char = value as char;
61 if decoded_char.is_ascii_alphanumeric()
62 || "!$&'()*+,-./0123456789:;=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]_abcdefghijklmnopqrstuvwxyz~"
63 .contains(decoded_char)
64 {
65 decoded.push(decoded_char);
66 } else {
67 decoded.push('%');
68 decoded.push(hex[0] as char);
69 decoded.push(hex[1] as char);
70 }
71 i += 2;
72 } else {
73 i += 3;
74 continue;
75 }
76 } else {
77 decoded.push('%');
78 }
79 } else {
80 decoded.push(bytes[i] as char);
81 }
82 i += 1;
83 }
84
85 let mut encoded = String::with_capacity(decoded.len());
87 for ch in decoded.chars() {
88 match ch {
89 '<' | '>' | '^' | '`' | '{' | '|' | '}' => {
90 encoded.push_str(&format!("%{:02X}", ch as u8));
91 }
92 _ => encoded.push(ch),
93 }
94 }
95
96 if !encoded.starts_with('/') {
98 encoded.insert(0, '/');
99 }
100
101 let mut final_resource = String::with_capacity(encoded.len());
103 let mut last_was_slash = false;
104 for ch in encoded.chars() {
105 if ch == '\\' {
106 final_resource.push('/');
107 last_was_slash = true;
108 } else if ch == '/' {
109 if !allow_double_slashes && last_was_slash {
110 continue;
111 }
112 final_resource.push('/');
113 last_was_slash = true;
114 } else {
115 final_resource.push(ch);
116 last_was_slash = false;
117 }
118 }
119
120 let mut segments: Vec<&str> = Vec::new();
122 for mut part in final_resource.split('/') {
123 match part {
124 "." => continue,
125 ".." => {
126 segments.pop(); }
128 "" => {
129 if allow_double_slashes {
130 segments.push("");
131 }
132 }
133 _ => {
134 while part.ends_with('.') {
135 part = &part[..part.len() - 1];
136 }
137 if !part.is_empty() {
138 segments.push(part);
139 }
140 }
141 }
142 }
143
144 final_resource = if allow_double_slashes {
145 segments.join("/")
146 } else if !segments.is_empty() && final_resource.ends_with('/') {
147 format!("/{}/", segments.join("/"))
148 } else {
149 format!("/{}", segments.join("/"))
150 };
151
152 while final_resource.contains("/../") {
154 final_resource = final_resource.replacen("/../", "", 1);
155 }
156
157 if final_resource.is_empty() {
159 final_resource.push('/');
160 }
161
162 Ok(final_resource)
163}
164
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::Result;

    /// Sanitizes every input with double slashes disallowed and compares the
    /// result with the expected path, propagating any sanitizer error.
    fn check_all(cases: &[(&str, &str)]) -> Result<()> {
        for &(input, expected) in cases {
            assert_eq!(sanitize_url(input, false)?, expected, "input: {:?}", input);
        }
        Ok(())
    }

    #[test]
    fn should_return_asterisk_for_asterisk() -> Result<()> {
        check_all(&[("*", "*")])
    }

    #[test]
    fn should_return_empty_string_for_empty_string() -> Result<()> {
        check_all(&[("", "")])
    }

    #[test]
    fn should_remove_null_characters() -> Result<()> {
        check_all(&[("/test%00", "/test"), ("/test\0", "/test")])
    }

    #[test]
    fn should_throw_uri_error_for_malformed_url() {
        for input in ["%c0%af", "%u002f", "%as"] {
            assert!(sanitize_url(input, false).is_err());
        }
    }

    #[test]
    fn should_ensure_the_resource_starts_with_a_slash() -> Result<()> {
        check_all(&[("test", "/test")])
    }

    #[test]
    fn should_convert_backslashes_to_slashes() -> Result<()> {
        check_all(&[("test\\path", "/test/path")])
    }

    #[test]
    fn should_handle_duplicate_slashes() -> Result<()> {
        // The same input collapses or keeps the duplicate depending on the flag.
        let collapsed = sanitize_url("test//path", false)?;
        assert_eq!(collapsed, "/test/path");
        let preserved = sanitize_url("test//path", true)?;
        assert_eq!(preserved, "/test//path");
        Ok(())
    }

    #[test]
    fn should_handle_relative_navigation() -> Result<()> {
        check_all(&[
            ("/./test", "/test"),
            ("/../test", "/test"),
            ("../test", "/test"),
            ("./test", "/test"),
            ("/test/./", "/test/"),
            ("/test/../", "/"),
            ("/test/../path", "/path"),
        ])
    }

    #[test]
    fn should_remove_trailing_dots_in_paths() -> Result<()> {
        check_all(&[("/test...", "/test"), ("/test.../", "/test/")])
    }

    #[test]
    fn should_return_slash_for_empty_sanitized_resource() -> Result<()> {
        check_all(&[("/../..", "/")])
    }

    #[test]
    fn should_encode_special_characters() -> Result<()> {
        check_all(&[
            ("/test<path>", "/test%3Cpath%3E"),
            ("/test^path", "/test%5Epath"),
            ("/test`path", "/test%60path"),
            ("/test{path}", "/test%7Bpath%7D"),
            ("/test|path", "/test%7Cpath"),
        ])
    }

    #[test]
    fn should_preserve_certain_characters() -> Result<()> {
        // Each of these paths must come back exactly as it went in.
        let untouched = [
            "/test!path",
            "/test$path",
            "/test&path",
            "/test-path",
            "/test=path",
            "/test@path",
            "/test_path",
            "/test~path",
        ];
        for path in untouched {
            assert_eq!(sanitize_url(path, false)?, path);
        }
        Ok(())
    }

    #[test]
    fn should_decode_url_encoded_characters_while_preserving_certain_characters() -> Result<()> {
        check_all(&[
            ("/test%20path", "/test%20path"),
            ("/test%21path", "/test!path"),
            ("/test%22path", "/test%22path"),
            ("/test%24path", "/test$path"),
            ("/test%25path", "/test%25path"),
            ("/test%26path", "/test&path"),
            ("/test%2Dpath", "/test-path"),
            ("/test%3Cpath", "/test%3Cpath"),
            ("/test%3Dpath", "/test=path"),
            ("/test%3Epath", "/test%3Epath"),
            ("/test%40path", "/test@path"),
            ("/test%5Fpath", "/test_path"),
            ("/test%7Dpath", "/test%7Dpath"),
            ("/test%7Epath", "/test~path"),
        ])
    }

    #[test]
    fn should_decode_url_encoded_alphanumeric_characters_while_preserving_certain_characters(
    ) -> Result<()> {
        check_all(&[
            ("/conf%69g.json", "/config.json"),
            ("/CONF%49G.JSON", "/CONFIG.JSON"),
            ("/svr%32.js", "/svr2.js"),
            ("/%73%76%72%32%2E%6A%73", "/svr2.js"),
        ])
    }

    #[test]
    fn should_decode_url_encoded_characters_regardless_of_the_letter_case_of_the_url_encoding(
    ) -> Result<()> {
        check_all(&[("/%5f", "/_"), ("/%5F", "/_")])
    }
}